Explorar el Código

AK: Implement `slugify` function for URL slug generation

The slugify function converts input into a URL-friendly slug.
It processes each character in the input, keeping ASCII alphanumeric
characters (converted to lowercase), replacing whitespace with the glue
character, and dropping every other character. Consecutive whitespace is
collapsed into a single glue character. The resulting string is trimmed
of leading and trailing whitespace, and any internal whitespace is
replaced with the glue character.

It is currently used in the LibMarkdown heading generation code.
Gurkirat Singh hace 1 año
padre
commit
f1b79e0cd3
Se han modificado 5 ficheros con 95 adiciones y 0 borrados
  1. 1 0
      AK/CMakeLists.txt
  2. 33 0
      AK/Slugify.cpp
  3. 17 0
      AK/Slugify.h
  4. 1 0
      Tests/AK/CMakeLists.txt
  5. 43 0
      Tests/AK/TestSlugify.cpp

+ 1 - 0
AK/CMakeLists.txt

@@ -24,6 +24,7 @@ set(AK_SOURCES
     OptionParser.cpp
     Random.cpp
     SipHash.cpp
+    Slugify.cpp
     StackInfo.cpp
     Stream.cpp
     String.cpp

+ 33 - 0
AK/Slugify.cpp

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/CharacterTypes.h>
+#include <AK/Slugify.h>
+#include <AK/StringView.h>
+
+namespace AK {
+// Converts `input` into a URL-friendly slug.
+//
+// - ASCII alphanumeric code points are kept, converted to lowercase.
+// - Every run of ASCII whitespace and/or `glue` characters is collapsed into
+//   a single `glue` character.
+// - All other code points (punctuation, non-ASCII, ...) are dropped.
+// - The result never starts or ends with `glue`.
+//
+// Propagates any error produced while building or trimming the output string.
+ErrorOr<String> slugify(String const& input, char const glue)
+{
+    StringBuilder sb;
+    // Start in the "separator just emitted" state so that leading whitespace or
+    // glue characters are skipped instead of producing a leading glue
+    // (previously " Hello" became "-hello").
+    bool just_processed_space = true;
+
+    for (auto const& code_point : input.code_points()) {
+        if (is_ascii_alphanumeric(code_point)) {
+            sb.append_code_point(to_ascii_lowercase(code_point));
+            just_processed_space = false;
+        } else if ((code_point == static_cast<u32>(glue) || is_ascii_space(code_point)) && !just_processed_space) {
+            // First separator after a kept character: emit exactly one glue.
+            sb.append_code_point(glue);
+            just_processed_space = true;
+        }
+        // Any other code point is dropped and leaves the separator state
+        // untouched, so "a !! b" still collapses to "a-b".
+    }
+
+    auto output = TRY(sb.to_string());
+    // Separator runs are squeezed above, so at most one trailing glue can remain.
+    if (output.ends_with(static_cast<u32>(glue))) {
+        return output.trim(StringView { &glue, 1 }, TrimMode::Right);
+    }
+    return output;
+}
+}

+ 17 - 0
AK/Slugify.h

@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/String.h>
+
+namespace AK {
+// Builds a URL-friendly slug from `input`.
+// ASCII alphanumerics are kept in lowercase, each run of ASCII whitespace or
+// `glue` characters becomes a single `glue` (default '-'), every other code
+// point is dropped, and a trailing `glue` is removed from the result.
+ErrorOr<String> slugify(String const& input, char glue = '-');
+}
+
+#if USING_AK_GLOBALLY
+using AK::slugify;
+#endif

+ 1 - 0
Tests/AK/CMakeLists.txt

@@ -65,6 +65,7 @@ set(AK_TEST_SOURCES
     TestRefPtr.cpp
     TestSIMD.cpp
     TestSinglyLinkedList.cpp
+    TestSlugify.cpp
     TestSourceGenerator.cpp
     TestSourceLocation.cpp
     TestSpan.cpp

+ 43 - 0
Tests/AK/TestSlugify.cpp

@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Slugify.h>
+#include <LibTest/TestCase.h>
+
+// Punctuation and non-ASCII code points must not appear in the slug.
+TEST_CASE(ignore_unicode_characters)
+{
+    auto const slug = MUST(slugify("Hello World!🎉"_string));
+    EXPECT_EQ(slug, "hello-world"_string);
+}
+
+// Input made only of whitespace slugifies to the empty string.
+TEST_CASE(all_whitespace_empty_string)
+{
+    auto const slug = MUST(slugify("  "_string));
+    EXPECT_EQ(slug, ""_string);
+}
+
+// A run of several spaces collapses into a single glue character.
+TEST_CASE(squeeze_multiple_whitespace)
+{
+    auto const slug = MUST(slugify("Hello   World"_string));
+    EXPECT_EQ(slug, "hello-world"_string);
+}
+
+// Trailing whitespace must not leave a dangling glue character at the end.
+TEST_CASE(trim_trailing_whitelist)
+{
+    auto const slug = MUST(slugify("Hello   World    "_string));
+    EXPECT_EQ(slug, "hello-world"_string);
+}
+
+// Uppercase ASCII letters are converted to lowercase.
+TEST_CASE(lowercase_all_result)
+{
+    auto const slug = MUST(slugify("HelloWorld"_string));
+    EXPECT_EQ(slug, "helloworld"_string);
+}
+
+// A caller-supplied glue character replaces the default '-'.
+TEST_CASE(slug_glue_change)
+{
+    auto const slug = MUST(slugify("Hello World"_string, '|'));
+    EXPECT_EQ(slug, "hello|world"_string);
+}
+
+// A glue character already present in the input merges with adjacent whitespace.
+TEST_CASE(multiple_glue_squeeze)
+{
+    auto const slug = MUST(slugify("Hello_ World"_string, '_'));
+    EXPECT_EQ(slug, "hello_world"_string);
+}