AK: Implement slugify function for URL slug generation

The slugify function is used to convert input into URL-friendly slugs.
It processes each character in the input, keeping ASCII alphanumeric
characters after lowercasing them and replacing whitespace with the glue
character, emitting only one glue character when multiple spaces are
encountered consecutively; all other characters are dropped.
The resulting string is trimmed of leading and trailing whitespace, and
any internal whitespace is replaced with the glue character.

It is currently used in LibMarkdown headings generation code.
This commit is contained in:
Gurkirat Singh 2023-09-27 22:41:57 +05:30 committed by Sam Atkins
parent 670925a84f
commit f1b79e0cd3
Notes: sideshowbarker 2024-07-17 04:21:32 +09:00
5 changed files with 95 additions and 0 deletions

View file

@ -24,6 +24,7 @@ set(AK_SOURCES
OptionParser.cpp
Random.cpp
SipHash.cpp
Slugify.cpp
StackInfo.cpp
Stream.cpp
String.cpp

33
AK/Slugify.cpp Normal file
View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/Slugify.h>
#include <AK/StringView.h>
namespace AK {

// Converts `input` into a URL-friendly slug.
//
// - ASCII letters and digits are kept, converted to lowercase.
// - ASCII whitespace and occurrences of `glue` act as separators; any number
//   of separators between two kept characters produces exactly one `glue`.
// - Every other code point (punctuation, non-ASCII, ...) is dropped.
// - The result never starts or ends with a separator-generated `glue`.
//
// Returns the slug, or propagates the error from building the output string.
ErrorOr<String> slugify(String const& input, char const glue)
{
    StringBuilder sb;

    // A separator has been seen since the last kept character. The glue is
    // only written once the next kept character shows up, which means leading
    // and trailing separators never reach the output and no trimming pass is
    // needed afterwards.
    bool glue_pending = false;
    // At least one kept character has been written; used to suppress a
    // leading glue.
    bool has_output = false;

    for (auto const& code_point : input.code_points()) {
        if (is_ascii_alphanumeric(code_point)) {
            if (glue_pending && has_output)
                sb.append_code_point(glue);
            sb.append_code_point(to_ascii_lowercase(code_point));
            has_output = true;
            glue_pending = false;
        } else if (code_point == static_cast<u32>(glue) || is_ascii_space(code_point)) {
            glue_pending = true;
        }
        // Anything else is intentionally ignored and does not affect glue_pending.
    }

    return sb.to_string();
}

}

17
AK/Slugify.h Normal file
View file

@ -0,0 +1,17 @@
/*
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
namespace AK {
// Builds a URL-friendly slug from `input`.
//
// ASCII alphanumeric characters are kept in lowercase. ASCII whitespace and
// occurrences of `glue` are treated as separators, with at most one `glue`
// emitted between two kept characters. All other code points are dropped,
// and the result does not end with a trailing `glue`.
//
// `glue` defaults to '-'. Any error raised while building the output string
// is propagated through the returned ErrorOr.
ErrorOr<String> slugify(String const& input, char glue = '-');
}
// Expose slugify without the AK:: prefix when AK is used globally.
#if USING_AK_GLOBALLY
using AK::slugify;
#endif

View file

@ -65,6 +65,7 @@ set(AK_TEST_SOURCES
TestRefPtr.cpp
TestSIMD.cpp
TestSinglyLinkedList.cpp
TestSlugify.cpp
TestSourceGenerator.cpp
TestSourceLocation.cpp
TestSpan.cpp

43
Tests/AK/TestSlugify.cpp Normal file
View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Slugify.h>
#include <LibTest/TestCase.h>
TEST_CASE(ignore_unicode_characters)
{
    // Punctuation and non-ASCII code points must not appear in the slug.
    auto const slug = MUST(slugify("Hello World!🎉"_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
TEST_CASE(all_whitespace_empty_string)
{
    // Input made up solely of whitespace produces an empty slug.
    auto const slug = MUST(slugify(" "_string));
    EXPECT_EQ(slug, ""_string);
}
TEST_CASE(squeeze_multiple_whitespace)
{
    // Whitespace between words is collapsed into one glue character.
    auto const slug = MUST(slugify("Hello World"_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
// NOTE(review): "whitelist" in the test name looks like a typo for
// "whitespace"; the name is left unchanged here.
TEST_CASE(trim_trailing_whitelist)
{
    // Trailing whitespace must not leave a glue character at the end.
    auto const slug = MUST(slugify("Hello World "_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
TEST_CASE(lowercase_all_result)
{
    // Uppercase ASCII letters are converted to lowercase.
    auto const slug = MUST(slugify("HelloWorld"_string));
    EXPECT_EQ(slug, "helloworld"_string);
}
TEST_CASE(slug_glue_change)
{
    // A caller-supplied glue replaces the default '-'.
    auto const slug = MUST(slugify("Hello World"_string, '|'));
    EXPECT_EQ(slug, "hello|world"_string);
}
TEST_CASE(multiple_glue_squeeze)
{
    // A glue character already present in the input, followed by whitespace,
    // still yields a single glue in the output.
    auto const slug = MUST(slugify("Hello_ World"_string, '_'));
    EXPECT_EQ(slug, "hello_world"_string);
}