mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 15:10:19 +00:00
AK+LibUnicode: Add a method to trim non-ASCII whitespace from a String
Required by WebDriver.
This commit is contained in:
parent
969ee0f3e0
commit
cfcb29bdfd
Notes:
github-actions[bot]
2024-11-04 01:43:52 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/cfcb29bdfd8 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2150
5 changed files with 118 additions and 2 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -144,6 +144,7 @@ public:
|
|||
|
||||
ErrorOr<String> trim(Utf8View const& code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
||||
ErrorOr<String> trim(StringView code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
||||
// Trims code points that have the Unicode White_Space property (not only ASCII whitespace) from the side(s) selected by `mode`.
ErrorOr<String> trim_whitespace(TrimMode mode = TrimMode::Both) const;
|
||||
ErrorOr<String> trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const;
|
||||
|
||||
ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
|
||||
|
|
|
@ -1289,6 +1289,82 @@ TEST_CASE(trim)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_CASE(trim_whitespace)
{
    // Checks one input against its expected result for each of the three trim modes.
    auto expect_trimmed = [](String const& input, String const& expected_both, String const& expected_left, String const& expected_right) {
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), expected_both);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), expected_left);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), expected_right);
    };

    // Empty and whitespace-only inputs always trim down to the empty string.
    expect_trimmed(String {}, String {}, String {}, String {});
    expect_trimmed(" "_string, String {}, String {}, String {});
    expect_trimmed(" "_string, String {}, String {}, String {});
    expect_trimmed(" \t \n \r \u00A0 \u202F "_string, String {}, String {}, String {});

    // No surrounding whitespace: the input comes back unchanged in every mode.
    expect_trimmed("abcdef"_string, "abcdef"_string, "abcdef"_string, "abcdef"_string);

    // Leading, trailing, and both-sided whitespace, including non-ASCII code points.
    expect_trimmed(" \u00A0 abcdef"_string, "abcdef"_string, "abcdef"_string, " \u00A0 abcdef"_string);
    expect_trimmed("abcdef \u202F "_string, "abcdef"_string, "abcdef \u202F "_string, "abcdef"_string);
    expect_trimmed(" \u00A0 abcdef \u202F "_string, "abcdef"_string, "abcdef \u202F "_string, " \u00A0 abcdef"_string);

    // Whitespace in the interior of the string must never be removed.
    expect_trimmed("ab \t cd \n ef"_string, "ab \t cd \n ef"_string, "ab \t cd \n ef"_string, "ab \t cd \n ef"_string);
    expect_trimmed(" \u00A0 ab \t cd \n ef"_string, "ab \t cd \n ef"_string, "ab \t cd \n ef"_string, " \u00A0 ab \t cd \n ef"_string);
    expect_trimmed("ab \t cd \n ef \u202F "_string, "ab \t cd \n ef"_string, "ab \t cd \n ef \u202F "_string, "ab \t cd \n ef"_string);
    expect_trimmed(" \u00A0 ab \t cd \n ef \u202F "_string, "ab \t cd \n ef"_string, "ab \t cd \n ef \u202F "_string, " \u00A0 ab \t cd \n ef"_string);
}
|
||||
|
||||
TEST_CASE(contains)
|
||||
{
|
||||
EXPECT(!String {}.contains({}));
|
||||
|
|
|
@ -231,6 +231,11 @@ bool code_point_has_variation_selector_property(u32 code_point)
|
|||
return code_point_has_property(code_point, UCHAR_VARIATION_SELECTOR);
|
||||
}
|
||||
|
||||
// Returns whether `code_point` has the UCHAR_WHITE_SPACE property, as reported by code_point_has_property().
bool code_point_has_white_space_property(u32 code_point)
{
    auto const is_white_space = code_point_has_property(code_point, UCHAR_WHITE_SPACE);
    return is_white_space;
}
|
||||
|
||||
// https://tc39.es/ecma262/#table-binary-unicode-properties
|
||||
bool is_ecma262_property(Property property)
|
||||
{
|
||||
|
|
|
@ -36,6 +36,7 @@ bool code_point_has_identifier_start_property(u32 code_point);
|
|||
bool code_point_has_identifier_continue_property(u32 code_point);
|
||||
bool code_point_has_regional_indicator_property(u32 code_point);
|
||||
bool code_point_has_variation_selector_property(u32 code_point);
|
||||
bool code_point_has_white_space_property(u32 code_point);
|
||||
|
||||
bool is_ecma262_property(Property);
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
/*
|
||||
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <LibUnicode/ICU.h>
|
||||
|
||||
#include <unicode/bytestream.h>
|
||||
|
@ -155,4 +156,36 @@ Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_
|
|||
return {};
|
||||
}
|
||||
|
||||
// Removes code points with the Unicode White_Space property from the left, right, or both ends of
// this string, depending on `mode`. Yields an empty String when nothing remains, and a copy of
// *this when there is nothing to strip.
ErrorOr<String> String::trim_whitespace(TrimMode mode) const
{
    auto const total_bytes = bytes_as_string_view().length();
    auto view = this->code_points();

    // Byte offset of the first non-whitespace code point (if any), and the byte offset one past
    // the end of the last non-whitespace code point.
    Optional<size_t> first_kept;
    size_t end_of_kept = 0;

    for (auto it = view.begin(); it != view.end(); ++it) {
        if (!Unicode::code_point_has_white_space_property(*it)) {
            auto position = view.byte_offset_of(it);

            if (!first_kept.has_value())
                first_kept = position;

            end_of_kept = position + it.underlying_code_point_length_in_bytes();
        }
    }

    // A one-sided trim keeps the untouched side anchored at the corresponding edge of the string.
    if (mode == TrimMode::Right)
        first_kept = 0;
    if (mode == TrimMode::Left)
        end_of_kept = total_bytes;

    // No non-whitespace code point was found (or the kept range is empty).
    if (!first_kept.has_value() || *first_kept == end_of_kept)
        return String {};

    // The kept range spans the whole string, so there is nothing to cut.
    if (*first_kept == 0uz && end_of_kept == total_bytes)
        return *this;

    return substring_from_byte_offset_with_shared_superstring(*first_kept, end_of_kept - *first_kept);
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue