mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK+LibUnicode: Add a method to trim non-ASCII whitespace from a String
Required by WebDriver.
This commit is contained in:
parent
969ee0f3e0
commit
cfcb29bdfd
Notes:
github-actions[bot]
2024-11-04 01:43:52 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/cfcb29bdfd8 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2150
5 changed files with 118 additions and 2 deletions
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
|
* Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
|
||||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -144,6 +144,7 @@ public:
|
||||||
|
|
||||||
ErrorOr<String> trim(Utf8View const& code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
ErrorOr<String> trim(Utf8View const& code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
||||||
ErrorOr<String> trim(StringView code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
ErrorOr<String> trim(StringView code_points_to_trim, TrimMode mode = TrimMode::Both) const;
|
||||||
|
// Trims code points that have the Unicode White_Space property (not only ASCII whitespace) from the side(s) selected by `mode`.
ErrorOr<String> trim_whitespace(TrimMode mode = TrimMode::Both) const;
|
||||||
ErrorOr<String> trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const;
|
ErrorOr<String> trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const;
|
||||||
|
|
||||||
ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
|
ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
|
||||||
|
|
|
@ -1289,6 +1289,82 @@ TEST_CASE(trim)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises String::trim_whitespace with every TrimMode against empty, whitespace-only,
// whitespace-free, and mixed inputs (including the non-ASCII U+00A0 and U+202F).
TEST_CASE(trim_whitespace)
{
    // Empty input: nothing to trim, result is empty for every mode.
    {
        String input {};
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), String {});
    }

    // Input made up solely of ASCII spaces collapses to an empty string.
    {
        auto input = " "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), String {});
    }

    // Another ASCII-space-only input.
    {
        auto input = " "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), String {});
    }

    // A mix of ASCII and non-ASCII whitespace only: still trims down to nothing.
    {
        auto input = " \t \n \r \u00A0 \u202F "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), String {});
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), String {});
    }

    // No whitespace at all: the string comes back unchanged.
    {
        auto input = "abcdef"_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), "abcdef"_string);
    }

    // Leading whitespace only: Right mode must leave it untouched.
    {
        auto input = " \u00A0 abcdef"_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), " \u00A0 abcdef"_string);
    }

    // Trailing whitespace only: Left mode must leave it untouched.
    {
        auto input = "abcdef \u202F "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "abcdef \u202F "_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), "abcdef"_string);
    }

    // Whitespace on both ends: each mode strips only its own side.
    {
        auto input = " \u00A0 abcdef \u202F "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "abcdef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "abcdef \u202F "_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), " \u00A0 abcdef"_string);
    }

    // Interior whitespace only: it is never removed.
    {
        auto input = "ab \t cd \n ef"_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), "ab \t cd \n ef"_string);
    }

    // Interior plus leading whitespace.
    {
        auto input = " \u00A0 ab \t cd \n ef"_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), " \u00A0 ab \t cd \n ef"_string);
    }

    // Interior plus trailing whitespace.
    {
        auto input = "ab \t cd \n ef \u202F "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "ab \t cd \n ef \u202F "_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), "ab \t cd \n ef"_string);
    }

    // Interior, leading, and trailing whitespace together.
    {
        auto input = " \u00A0 ab \t cd \n ef \u202F "_string;
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Both)), "ab \t cd \n ef"_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Left)), "ab \t cd \n ef \u202F "_string);
        EXPECT_EQ(MUST(input.trim_whitespace(TrimMode::Right)), " \u00A0 ab \t cd \n ef"_string);
    }
}
|
||||||
|
|
||||||
TEST_CASE(contains)
|
TEST_CASE(contains)
|
||||||
{
|
{
|
||||||
EXPECT(!String {}.contains({}));
|
EXPECT(!String {}.contains({}));
|
||||||
|
|
|
@ -231,6 +231,11 @@ bool code_point_has_variation_selector_property(u32 code_point)
|
||||||
return code_point_has_property(code_point, UCHAR_VARIATION_SELECTOR);
|
return code_point_has_property(code_point, UCHAR_VARIATION_SELECTOR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether `code_point` has the UCHAR_WHITE_SPACE property, by delegating to
// the shared code_point_has_property() lookup.
bool code_point_has_white_space_property(u32 code_point)
{
    bool const is_white_space = code_point_has_property(code_point, UCHAR_WHITE_SPACE);
    return is_white_space;
}
|
||||||
|
|
||||||
// https://tc39.es/ecma262/#table-binary-unicode-properties
|
// https://tc39.es/ecma262/#table-binary-unicode-properties
|
||||||
bool is_ecma262_property(Property property)
|
bool is_ecma262_property(Property property)
|
||||||
{
|
{
|
||||||
|
|
|
@ -36,6 +36,7 @@ bool code_point_has_identifier_start_property(u32 code_point);
|
||||||
bool code_point_has_identifier_continue_property(u32 code_point);
|
bool code_point_has_identifier_continue_property(u32 code_point);
|
||||||
bool code_point_has_regional_indicator_property(u32 code_point);
|
bool code_point_has_regional_indicator_property(u32 code_point);
|
||||||
bool code_point_has_variation_selector_property(u32 code_point);
|
bool code_point_has_variation_selector_property(u32 code_point);
|
||||||
|
// Whether `code_point` has the White_Space property (UCHAR_WHITE_SPACE).
bool code_point_has_white_space_property(u32 code_point);
|
||||||
|
|
||||||
bool is_ecma262_property(Property);
|
bool is_ecma262_property(Property);
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
|
* Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <LibUnicode/CharacterTypes.h>
|
||||||
#include <LibUnicode/ICU.h>
|
#include <LibUnicode/ICU.h>
|
||||||
|
|
||||||
#include <unicode/bytestream.h>
|
#include <unicode/bytestream.h>
|
||||||
|
@ -155,4 +156,36 @@ Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns this string with code points that have the Unicode White_Space property
// removed from the side(s) selected by `mode`. Whitespace between the first and last
// non-whitespace code points is always kept.
//
// The result is an empty String when nothing remains, `*this` when nothing would be
// removed, and otherwise the value produced by
// substring_from_byte_offset_with_shared_superstring() for the kept byte range.
ErrorOr<String> String::trim_whitespace(TrimMode mode) const
{
    auto view = this->code_points();
    auto const total_byte_length = bytes_as_string_view().length();

    // Byte offset of the first non-whitespace code point (unset if there is none), and
    // the byte offset one past the last non-whitespace code point.
    Optional<size_t> first_kept_byte;
    size_t end_of_kept_bytes = 0;

    for (auto iterator = view.begin(); iterator != view.end(); ++iterator) {
        if (!Unicode::code_point_has_white_space_property(*iterator)) {
            auto const byte_offset = view.byte_offset_of(iterator);

            if (!first_kept_byte.has_value())
                first_kept_byte = byte_offset;

            end_of_kept_bytes = byte_offset + iterator.underlying_code_point_length_in_bytes();
        }
    }

    // A one-sided trim leaves the opposite edge of the string untouched.
    if (mode == TrimMode::Right)
        first_kept_byte = 0;
    else if (mode == TrimMode::Left)
        end_of_kept_bytes = total_byte_length;

    // No non-whitespace code point was found and the left edge was not pinned.
    if (!first_kept_byte.has_value())
        return String {};

    auto const kept_begin = *first_kept_byte;

    // The kept range is empty.
    if (kept_begin == end_of_kept_bytes)
        return String {};

    // The kept range spans the whole string, so there is nothing to cut.
    if (kept_begin == 0uz && end_of_kept_bytes == total_byte_length)
        return *this;

    return substring_from_byte_offset_with_shared_superstring(kept_begin, end_of_kept_bytes - kept_begin);
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue