Browse Source

AK+LibWeb: Add {Fly,}String::to_ascii_{upper,lower}case()

These don't have to worry about the input not being valid UTF-8 and
so can be infallible (and can even return self if no changes are needed).

We use these instead of Infra::to_ascii_{upper,lower}case in LibWeb.
Andreas Kling, 9 months ago
parent
commit
073bcfd386

+ 48 - 0
AK/FlyString.cpp

@@ -173,6 +173,54 @@ ErrorOr<void> Formatter<FlyString>::format(FormatBuilder& builder, FlyString con
     return Formatter<StringView>::format(builder, fly_string.bytes_as_string_view());
 }
 
+// Returns this string with each byte that AK::is_ascii_upper_alpha() accepts replaced by
+// AK::to_ascii_lowercase() of that byte; every other byte is copied through untouched.
+// When no byte needs replacing, *this is returned and no new string is built.
+FlyString FlyString::to_ascii_lowercase() const
+{
+    auto const source = bytes();
+
+    // Scan first so that an input without ASCII uppercase letters can return *this directly.
+    bool needs_conversion = false;
+    for (u8 const byte : source) {
+        if (AK::is_ascii_upper_alpha(byte)) {
+            needs_conversion = true;
+            break;
+        }
+    }
+    if (!needs_conversion)
+        return *this;
+
+    Vector<u8> converted;
+    // Reserve room for every byte up front; the loop below appends exactly source.size() bytes unchecked.
+    converted.ensure_capacity(source.size());
+    for (u8 const byte : source) {
+        if (!AK::is_ascii_upper_alpha(byte)) {
+            converted.unchecked_append(byte);
+            continue;
+        }
+        converted.unchecked_append(AK::to_ascii_lowercase(byte));
+    }
+
+    // The rebuilt bytes are passed on without UTF-8 validation.
+    return String::from_utf8_without_validation(converted);
+}
+
+// Returns this string with each byte that AK::is_ascii_lower_alpha() accepts replaced by
+// AK::to_ascii_uppercase() of that byte; every other byte is copied through untouched.
+// When no byte needs replacing, *this is returned and no new string is built.
+FlyString FlyString::to_ascii_uppercase() const
+{
+    auto const source = bytes();
+
+    // Scan first so that an input without ASCII lowercase letters can return *this directly.
+    bool needs_conversion = false;
+    for (u8 const byte : source) {
+        if (AK::is_ascii_lower_alpha(byte)) {
+            needs_conversion = true;
+            break;
+        }
+    }
+    if (!needs_conversion)
+        return *this;
+
+    Vector<u8> converted;
+    // Reserve room for every byte up front; the loop below appends exactly source.size() bytes unchecked.
+    converted.ensure_capacity(source.size());
+    for (u8 const byte : source) {
+        if (!AK::is_ascii_lower_alpha(byte)) {
+            converted.unchecked_append(byte);
+            continue;
+        }
+        converted.unchecked_append(AK::to_ascii_uppercase(byte));
+    }
+
+    // The rebuilt bytes are passed on without UTF-8 validation.
+    return String::from_utf8_without_validation(converted);
+}
+
 bool FlyString::equals_ignoring_ascii_case(FlyString const& other) const
 {
     if (*this == other)

+ 3 - 0
AK/FlyString.h

@@ -66,6 +66,9 @@ public:
     [[nodiscard]] bool equals_ignoring_ascii_case(FlyString const&) const;
     [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
 
+    [[nodiscard]] FlyString to_ascii_lowercase() const;
+    [[nodiscard]] FlyString to_ascii_uppercase() const;
+
     [[nodiscard]] bool starts_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
 
     [[nodiscard]] bool ends_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;

+ 53 - 0
AK/String.cpp

@@ -381,6 +381,59 @@ ErrorOr<String> String::from_byte_string(ByteString const& byte_string)
     return String::from_utf8(byte_string.view());
 }
 
+// Produces the ASCII-lowercased form of this string: bytes for which AK::is_ascii_upper_alpha()
+// is true are mapped through AK::to_ascii_lowercase(), all remaining bytes are kept as they are.
+// If the string holds no such byte, *this is returned instead of a freshly built string.
+String String::to_ascii_lowercase() const
+{
+    auto const input = bytes();
+
+    // Look for the first byte that would change; without one there is nothing to rebuild.
+    bool found_uppercase = false;
+    for (u8 const byte : input) {
+        if (AK::is_ascii_upper_alpha(byte)) {
+            found_uppercase = true;
+            break;
+        }
+    }
+    if (!found_uppercase)
+        return *this;
+
+    Vector<u8> output;
+    // Capacity for all bytes is ensured here, which is what the unchecked appends below rely on.
+    output.ensure_capacity(input.size());
+    for (u8 const byte : input) {
+        if (!AK::is_ascii_upper_alpha(byte)) {
+            output.unchecked_append(byte);
+            continue;
+        }
+        output.unchecked_append(AK::to_ascii_lowercase(byte));
+    }
+
+    // No UTF-8 validation is performed on the rebuilt bytes.
+    return String::from_utf8_without_validation(output);
+}
+
+// Produces the ASCII-uppercased form of this string: bytes for which AK::is_ascii_lower_alpha()
+// is true are mapped through AK::to_ascii_uppercase(), all remaining bytes are kept as they are.
+// If the string holds no such byte, *this is returned instead of a freshly built string.
+String String::to_ascii_uppercase() const
+{
+    auto const input = bytes();
+
+    // Look for the first byte that would change; without one there is nothing to rebuild.
+    bool found_lowercase = false;
+    for (u8 const byte : input) {
+        if (AK::is_ascii_lower_alpha(byte)) {
+            found_lowercase = true;
+            break;
+        }
+    }
+    if (!found_lowercase)
+        return *this;
+
+    Vector<u8> output;
+    // Capacity for all bytes is ensured here, which is what the unchecked appends below rely on.
+    output.ensure_capacity(input.size());
+    for (u8 const byte : input) {
+        if (!AK::is_ascii_lower_alpha(byte)) {
+            output.unchecked_append(byte);
+            continue;
+        }
+        output.unchecked_append(AK::to_ascii_uppercase(byte));
+    }
+
+    // No UTF-8 validation is performed on the rebuilt bytes.
+    return String::from_utf8_without_validation(output);
+}
+
+// String-vs-String overload: forwards the byte views of both strings to
+// StringUtils::equals_ignoring_ascii_case() and returns its result.
+bool String::equals_ignoring_ascii_case(String const& other) const
+{
+    auto const lhs = bytes_as_string_view();
+    auto const rhs = other.bytes_as_string_view();
+    return StringUtils::equals_ignoring_ascii_case(lhs, rhs);
+}
+
 bool String::equals_ignoring_ascii_case(StringView other) const
 {
     return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other);

+ 4 - 0
AK/String.h

@@ -102,9 +102,13 @@ public:
     ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase) const;
     ErrorOr<String> to_casefold() const;
 
+    [[nodiscard]] String to_ascii_lowercase() const;
+    [[nodiscard]] String to_ascii_uppercase() const;
+
     // Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
     [[nodiscard]] bool equals_ignoring_case(String const&) const;
 
+    [[nodiscard]] bool equals_ignoring_ascii_case(String const&) const;
     [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
 
     [[nodiscard]] bool starts_with(u32 code_point) const;

+ 24 - 0
Tests/AK/TestString.cpp

@@ -1416,3 +1416,27 @@ TEST_CASE(ends_with)
     EXPECT(emoji.ends_with(0x1F643));
     EXPECT(!emoji.ends_with(0x1F600));
 }
+
+// Exercises String::to_ascii_lowercase() on already-lowercase, mixed-case and all-uppercase input,
+// then checks that an input needing no change comes back backed by the same bytes.
+TEST_CASE(to_ascii_lowercase)
+{
+    // Value checks: every spelling of "foobar" must lowercase to "foobar".
+    EXPECT_EQ("foobar"_string.to_ascii_lowercase(), "foobar"_string);
+    EXPECT_EQ("FooBar"_string.to_ascii_lowercase(), "foobar"_string);
+    EXPECT_EQ("FOOBAR"_string.to_ascii_lowercase(), "foobar"_string);
+
+    {
+        // NOTE: No changes are needed here, so to_ascii_lowercase() is expected to hand back the same
+        //       underlying string. The input is deliberately too long for the short string optimization,
+        //       so comparing the data pointers is meaningful.
+        auto long_string = "this is a long string that cannot use the short string optimization"_string;
+        auto lowercased = long_string.to_ascii_lowercase();
+        EXPECT_EQ(long_string.bytes().data(), lowercased.bytes().data());
+    }
+}
+
+// Exercises String::to_ascii_uppercase() on all-lowercase, mixed-case and already-uppercase input,
+// then checks that an input needing no change comes back backed by the same bytes.
+TEST_CASE(to_ascii_uppercase)
+{
+    // Value checks: every spelling of "foobar" must uppercase to "FOOBAR".
+    EXPECT_EQ("foobar"_string.to_ascii_uppercase(), "FOOBAR"_string);
+    EXPECT_EQ("FooBar"_string.to_ascii_uppercase(), "FOOBAR"_string);
+    EXPECT_EQ("FOOBAR"_string.to_ascii_uppercase(), "FOOBAR"_string);
+
+    {
+        // NOTE: No changes are needed here, so to_ascii_uppercase() is expected to hand back the same
+        //       underlying string. The input is deliberately too long for the short string optimization,
+        //       so comparing the data pointers is meaningful.
+        auto long_string = "THIS IS A LONG STRING THAT CANNOT USE THE SHORT STRING OPTIMIZATION"_string;
+        auto uppercased = long_string.to_ascii_uppercase();
+        EXPECT_EQ(long_string.bytes().data(), uppercased.bytes().data());
+    }
+}

+ 1 - 1
Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp

@@ -395,7 +395,7 @@ Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selec
             return Selector::SimpleSelector {
                 .type = Selector::SimpleSelector::Type::PseudoElement,
                 // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
-                .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) },
+                .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, pseudo_name.to_string().to_ascii_lowercase() },
             };
         }
 

+ 1 - 1
Userland/Libraries/LibWeb/DOM/Document.cpp

@@ -3698,7 +3698,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Attr>> Document::create_attribute(String co
 
     // 2. If this is an HTML document, then set localName to localName in ASCII lowercase.
     // 3. Return a new attribute whose local name is localName and node document is this.
-    return Attr::create(*this, is_html_document() ? MUST(Infra::to_ascii_lowercase(local_name)) : local_name);
+    return Attr::create(*this, is_html_document() ? local_name.to_ascii_lowercase() : local_name);
 }
 
 // https://dom.spec.whatwg.org/#dom-document-createattributens

+ 3 - 3
Userland/Libraries/LibWeb/DOM/Element.cpp

@@ -184,7 +184,7 @@ WebIDL::ExceptionOr<void> Element::set_attribute(FlyString const& name, String c
     // 4. If attribute is null, create an attribute whose local name is qualifiedName, value is value, and node document
     //    is this’s node document, then append this attribute to this, and then return.
     if (!attribute) {
-        auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name, value);
+        auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name, value);
         m_attributes->append_attribute(new_attribute);
 
         return {};
@@ -354,7 +354,7 @@ WebIDL::ExceptionOr<bool> Element::toggle_attribute(FlyString const& name, Optio
         // 1. If force is not given or is true, create an attribute whose local name is qualifiedName, value is the empty
         //    string, and node document is this’s node document, then append this attribute to this, and then return true.
         if (!force.has_value() || force.value()) {
-            auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name.to_string(), String {});
+            auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name.to_string(), String {});
             m_attributes->append_attribute(new_attribute);
 
             return true;
@@ -891,7 +891,7 @@ void Element::make_html_uppercased_qualified_name()
 {
     // This is allowed by the spec: "User agents could optimize qualified name and HTML-uppercased qualified name by storing them in internal slots."
     if (namespace_uri() == Namespace::HTML && document().document_type() == Document::Type::HTML)
-        m_html_uppercased_qualified_name = MUST(Infra::to_ascii_uppercase(qualified_name()));
+        m_html_uppercased_qualified_name = qualified_name().to_ascii_uppercase();
     else
         m_html_uppercased_qualified_name = qualified_name();
 }

+ 1 - 1
Userland/Libraries/LibWeb/DOM/NamedNodeMap.cpp

@@ -65,7 +65,7 @@ Vector<FlyString> NamedNodeMap::supported_property_names() const
     if (associated_element().namespace_uri() == Namespace::HTML) {
         // 1. Let lowercaseName be name, in ASCII lowercase.
         // 2. If lowercaseName is not equal to name, remove name from names.
-        names.remove_all_matching([](auto const& name) { return name != MUST(Infra::to_ascii_lowercase(name)); });
+        names.remove_all_matching([](auto const& name) { return name != name.to_ascii_lowercase(); });
     }
 
     // 3. Return names.

+ 1 - 1
Userland/Libraries/LibWeb/DOM/ParentNode.cpp

@@ -139,7 +139,7 @@ JS::NonnullGCPtr<HTMLCollection> ParentNode::get_elements_by_tag_name(FlyString
 
     // 2. Otherwise, if root’s node document is an HTML document, return a HTMLCollection rooted at root, whose filter matches the following descendant elements:
     if (root().document().document_type() == Document::Type::HTML) {
-        FlyString qualified_name_in_ascii_lowercase = MUST(Infra::to_ascii_lowercase(qualified_name));
+        FlyString qualified_name_in_ascii_lowercase = qualified_name.to_ascii_lowercase();
         return HTMLCollection::create(*this, HTMLCollection::Scope::Descendants, [qualified_name, qualified_name_in_ascii_lowercase](Element const& element) {
             // - Whose namespace is the HTML namespace and whose qualified name is qualifiedName, in ASCII lowercase.
             if (element.namespace_uri() == Namespace::HTML)

+ 1 - 1
Userland/Libraries/LibWeb/FileAPI/Blob.cpp

@@ -292,7 +292,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Blob>> Blob::slice_blob(Optional<i64> start
         }
         // 2. Convert every character in relativeContentType to ASCII lowercase.
         else {
-            relative_content_type = TRY_OR_THROW_OOM(vm, Infra::to_ascii_lowercase(content_type.value()));
+            relative_content_type = content_type.value().to_ascii_lowercase();
         }
     }
 

+ 1 - 1
Userland/Libraries/LibWeb/HTML/DataTransfer.cpp

@@ -142,7 +142,7 @@ String DataTransfer::get_data(String const& format_argument) const
         return {};
 
     // 3. Let format be the first argument, converted to ASCII lowercase.
-    auto format = MUST(Infra::to_ascii_lowercase(format_argument));
+    auto format = format_argument.to_ascii_lowercase();
 
     // 4. Let convert-to-URL be false.
     [[maybe_unused]] bool convert_to_url = false;

+ 2 - 2
Userland/Libraries/LibWeb/HTML/DataTransferItemList.cpp

@@ -76,7 +76,7 @@ WebIDL::ExceptionOr<JS::GCPtr<DataTransferItem>> DataTransferItemList::add(Strin
     // method's first argument.
     auto item = m_data_transfer->add_item({
         .kind = HTML::DragDataStoreItem::Kind::Text,
-        .type_string = MUST(Infra::to_ascii_lowercase(type)),
+        .type_string = type.to_ascii_lowercase(),
         .data = MUST(ByteBuffer::copy(data.bytes())),
         .file_name = {},
     });
@@ -100,7 +100,7 @@ JS::GCPtr<DataTransferItem> DataTransferItemList::add(JS::NonnullGCPtr<FileAPI::
     // converted to ASCII lowercase, and whose data is the same as the File's data.
     auto item = m_data_transfer->add_item({
         .kind = HTML::DragDataStoreItem::Kind::File,
-        .type_string = MUST(Infra::to_ascii_lowercase(file->type())),
+        .type_string = file->type().to_ascii_lowercase(),
         .data = MUST(ByteBuffer::copy(file->raw_bytes())),
         .file_name = file->name().to_byte_string(),
     });

+ 1 - 1
Userland/Libraries/LibWeb/HTML/HTMLInputElement.cpp

@@ -1506,7 +1506,7 @@ String HTMLInputElement::value_sanitization_algorithm(String const& value) const
         // https://html.spec.whatwg.org/multipage/input.html#color-state-(type=color):value-sanitization-algorithm
         // If the value of the element is a valid simple color, then set it to the value of the element converted to ASCII lowercase;
         if (is_valid_simple_color(value))
-            return MUST(Infra::to_ascii_lowercase(value));
+            return value.to_ascii_lowercase();
         // otherwise, set it to the string "#000000".
         return "#000000"_string;
     }

+ 1 - 1
Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp

@@ -137,7 +137,7 @@ void HTMLLinkElement::attribute_changed(FlyString const& name, Optional<String>
     if (name == HTML::AttributeNames::rel) {
         m_relationship = 0;
         // Keywords are always ASCII case-insensitive, and must be compared as such.
-        auto lowercased_value = MUST(Infra::to_ascii_lowercase(value.value_or(String {})));
+        auto lowercased_value = value.value_or(String {}).to_ascii_lowercase();
         // To determine which link types apply to a link, a, area, or form element,
         // the element's rel attribute must be split on ASCII whitespace.
         // The resulting tokens are the keywords for the link types that apply to that element.

+ 2 - 0
Userland/Libraries/LibWeb/Infra/Strings.cpp

@@ -3,11 +3,13 @@
  * Copyright (c) 2022, networkException <networkexception@serenityos.org>
  * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
  * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
+ * Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
 #include <AK/CharacterTypes.h>
+#include <AK/FlyString.h>
 #include <AK/GenericLexer.h>
 #include <AK/String.h>
 #include <AK/Utf16View.h>