diff --git a/AK/FlyString.cpp b/AK/FlyString.cpp index b7cd4d0e5dc..55fb34dd47d 100644 --- a/AK/FlyString.cpp +++ b/AK/FlyString.cpp @@ -173,6 +173,54 @@ ErrorOr Formatter::format(FormatBuilder& builder, FlyString con return Formatter::format(builder, fly_string.bytes_as_string_view()); } +/* Returns this FlyString with its ASCII uppercase letters converted to lowercase. A first pass over bytes() looks for any ASCII uppercase byte; when none is found, *this is returned as-is and no buffer is built. Otherwise every byte is copied into a buffer whose capacity is reserved up front with ensure_capacity(bytes().size()), lowercasing only the ASCII uppercase bytes, and the result is created with String::from_utf8_without_validation(). NOTE(review): skipping validation presumably relies on the input already being valid UTF-8 and on only ASCII letter bytes being rewritten; confirm. */FlyString FlyString::to_ascii_lowercase() const +{ + bool const has_ascii_uppercase = [&] { + for (u8 const byte : bytes()) { + if (AK::is_ascii_upper_alpha(byte)) + return true; + } + return false; + }(); + + if (!has_ascii_uppercase) + return *this; + + /* NOTE(review): Vector has no element type in this text; the loop below appends u8 values, so this is presumably a Vector of u8. Confirm against the upstream file. */Vector lowercase_bytes; + lowercase_bytes.ensure_capacity(bytes().size()); + for (u8 const byte : bytes()) { + if (AK::is_ascii_upper_alpha(byte)) + lowercase_bytes.unchecked_append(AK::to_ascii_lowercase(byte)); + else + lowercase_bytes.unchecked_append(byte); + } + return String::from_utf8_without_validation(lowercase_bytes); +} + +/* Uppercase counterpart of to_ascii_lowercase(): returns *this unchanged when bytes() contains no ASCII lowercase byte, otherwise builds a new value in which only the ASCII lowercase bytes are uppercased and all other bytes are copied through. */FlyString FlyString::to_ascii_uppercase() const +{ + bool const has_ascii_lowercase = [&] { + for (u8 const byte : bytes()) { + if (AK::is_ascii_lower_alpha(byte)) + return true; + } + return false; + }(); + + if (!has_ascii_lowercase) + return *this; + + /* NOTE(review): same missing element type as in to_ascii_lowercase(); presumably a Vector of u8. Confirm. */Vector uppercase_bytes; + uppercase_bytes.ensure_capacity(bytes().size()); + for (u8 const byte : bytes()) { + if (AK::is_ascii_lower_alpha(byte)) + uppercase_bytes.unchecked_append(AK::to_ascii_uppercase(byte)); + else + uppercase_bytes.unchecked_append(byte); + } + return String::from_utf8_without_validation(uppercase_bytes); +} + bool FlyString::equals_ignoring_ascii_case(FlyString const& other) const { if (*this == other) diff --git a/AK/FlyString.h b/AK/FlyString.h index 90ca5d16694..1e1cd169009 100644 --- a/AK/FlyString.h +++ b/AK/FlyString.h @@ -66,6 +66,9 @@ public: [[nodiscard]] bool equals_ignoring_ascii_case(FlyString const&) const; [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const; + /* ASCII-only case conversions; both return a FlyString by value. */[[nodiscard]] FlyString to_ascii_lowercase() const; + [[nodiscard]] FlyString to_ascii_uppercase() const; + [[nodiscard]] bool 
starts_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const; [[nodiscard]] bool ends_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const; diff --git a/AK/String.cpp b/AK/String.cpp index 5459b6907d2..d1ad97bc0ca 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -381,6 +381,59 @@ ErrorOr String::from_byte_string(ByteString const& byte_string) return String::from_utf8(byte_string.view()); } +/* Returns this String with its ASCII uppercase letters converted to lowercase. A first pass over bytes() looks for any ASCII uppercase byte; when none is found, *this is returned as-is and no buffer is built. Otherwise every byte is copied into a buffer whose capacity is reserved up front with ensure_capacity(bytes().size()), lowercasing only the ASCII uppercase bytes, and the result is created with String::from_utf8_without_validation(). NOTE(review): skipping validation presumably relies on the input already being valid UTF-8 and on only ASCII letter bytes being rewritten; confirm. */String String::to_ascii_lowercase() const +{ + bool const has_ascii_uppercase = [&] { + for (u8 const byte : bytes()) { + if (AK::is_ascii_upper_alpha(byte)) + return true; + } + return false; + }(); + + if (!has_ascii_uppercase) + return *this; + + /* NOTE(review): Vector has no element type in this text; the loop below appends u8 values, so this is presumably a Vector of u8. Confirm against the upstream file. */Vector lowercase_bytes; + lowercase_bytes.ensure_capacity(bytes().size()); + for (u8 const byte : bytes()) { + if (AK::is_ascii_upper_alpha(byte)) + lowercase_bytes.unchecked_append(AK::to_ascii_lowercase(byte)); + else + lowercase_bytes.unchecked_append(byte); + } + return String::from_utf8_without_validation(lowercase_bytes); +} + +/* Uppercase counterpart of to_ascii_lowercase(): returns *this unchanged when bytes() contains no ASCII lowercase byte, otherwise builds a new String in which only the ASCII lowercase bytes are uppercased and all other bytes are copied through. */String String::to_ascii_uppercase() const +{ + bool const has_ascii_lowercase = [&] { + for (u8 const byte : bytes()) { + if (AK::is_ascii_lower_alpha(byte)) + return true; + } + return false; + }(); + + if (!has_ascii_lowercase) + return *this; + + /* NOTE(review): same missing element type as in to_ascii_lowercase(); presumably a Vector of u8. Confirm. */Vector uppercase_bytes; + uppercase_bytes.ensure_capacity(bytes().size()); + for (u8 const byte : bytes()) { + if (AK::is_ascii_lower_alpha(byte)) + uppercase_bytes.unchecked_append(AK::to_ascii_uppercase(byte)); + else + uppercase_bytes.unchecked_append(byte); + } + return String::from_utf8_without_validation(uppercase_bytes); +} + +/* Overload taking another String: forwards both strings' byte views to StringUtils::equals_ignoring_ascii_case(), in the same way as the StringView overload that follows. */bool String::equals_ignoring_ascii_case(String const& other) const +{ + return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other.bytes_as_string_view()); +} + bool String::equals_ignoring_ascii_case(StringView other) const { return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other); diff --git a/AK/String.h b/AK/String.h index 
13e31401a32..93c04983fe7 100644 --- a/AK/String.h +++ b/AK/String.h @@ -102,9 +102,13 @@ public: ErrorOr to_titlecase(Optional const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase) const; ErrorOr to_casefold() const; + /* ASCII-only case conversions; these are declared to return a String directly rather than an ErrorOr, unlike to_titlecase() and to_casefold() above. */[[nodiscard]] String to_ascii_lowercase() const; + [[nodiscard]] String to_ascii_uppercase() const; + // Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application. [[nodiscard]] bool equals_ignoring_case(String const&) const; + [[nodiscard]] bool equals_ignoring_ascii_case(String const&) const; [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const; [[nodiscard]] bool starts_with(u32 code_point) const; diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index 3523f35cd79..ccffba67179 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -1416,3 +1416,27 @@ TEST_CASE(ends_with) EXPECT(emoji.ends_with(0x1F643)); EXPECT(!emoji.ends_with(0x1F600)); } + +/* Covers all-lowercase, mixed-case and all-uppercase input, then checks that a long, already-lowercase string comes back with the same bytes().data() pointer. */TEST_CASE(to_ascii_lowercase) +{ + EXPECT_EQ("foobar"_string.to_ascii_lowercase(), "foobar"_string); + EXPECT_EQ("FooBar"_string.to_ascii_lowercase(), "foobar"_string); + EXPECT_EQ("FOOBAR"_string.to_ascii_lowercase(), "foobar"_string); + + // NOTE: We expect to_ascii_lowercase() to return the same underlying string if no changes are needed. + auto long_string = "this is a long string that cannot use the short string optimization"_string; + auto lowercased = long_string.to_ascii_lowercase(); + EXPECT_EQ(long_string.bytes().data(), lowercased.bytes().data()); +} + +/* Mirror of the lowercase test: the same three inputs are expected to become "FOOBAR", and a long, already-uppercase string is expected to come back with the same bytes().data() pointer. */TEST_CASE(to_ascii_uppercase) +{ + EXPECT_EQ("foobar"_string.to_ascii_uppercase(), "FOOBAR"_string); + EXPECT_EQ("FooBar"_string.to_ascii_uppercase(), "FOOBAR"_string); + EXPECT_EQ("FOOBAR"_string.to_ascii_uppercase(), "FOOBAR"_string); + + // NOTE: We expect to_ascii_uppercase() to return the same underlying string if no changes are needed. 
+ /* The literal is long on purpose (its own text says it cannot use the short string optimization), so the data-pointer comparison below is meaningful. */auto long_string = "THIS IS A LONG STRING THAT CANNOT USE THE SHORT STRING OPTIMIZATION"_string; + auto uppercased = long_string.to_ascii_uppercase(); + EXPECT_EQ(long_string.bytes().data(), uppercased.bytes().data()); +} diff --git a/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp b/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp index a06fb21e26c..303989800a6 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp @@ -395,7 +395,7 @@ Parser::ParseErrorOr Parser::parse_pseudo_simple_selec return Selector::SimpleSelector { .type = Selector::SimpleSelector::Type::PseudoElement, // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase. - .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) }, + .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, pseudo_name.to_string().to_ascii_lowercase() }, }; } diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index 45bfa69e60a..969316c62ff 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -3698,7 +3698,7 @@ WebIDL::ExceptionOr> Document::create_attribute(String co // 2. If this is an HTML document, then set localName to localName in ASCII lowercase. // 3. Return a new attribute whose local name is localName and node document is this. - return Attr::create(*this, is_html_document() ? MUST(Infra::to_ascii_lowercase(local_name)) : local_name); + return Attr::create(*this, is_html_document() ? 
local_name.to_ascii_lowercase() : local_name); } // https://dom.spec.whatwg.org/#dom-document-createattributens diff --git a/Userland/Libraries/LibWeb/DOM/Element.cpp b/Userland/Libraries/LibWeb/DOM/Element.cpp index 606bfcd89ff..120f831b542 100644 --- a/Userland/Libraries/LibWeb/DOM/Element.cpp +++ b/Userland/Libraries/LibWeb/DOM/Element.cpp @@ -184,7 +184,7 @@ WebIDL::ExceptionOr Element::set_attribute(FlyString const& name, String c // 4. If attribute is null, create an attribute whose local name is qualifiedName, value is value, and node document // is this’s node document, then append this attribute to this, and then return. if (!attribute) { - auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name, value); + auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name, value); m_attributes->append_attribute(new_attribute); return {}; @@ -354,7 +354,7 @@ WebIDL::ExceptionOr Element::toggle_attribute(FlyString const& name, Optio // 1. If force is not given or is true, create an attribute whose local name is qualifiedName, value is the empty // string, and node document is this’s node document, then append this attribute to this, and then return true. if (!force.has_value() || force.value()) { - auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name.to_string(), String {}); + auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name.to_string(), String {}); m_attributes->append_attribute(new_attribute); return true; @@ -891,7 +891,7 @@ void Element::make_html_uppercased_qualified_name() { // This is allowed by the spec: "User agents could optimize qualified name and HTML-uppercased qualified name by storing them in internal slots." 
if (namespace_uri() == Namespace::HTML && document().document_type() == Document::Type::HTML) - m_html_uppercased_qualified_name = MUST(Infra::to_ascii_uppercase(qualified_name())); + m_html_uppercased_qualified_name = qualified_name().to_ascii_uppercase(); else m_html_uppercased_qualified_name = qualified_name(); } diff --git a/Userland/Libraries/LibWeb/DOM/NamedNodeMap.cpp b/Userland/Libraries/LibWeb/DOM/NamedNodeMap.cpp index 94a8b6558c1..a8afdf1a881 100644 --- a/Userland/Libraries/LibWeb/DOM/NamedNodeMap.cpp +++ b/Userland/Libraries/LibWeb/DOM/NamedNodeMap.cpp @@ -65,7 +65,7 @@ Vector NamedNodeMap::supported_property_names() const if (associated_element().namespace_uri() == Namespace::HTML) { // 1. Let lowercaseName be name, in ASCII lowercase. // 2. If lowercaseName is not equal to name, remove name from names. - names.remove_all_matching([](auto const& name) { return name != MUST(Infra::to_ascii_lowercase(name)); }); + names.remove_all_matching([](auto const& name) { return name != name.to_ascii_lowercase(); }); } // 3. Return names. diff --git a/Userland/Libraries/LibWeb/DOM/ParentNode.cpp b/Userland/Libraries/LibWeb/DOM/ParentNode.cpp index 69a101b9943..47698c308d9 100644 --- a/Userland/Libraries/LibWeb/DOM/ParentNode.cpp +++ b/Userland/Libraries/LibWeb/DOM/ParentNode.cpp @@ -139,7 +139,7 @@ JS::NonnullGCPtr ParentNode::get_elements_by_tag_name(FlyString // 2. 
Otherwise, if root’s node document is an HTML document, return a HTMLCollection rooted at root, whose filter matches the following descendant elements: if (root().document().document_type() == Document::Type::HTML) { - FlyString qualified_name_in_ascii_lowercase = MUST(Infra::to_ascii_lowercase(qualified_name)); + FlyString qualified_name_in_ascii_lowercase = qualified_name.to_ascii_lowercase(); return HTMLCollection::create(*this, HTMLCollection::Scope::Descendants, [qualified_name, qualified_name_in_ascii_lowercase](Element const& element) { // - Whose namespace is the HTML namespace and whose qualified name is qualifiedName, in ASCII lowercase. if (element.namespace_uri() == Namespace::HTML) diff --git a/Userland/Libraries/LibWeb/FileAPI/Blob.cpp b/Userland/Libraries/LibWeb/FileAPI/Blob.cpp index 591b7642049..e5b7a9c1368 100644 --- a/Userland/Libraries/LibWeb/FileAPI/Blob.cpp +++ b/Userland/Libraries/LibWeb/FileAPI/Blob.cpp @@ -292,7 +292,7 @@ WebIDL::ExceptionOr> Blob::slice_blob(Optional start } // 2. Convert every character in relativeContentType to ASCII lowercase. else { - relative_content_type = TRY_OR_THROW_OOM(vm, Infra::to_ascii_lowercase(content_type.value())); + relative_content_type = content_type.value().to_ascii_lowercase(); } } diff --git a/Userland/Libraries/LibWeb/HTML/DataTransfer.cpp b/Userland/Libraries/LibWeb/HTML/DataTransfer.cpp index 69be65202dc..e7f9489b1f3 100644 --- a/Userland/Libraries/LibWeb/HTML/DataTransfer.cpp +++ b/Userland/Libraries/LibWeb/HTML/DataTransfer.cpp @@ -142,7 +142,7 @@ String DataTransfer::get_data(String const& format_argument) const return {}; // 3. Let format be the first argument, converted to ASCII lowercase. - auto format = MUST(Infra::to_ascii_lowercase(format_argument)); + auto format = format_argument.to_ascii_lowercase(); // 4. Let convert-to-URL be false. 
[[maybe_unused]] bool convert_to_url = false; diff --git a/Userland/Libraries/LibWeb/HTML/DataTransferItemList.cpp b/Userland/Libraries/LibWeb/HTML/DataTransferItemList.cpp index 41db4be45b3..c0dba11f360 100644 --- a/Userland/Libraries/LibWeb/HTML/DataTransferItemList.cpp +++ b/Userland/Libraries/LibWeb/HTML/DataTransferItemList.cpp @@ -76,7 +76,7 @@ WebIDL::ExceptionOr> DataTransferItemList::add(Strin // method's first argument. auto item = m_data_transfer->add_item({ .kind = HTML::DragDataStoreItem::Kind::Text, - .type_string = MUST(Infra::to_ascii_lowercase(type)), + .type_string = type.to_ascii_lowercase(), .data = MUST(ByteBuffer::copy(data.bytes())), .file_name = {}, }); @@ -100,7 +100,7 @@ JS::GCPtr DataTransferItemList::add(JS::NonnullGCPtradd_item({ .kind = HTML::DragDataStoreItem::Kind::File, - .type_string = MUST(Infra::to_ascii_lowercase(file->type())), + .type_string = file->type().to_ascii_lowercase(), .data = MUST(ByteBuffer::copy(file->raw_bytes())), .file_name = file->name().to_byte_string(), }); diff --git a/Userland/Libraries/LibWeb/HTML/HTMLInputElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLInputElement.cpp index 5bb926de880..177f9639a03 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLInputElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLInputElement.cpp @@ -1506,7 +1506,7 @@ String HTMLInputElement::value_sanitization_algorithm(String const& value) const // https://html.spec.whatwg.org/multipage/input.html#color-state-(type=color):value-sanitization-algorithm // If the value of the element is a valid simple color, then set it to the value of the element converted to ASCII lowercase; if (is_valid_simple_color(value)) - return MUST(Infra::to_ascii_lowercase(value)); + return value.to_ascii_lowercase(); // otherwise, set it to the string "#000000". 
return "#000000"_string; } diff --git a/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp index 2f4d595a8bc..6f6e0d1130b 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp @@ -137,7 +137,7 @@ void HTMLLinkElement::attribute_changed(FlyString const& name, Optional if (name == HTML::AttributeNames::rel) { m_relationship = 0; // Keywords are always ASCII case-insensitive, and must be compared as such. - auto lowercased_value = MUST(Infra::to_ascii_lowercase(value.value_or(String {}))); + auto lowercased_value = value.value_or(String {}).to_ascii_lowercase(); // To determine which link types apply to a link, a, area, or form element, // the element's rel attribute must be split on ASCII whitespace. // The resulting tokens are the keywords for the link types that apply to that element. diff --git a/Userland/Libraries/LibWeb/Infra/Strings.cpp b/Userland/Libraries/LibWeb/Infra/Strings.cpp index e32e6fa90db..66986181b73 100644 --- a/Userland/Libraries/LibWeb/Infra/Strings.cpp +++ b/Userland/Libraries/LibWeb/Infra/Strings.cpp @@ -3,11 +3,13 @@ * Copyright (c) 2022, networkException * Copyright (c) 2023, Kenneth Myhra * Copyright (c) 2023, Sam Atkins + * Copyright (c) 2024, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ #include +#include #include #include #include