Browse Source

LibWeb: Handle empty string in `lang` attribute

If the attribute value is the empty string (`lang=""`), the language
is set to unknown. Any `lang` attribute higher up in the document tree
will no longer be applied to the content of that element.
Piotr 9 months ago
parent
commit
e2613090ed

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-attribute-empty-1.txt

@@ -0,0 +1 @@
+OK

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-attribute-empty-2.txt

@@ -0,0 +1 @@
+OK

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-attribute-empty-1.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <style type='text/css'>
+            .test div { width: 50px; }
+            #box:lang(ko) { width: 100px; }
+        </style>
+    </head>
+    <body>
+        <div class="test" lang="ko">
+            <div id="box" lang="">Test</div>
+        </div>
+    </body>
+    <script src="../include.js"></script>
+    <script>
+        // #box is 50px wide by default and 100px wide only if it matches :lang(ko).
+        // Its own lang="" must stop it from inheriting "ko" from the enclosing div.
+        asyncTest((done) => {
+            const boxWidth = document.getElementById('box').offsetWidth;
+            const failure = "FAIL. If an element contains a lang attribute with an empty value, the value of a lang attribute higher up the document tree will no longer be applied to the content of that element.";
+            println(boxWidth == 50 ? "OK" : failure);
+            done();
+        });
+    </script>
+</html>

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-attribute-empty-2.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html lang="ko">
+    <head>
+        <style type='text/css'>
+            .test div { width: 50px; }
+            #box:lang(ko) { width: 100px; }
+        </style>
+    </head>
+    <body>
+        <div class="test">
+            <div id="box" lang="">Test</div>
+        </div>
+    </body>
+    <script src="../include.js"></script>
+    <script>
+        // #box is 50px wide by default and 100px wide only if it matches :lang(ko).
+        // Its own lang="" must stop it from inheriting "ko" from the html element.
+        asyncTest((done) => {
+            const boxWidth = document.getElementById('box').offsetWidth;
+            const failure = "FAIL. If an element contains a lang attribute with an empty value, the language declared by the lang attribute on the html element will no longer be applied to the content of that element.";
+            println(boxWidth == 50 ? "OK" : failure);
+            done();
+        });
+    </script>
+</html>

+ 42 - 34
Userland/Libraries/LibWeb/DOM/Element.cpp

@@ -2905,40 +2905,48 @@ void Element::inherit_counters()
 // https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
 Optional<String> Element::lang() const
 {
-    // 1. If the node is an element that has a lang attribute in the XML namespace set
-    //      Use the value of that attribute.
-    auto maybe_xml_lang = get_attribute_ns(Namespace::XML, HTML::AttributeNames::lang);
-    if (maybe_xml_lang.has_value())
-        return maybe_xml_lang.release_value();
-
-    // 2. If the node is an HTML element or an element in the SVG namespace, and it has a lang in no namespace attribute set
-    //      Use the value of that attribute.
-    if (is_html_element() || namespace_uri() == Namespace::SVG) {
-        auto maybe_lang = get_attribute(HTML::AttributeNames::lang);
-        if (maybe_lang.has_value())
-            return maybe_lang.release_value();
-    }
-
-    // 3. If the node's parent is a shadow root
-    //      Use the language of that shadow root's host.
-    if (auto const* parent = parent_element()) {
-        if (parent->is_shadow_root())
-            return parent->shadow_root()->host()->lang();
-    }
-
-    // 4. If the node's parent element is not null
-    //      Use the language of that parent element.
-    if (auto const* parent = parent_element())
-        return parent->lang();
-
-    // 5. Otherwise
-    //      - If there is a pragma-set default language set, then that is the language of the node.
-    //      - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
-    //        if any, must be used as the final fallback language instead.
-    //      - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,
-    //        the language of the node is unknown, and the corresponding language tag is the empty string.
-    // Default locale sounds like a reasonable fallback here.
-    return {};
+    // Tries each language source from the spec algorithm in priority order; the first match wins.
+    auto attempt_to_determine_lang_attribute = [&]() -> Optional<String> {
+        // 1. If the node is an element that has a lang attribute in the XML namespace set
+        //      Use the value of that attribute.
+        if (auto xml_lang = get_attribute_ns(Namespace::XML, HTML::AttributeNames::lang); xml_lang.has_value())
+            return xml_lang.release_value();
+
+        // 2. If the node is an HTML element or an element in the SVG namespace, and it has a lang in no namespace attribute set
+        //      Use the value of that attribute.
+        bool const honors_plain_lang = is_html_element() || namespace_uri() == Namespace::SVG;
+        if (honors_plain_lang) {
+            if (auto plain_lang = get_attribute(HTML::AttributeNames::lang); plain_lang.has_value())
+                return plain_lang.release_value();
+        }
+
+        if (auto const* parent = parent_element()) {
+            // 3. If the node's parent is a shadow root
+            //      Use the language of that shadow root's host.
+            // NOTE(review): parent_element() presumably only yields Elements; confirm a shadow-root parent can reach this branch.
+            if (parent->is_shadow_root())
+                return parent->shadow_root()->host()->lang();
+
+            // 4. If the node's parent element is not null
+            //      Use the language of that parent element.
+            return parent->lang();
+        }
+
+        // 5. Otherwise
+        //      - If there is a pragma-set default language set, then that is the language of the node.
+        //      - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
+        //        if any, must be used as the final fallback language instead.
+        //      - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,
+        //        the language of the node is unknown, and the corresponding language tag is the empty string.
+        // None of those sources are consulted here; an empty Optional is returned (callers presumably fall back to a default locale).
+        return {};
+    };
+
+    // An empty string means the language is explicitly unknown; it is reported as an empty Optional, just like "no language found".
+    auto language = attempt_to_determine_lang_attribute();
+    if (language.has_value() && !language->is_empty())
+        return language.release_value();
+    return {};
 }
 
 }