Browse Source

LibWeb: Support for "content-language" http-equiv state

Implemented support for setting the pragma-set default language via a
`<meta>` element whose `http-equiv` attribute is `content-language`.
Piotr 8 months ago
parent
commit
06154b87dd

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt

@@ -0,0 +1 @@
+OK

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt

@@ -0,0 +1 @@
+OK

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt

@@ -0,0 +1 @@
+OK

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt

@@ -0,0 +1 @@
+OK

+ 1 - 0
Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt

@@ -0,0 +1 @@
+OK

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="Content-Language" content="ko" >
+        <style type='text/css'>
+            .test div { width: 50px; background-color: red; }
+            #box:lang(ko) { width: 100px; background-color: limegreen; }
+        </style>
+    </head>
+    <body>
+        <div class="test"><div id="box">TEST</div></div>
+    </body>
+</html>
+    <script src="../include.js"></script>
+    <script>
+        asyncTest((done) => {
+            if (document.getElementById('box').offsetWidth == 100) {
+                println("OK");
+            } else {
+                println("FAIL. If there is a pragma-set default language set, then that is the language of the node.");
+            }
+            done();
+        });
+    </script>
+</html>

+ 27 - 0
Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html

@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="Content-Language" content="
+            ko
+                som! ?et #$% hi;0 ng  " >
+        <style type='text/css'>
+            .test div { width: 50px; background-color: red; }
+            #box:lang(ko) { width: 100px; background-color: limegreen; }
+        </style>
+    </head>
+    <body>
+        <div class="test"><div id="box">TEST</div></div>
+    </body>
+</html>
+    <script src="../include.js"></script>
+    <script>
+        asyncTest((done) => {
+            if (document.getElementById('box').offsetWidth == 100) {
+                println("OK");
+            } else {
+                println("FAIL. If there is a pragma-set default language set, then that is the language of the node. It should match the first non-whitespace code points from the contet attribute.");
+            }
+            done();
+        });
+    </script>
+</html>

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="Content-Language" content="ko,pl" >
+        <style type='text/css'>
+            .test div { width: 50px; background-color: limegreen; }
+            #box:lang(ko) { width: 100px; background-color: red; }
+        </style>
+    </head>
+    <body>
+        <div class="test"><div id="box">TEST</div></div>
+    </body>
+</html>
+    <script src="../include.js"></script>
+    <script>
+        asyncTest((done) => {
+            if (document.getElementById('box').offsetWidth == 50) {
+                println("OK");
+            } else {
+                println("FAIL. If the element's content attribute contains a U+002C COMMA character (,) then return.");
+            }
+            done();
+        });
+    </script>
+</html>

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="Content-Language" content="" >
+        <style type='text/css'>
+            .test div { width: 50px; background-color: limegreen; }
+            #box:lang(ko) { width: 100px; background-color: red; }
+        </style>
+    </head>
+    <body>
+        <div class="test"><div id="box">TEST</div></div>
+    </body>
+</html>
+    <script src="../include.js"></script>
+    <script>
+        asyncTest((done) => {
+            if (document.getElementById('box').offsetWidth == 50) {
+                println("OK");
+            } else {
+                println("FAIL. If candidate is the empty string, return.");
+            }
+            done();
+        });
+    </script>
+</html>

+ 25 - 0
Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html

@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="Content-Language" >
+        <style type='text/css'>
+            .test div { width: 50px; background-color: limegreen; }
+            #box:lang(ko) { width: 100px; background-color: red; }
+        </style>
+    </head>
+    <body>
+        <div class="test"><div id="box">TEST</div></div>
+    </body>
+</html>
+    <script src="../include.js"></script>
+    <script>
+        asyncTest((done) => {
+            if (document.getElementById('box').offsetWidth == 50) {
+                println("OK");
+            } else {
+                println("FAIL. If the meta element has no content attribute, then return.");
+            }
+            done();
+        });
+    </script>
+</html>

+ 4 - 0
Userland/Libraries/LibWeb/DOM/Document.h

@@ -380,6 +380,9 @@ public:
     String const& content_type() const { return m_content_type; }
     void set_content_type(String content_type) { m_content_type = move(content_type); }
 
+    Optional<String> const& pragma_set_default_language() const { return m_pragma_set_default_language; }
+    void set_pragma_set_default_language(String language) { m_pragma_set_default_language = move(language); }
+
     bool has_encoding() const { return m_encoding.has_value(); }
     Optional<String> const& encoding() const { return m_encoding; }
     String encoding_or_default() const { return m_encoding.value_or("UTF-8"_string); }
@@ -822,6 +825,7 @@ private:
 
     HTML::DocumentReadyState m_readiness { HTML::DocumentReadyState::Loading };
     String m_content_type { "application/xml"_string };
+    Optional<String> m_pragma_set_default_language;
     Optional<String> m_encoding;
 
     bool m_ready_for_post_load_tasks { false };

+ 4 - 0
Userland/Libraries/LibWeb/DOM/Element.cpp

@@ -2934,6 +2934,10 @@ Optional<String> Element::lang() const
 
         // 5. Otherwise
         //      - If there is a pragma-set default language set, then that is the language of the node.
+        if (document().pragma_set_default_language().has_value()) {
+            return document().pragma_set_default_language();
+        }
+
         //      - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
         //        if any, must be used as the final fallback language instead.
         //      - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,

+ 32 - 0
Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp

@@ -5,6 +5,7 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <AK/GenericLexer.h>
 #include <LibWeb/Bindings/HTMLMetaElementPrototype.h>
 #include <LibWeb/Bindings/Intrinsics.h>
 #include <LibWeb/CSS/Parser/Parser.h>
@@ -121,6 +122,37 @@ void HTMLMetaElement::inserted()
             // For meta elements with an http-equiv attribute in the X-UA-Compatible state, the content attribute must have a value that is an ASCII case-insensitive match for the string "IE=edge".
             // User agents are required to ignore this pragma.
             break;
+        case HttpEquivAttributeState::ContentLanguage: {
+            // https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-content-language
+            // 1. If the meta element has no content attribute, then return.
+            if (!has_attribute(AttributeNames::content))
+                break;
+
+            // 2. If the element's content attribute contains a U+002C COMMA character (,) then return.
+            auto content = get_attribute_value(AttributeNames::content);
+            if (content.contains(","sv))
+                break;
+
+            // 3. Let input be the value of the element's content attribute.
+            // 4. Let position point at the first character of input.
+            GenericLexer lexer { content };
+
+            // 5. Skip ASCII whitespace within input given position.
+            lexer.ignore_while(Web::Infra::is_ascii_whitespace);
+
+            // 6. Collect a sequence of code points that are not ASCII whitespace from input given position.
+            // 7. Let candidate be the string that resulted from the previous step.
+            auto candidate = lexer.consume_until(Web::Infra::is_ascii_whitespace);
+
+            // 8. If candidate is the empty string, return.
+            if (candidate.is_empty())
+                break;
+
+            // 9. Set the pragma-set default language to candidate.
+            auto language = String::from_utf8_without_validation(candidate.bytes());
+            document().set_pragma_set_default_language(language);
+            break;
+        }
         default:
             dbgln("FIXME: Implement '{}' http-equiv state", get_attribute_value(AttributeNames::http_equiv));
             break;