LibWeb: Support for "content-language" http-equiv state

Implement support for setting the document's pragma-set default language
from a `<meta>` element whose `http-equiv` attribute is `content-language`.
This commit is contained in:
Piotr 2024-11-05 09:12:11 +01:00 committed by Andreas Kling
parent 413cf6b329
commit 06154b87dd
Notes: github-actions[bot] 2024-11-06 09:57:52 +00:00
13 changed files with 172 additions and 0 deletions

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1 @@
OK

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- A Content-Language pragma holding a single language tag sets the
     pragma-set default language, so the :lang(ko) rule below must apply. -->
<meta http-equiv="Content-Language" content="ko" >
<style type='text/css'>
.test div { width: 50px; background-color: red; }
#box:lang(ko) { width: 100px; background-color: limegreen; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // #box is only 100px wide when the :lang(ko) rule matched it.
        if (document.getElementById('box').offsetWidth === 100) {
            println("OK");
        } else {
            println("FAIL. If there is a pragma-set default language set, then that is the language of the node.");
        }
        done();
    });
</script>
</body>
</html>

View file

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<!-- The content value starts with ASCII whitespace and has extra tokens after
     "ko". Only the first run of non-whitespace code points should become the
     pragma-set default language, so the :lang(ko) rule below must apply. -->
<meta http-equiv="Content-Language" content="
ko
som! ?et #$% hi;0 ng " >
<style type='text/css'>
.test div { width: 50px; background-color: red; }
#box:lang(ko) { width: 100px; background-color: limegreen; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // #box is only 100px wide when the :lang(ko) rule matched it.
        if (document.getElementById('box').offsetWidth === 100) {
            println("OK");
        } else {
            println("FAIL. If there is a pragma-set default language set, then that is the language of the node. It should match the first non-whitespace code points from the content attribute.");
        }
        done();
    });
</script>
</body>
</html>

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- A content value containing a comma must be ignored entirely, so no
     pragma-set default language is set and :lang(ko) must NOT match. -->
<meta http-equiv="Content-Language" content="ko,pl" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // #box keeps the 50px base width when the :lang(ko) rule did not match.
        if (document.getElementById('box').offsetWidth === 50) {
            println("OK");
        } else {
            println("FAIL. If the element's content attribute contains a U+002C COMMA character (,) then return.");
        }
        done();
    });
</script>
</body>
</html>

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- An empty content value yields an empty candidate, so no pragma-set
     default language is set and :lang(ko) must NOT match. -->
<meta http-equiv="Content-Language" content="" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // #box keeps the 50px base width when the :lang(ko) rule did not match.
        if (document.getElementById('box').offsetWidth === 50) {
            println("OK");
        } else {
            println("FAIL. If candidate is the empty string, return.");
        }
        done();
    });
</script>
</body>
</html>

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- With no content attribute at all the pragma is ignored, so no pragma-set
     default language is set and :lang(ko) must NOT match. -->
<meta http-equiv="Content-Language" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
    asyncTest((done) => {
        // #box keeps the 50px base width when the :lang(ko) rule did not match.
        if (document.getElementById('box').offsetWidth === 50) {
            println("OK");
        } else {
            println("FAIL. If the meta element has no content attribute, then return.");
        }
        done();
    });
</script>
</body>
</html>

View file

@ -380,6 +380,9 @@ public:
String const& content_type() const { return m_content_type; }
void set_content_type(String content_type) { m_content_type = move(content_type); }
// Returns the document's pragma-set default language. The Optional is empty until a
// <meta http-equiv="content-language"> element with a usable content value has set it.
Optional<String> const& pragma_set_default_language() const { return m_pragma_set_default_language; }
// Stores `language` as the document's pragma-set default language, replacing any previous value.
// The argument is taken by value and moved into the member.
void set_pragma_set_default_language(String language) { m_pragma_set_default_language = move(language); }
bool has_encoding() const { return m_encoding.has_value(); }
Optional<String> const& encoding() const { return m_encoding; }
String encoding_or_default() const { return m_encoding.value_or("UTF-8"_string); }
@ -822,6 +825,7 @@ private:
HTML::DocumentReadyState m_readiness { HTML::DocumentReadyState::Loading };
String m_content_type { "application/xml"_string };
Optional<String> m_pragma_set_default_language;
Optional<String> m_encoding;
bool m_ready_for_post_load_tasks { false };

View file

@ -2934,6 +2934,10 @@ Optional<String> Element::lang() const
// 5. Otherwise
// - If there is a pragma-set default language set, then that is the language of the node.
if (document().pragma_set_default_language().has_value()) {
return document().pragma_set_default_language();
}
// - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
// if any, must be used as the final fallback language instead.
// - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,

View file

@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/GenericLexer.h>
#include <LibWeb/Bindings/HTMLMetaElementPrototype.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/CSS/Parser/Parser.h>
@ -121,6 +122,37 @@ void HTMLMetaElement::inserted()
// For meta elements with an http-equiv attribute in the X-UA-Compatible state, the content attribute must have a value that is an ASCII case-insensitive match for the string "IE=edge".
// User agents are required to ignore this pragma.
break;
case HttpEquivAttributeState::ContentLanguage: {
// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-content-language
// 1. If the meta element has no content attribute, then return.
if (!has_attribute(AttributeNames::content))
break;
// 2. If the element's content attribute contains a U+002C COMMA character (,) then return.
auto content = get_attribute_value(AttributeNames::content);
if (content.contains(","sv))
break;
// 3. Let input be the value of the element's content attribute.
// 4. Let position point at the first character of input.
GenericLexer lexer { content };
// 5. Skip ASCII whitespace within input given position.
lexer.ignore_while(Web::Infra::is_ascii_whitespace);
// 6. Collect a sequence of code points that are not ASCII whitespace from input given position.
// 7. Let candidate be the string that resulted from the previous step.
auto candidate = lexer.consume_until(Web::Infra::is_ascii_whitespace);
// 8. If candidate is the empty string, return.
if (candidate.is_empty())
break;
// 9. Set the pragma-set default language to candidate.
auto language = String::from_utf8_without_validation(candidate.bytes());
document().set_pragma_set_default_language(language);
break;
}
default:
dbgln("FIXME: Implement '{}' http-equiv state", get_attribute_value(AttributeNames::http_equiv));
break;