mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibWeb: Deduplicate attributes when emitting start and end tags
Some checks are pending
CI / Lagom (true, NO_FUZZ, ubuntu-22.04, Linux, Clang) (push) Waiting to run
CI / Lagom (false, FUZZ, ubuntu-22.04, Linux, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, macos-14, macOS, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, ubuntu-22.04, Linux, GNU) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (macos-14, macOS, macOS-universal2) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (ubuntu-22.04, Linux, Linux-x86_64) (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Push notes / build (push) Waiting to run
Some checks are pending
CI / Lagom (true, NO_FUZZ, ubuntu-22.04, Linux, Clang) (push) Waiting to run
CI / Lagom (false, FUZZ, ubuntu-22.04, Linux, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, macos-14, macOS, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, ubuntu-22.04, Linux, GNU) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (macos-14, macOS, macOS-universal2) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (ubuntu-22.04, Linux, Linux-x86_64) (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Push notes / build (push) Waiting to run
The HTML tokenizer specification says that we're supposed to do this when leaving the attribute name state or when emitting the tag token, as appropriate. Hopefully 'as appropriate' can mean only when emitting the token, as that's the easiest place to insert this logic without complicating the tokenizer any more.
This commit is contained in:
parent
b3f8d63372
commit
7aa0165fe7
Notes:
github-actions[bot]
2024-10-01 09:05:13 +00:00
Author: https://github.com/ADKaster Commit: https://github.com/LadybirdBrowser/ladybird/commit/7aa0165fe75 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1582
5 changed files with 63 additions and 0 deletions
|
@ -0,0 +1,8 @@
|
|||
divs[0].id: fred
|
||||
divs[0].className: math
|
||||
divs[1].id: spaghetti
|
||||
divs[1].className:
|
||||
divs[2].getAttribute("grape"): foo
|
||||
divs[0].numAttributes: 2
|
||||
divs[1].numAttributes: 2
|
||||
divs[2].numAttributes: 1
|
|
@ -0,0 +1,21 @@
|
|||
<!DOCTYPE html>
|
||||
<script src="include.js"></script>
|
||||
<div id="fred"id="barney" class="math"></div>
|
||||
<div class class=1"foo" id="spaghetti" id></div>
|
||||
<div grape="foo" grape grape="bar" grape grape grape=baz></div>
|
||||
<script>
|
||||
test(() => {
    // Grab the three <div> elements declared above, in document order.
    const elements = document.getElementsByTagName("div");
    const first = elements[0];
    const second = elements[1];
    const third = elements[2];

    // Per the HTML spec, the first occurrence of a duplicated attribute wins.
    println(`divs[0].id: ${first.id}`);
    println(`divs[0].className: ${first.className}`);
    println(`divs[1].id: ${second.id}`);
    println(`divs[1].className: ${second.className}`);
    println(`divs[2].getAttribute("grape"): ${third.getAttribute("grape")}`);

    // Duplicates must not be counted: the expected lengths are 2, 2 and 1.
    println(`divs[0].numAttributes: ${first.attributes.length}`);
    println(`divs[1].numAttributes: ${second.attributes.length}`);
    println(`divs[2].numAttributes: ${third.attributes.length}`);
});
|
||||
</script>
|
|
@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/HashTable.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
@ -73,4 +74,32 @@ String HTMLToken::to_string() const
|
|||
return MUST(builder.to_string());
|
||||
}
|
||||
|
||||
void HTMLToken::normalize_attributes()
|
||||
{
|
||||
// From AttributeNameState: https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
||||
//
|
||||
// When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
|
||||
// the complete attribute's name must be compared to the other attributes on the same token;
|
||||
// if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
|
||||
// parse error and the new attribute must be removed from the token.
|
||||
|
||||
// NOTE: If an attribute is so removed from a token, it, and the value that gets associated with it, if any,
|
||||
// are never subsequently used by the parser, and are therefore effectively discarded. Removing the attribute
|
||||
// in this way does not change its status as the "current attribute" for the purposes of the tokenizer, however.
|
||||
|
||||
HashTable<FlyString> seen_attributes;
|
||||
auto* ptr = tag_attributes();
|
||||
if (!ptr)
|
||||
return;
|
||||
auto& tag_attributes = *ptr;
|
||||
for (size_t i = 0; i < tag_attributes.size(); ++i) {
|
||||
auto& attribute = tag_attributes[i];
|
||||
if (seen_attributes.set(attribute.local_name, AK::HashSetExistingEntryBehavior::Keep) == AK::HashSetResult::KeptExistingEntry) {
|
||||
// This is a duplicate attribute, remove it.
|
||||
tag_attributes.remove(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -328,6 +328,8 @@ public:
|
|||
// Stores the given position in m_start_position. Takes a Badge<HTMLTokenizer> as its first argument.
void set_start_position(Badge<HTMLTokenizer>, Position start_position)
{
    m_start_position = start_position;
}
|
||||
// Stores the given position in m_end_position. Takes a Badge<HTMLTokenizer> as its first argument.
void set_end_position(Badge<HTMLTokenizer>, Position end_position)
{
    m_end_position = end_position;
}
|
||||
|
||||
// Removes duplicate attributes from this token, keeping the first attribute seen for each name.
void normalize_attributes();
|
||||
|
||||
private:
|
||||
Vector<Attribute> const* tag_attributes() const
|
||||
{
|
||||
|
|
|
@ -2863,6 +2863,9 @@ void HTMLTokenizer::will_emit(HTMLToken& token)
|
|||
|
||||
auto is_start_or_end_tag = token.type() == HTMLToken::Type::StartTag || token.type() == HTMLToken::Type::EndTag;
|
||||
token.set_end_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0));
|
||||
|
||||
if (is_start_or_end_tag)
|
||||
token.normalize_attributes();
|
||||
}
|
||||
|
||||
bool HTMLTokenizer::current_end_tag_token_is_appropriate() const
|
||||
|
|
Loading…
Reference in a new issue