9 месяцев назад · 7aa0165fe7
--- a/Tests/LibWeb/Text/expected/html-parser-duplicate-attributes.txt
+++ b/Tests/LibWeb/Text/expected/html-parser-duplicate-attributes.txt
@@ -0,0 +1,8 @@
 
				+    divs[0].id: fred
			
 
				+divs[0].className: math
			
 
				+divs[1].id: spaghetti
			
 
				+divs[1].className: 
			
 
				+divs[2].getAttribute("grape"): foo
			
 
				+divs[0].numAttributes: 2
			
 
				+divs[1].numAttributes: 2
			
 
				+divs[2].numAttributes: 1
			
--- a/Tests/LibWeb/Text/input/html-parser-duplicate-attributes.html
+++ b/Tests/LibWeb/Text/input/html-parser-duplicate-attributes.html
@@ -0,0 +1,21 @@
 
				+<!DOCTYPE html>
			
 
				+<script src="include.js"></script>
			
 
				+<div id="fred"id="barney" class="math"></div>
			
 
				+<div class class=1"foo" id="spaghetti" id></div>
			
 
				+<div grape="foo" grape grape="bar" grape grape grape=baz></div>
			
 
				+<script>
			
 
				+    test(() => {
			
 
				+        let divs = document.getElementsByTagName("div");
			
 
				+
			
 
				+        // Per the HTML spec, the first attribute wins; later attributes with the same name are dropped.
			
 
				+        println(`divs[0].id: ${divs[0].id}`);
			
 
				+        println(`divs[0].className: ${divs[0].className}`);
			
 
				+        println(`divs[1].id: ${divs[1].id}`);
			
 
				+        println(`divs[1].className: ${divs[1].className}`);
			
 
				+        println(`divs[2].getAttribute("grape"): ${divs[2].getAttribute("grape")}`);
			
 
				+
			
 
				+        println(`divs[0].numAttributes: ${divs[0].attributes.length}`); // 2 (id, class)
			
 
				+        println(`divs[1].numAttributes: ${divs[1].attributes.length}`); // 2 (class, id)
			
 
				+        println(`divs[2].numAttributes: ${divs[2].attributes.length}`); // 1 (grape)
			
 
				+    });
			
 
				+</script>
			
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
@@ -4,6 +4,7 @@
 
				  * SPDX-License-Identifier: BSD-2-Clause
			
 
				  */
			
 
				 
			
 
				+#include <AK/HashTable.h>
			
 
				 #include <LibWeb/HTML/Parser/HTMLToken.h>
			
 
				 
			
 
				 namespace Web::HTML {
			
@@ -73,4 +74,32 @@ String HTMLToken::to_string() const
 
				     return MUST(builder.to_string());
			
 
				 }
			
 
				 
			
 
				+void HTMLToken::normalize_attributes()
				+{
				+    // Deduplicates this token's attributes, keeping only the first occurrence of each attribute name.
				+    // Spec reference (attribute name state): https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
				+    // When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
				+    // the complete attribute's name must be compared to the other attributes on the same token;
				+    // if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
				+    // parse error and the new attribute must be removed from the token.
				+    //
				+    // NOTE: Per the spec, a removed attribute (and its value, if any) is never used by the parser again.
				+
				+    auto* attributes = tag_attributes();
				+    if (attributes == nullptr)
				+        return;
				+
				+    HashTable<FlyString> names_encountered;
				+    size_t index = 0;
				+    while (index < attributes->size()) {
				+        auto const outcome = names_encountered.set((*attributes)[index].local_name, AK::HashSetExistingEntryBehavior::Keep);
				+        if (outcome == AK::HashSetResult::KeptExistingEntry) {
				+            // An earlier attribute already claimed this name: drop this one and re-check the same index.
				+            attributes->remove(index);
				+            continue;
				+        }
				+        ++index;
				+    }
				+}
			
 
				+
			
 
				 }
			
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
@@ -328,6 +328,8 @@ public:
 
				     void set_start_position(Badge<HTMLTokenizer>, Position start_position) { m_start_position = start_position; }
			
 
				     void set_end_position(Badge<HTMLTokenizer>, Position end_position) { m_end_position = end_position; }
			
 
				 
			
 
				+    void normalize_attributes();
			
 
				+
			
 
				 private:
			
 
				     Vector<Attribute> const* tag_attributes() const
			
 
				     {
			
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -2863,6 +2863,9 @@ void HTMLTokenizer::will_emit(HTMLToken& token)
 
				 
			
 
				     auto is_start_or_end_tag = token.type() == HTMLToken::Type::StartTag || token.type() == HTMLToken::Type::EndTag;
			
 
				     token.set_end_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0));
			
 
				+
			
 
				+    if (is_start_or_end_tag)
			
 
				+        token.normalize_attributes();
			
 
				 }
			
 
				 
			
 
				 bool HTMLTokenizer::current_end_tag_token_is_appropriate() const