Browse Source

LibWeb: Fix tokenization of attributes with URL query strings in them

<a href="/foo&amp=bar"> was being tokenized into <a href="/foo&=bar">.
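The spec covers this case, but I had overlooked it. The bug happened
because we treated "&amp" as a named character reference even though,
inside an attribute value, a match without the trailing ';' that is
followed by '=' or an alphanumeric character must stay literal text.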
Andreas Kling 5 years ago
parent
commit
c33d17d363
1 changed file with 8 additions and 0 deletions
  1. 8 0
      Libraries/LibWeb/Parser/HTMLTokenizer.cpp

+ 8 - 0
Libraries/LibWeb/Parser/HTMLTokenizer.cpp

@@ -1458,6 +1458,14 @@ _StartOfFunction:
                     for (auto ch : match.value().entity)
                         m_temporary_buffer.append(ch);
 
+                    if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') {
+                        auto next = peek_codepoint(0);
+                        if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
+                            FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
+                            SWITCH_TO_RETURN_STATE;
+                        }
+                    }
+
                     if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
                         auto next_codepoint = peek_codepoint(0);
                         if (next_codepoint.has_value() && next_codepoint.value() == '=') {