ladybird/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp

105 lines
3.5 KiB
C++

/*
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/HashTable.h>
#include <LibWeb/HTML/Parser/HTMLToken.h>
namespace Web::HTML {
// Renders this token as a single-line description: the token type's name, followed by
// a type-specific payload in braces (where one exists), followed by its source position.
// Hitting a token of type Invalid trips VERIFY_NOT_REACHED().
String HTMLToken::to_string() const
{
    StringBuilder output;
    auto const token_type = type();

    // Shared by start and end tags: the tag name plus every attribute as name="value".
    auto append_tag_details = [&] {
        output.append(" { name: '"sv);
        output.append(tag_name());
        output.append("', { "sv);
        for_each_attribute([&](auto& attr) {
            output.append(attr.local_name);
            output.append("=\""sv);
            output.append(attr.value);
            output.append("\" "sv);
            return IterationDecision::Continue;
        });
        output.append("} }"sv);
    };

    switch (token_type) {
    case Type::DOCTYPE:
        output.append("DOCTYPE"sv);
        output.append(" { name: '"sv);
        output.append(doctype_data().name);
        output.append("' }"sv);
        break;
    case Type::StartTag:
        output.append("StartTag"sv);
        append_tag_details();
        break;
    case Type::EndTag:
        output.append("EndTag"sv);
        append_tag_details();
        break;
    case Type::Comment:
        output.append("Comment"sv);
        break;
    case Type::Character:
        output.append("Character"sv);
        break;
    case Type::EndOfFile:
        output.append("EndOfFile"sv);
        break;
    case Type::Invalid:
        VERIFY_NOT_REACHED();
    }

    if (is_comment()) {
        output.append(" { data: '"sv);
        output.append(comment());
        output.append("' }"sv);
    }

    if (is_character()) {
        output.append(" { data: '"sv);
        output.append_code_point(code_point());
        output.append("' }"sv);
    }

    // Character tokens print only their start position; every other type prints a start-end range.
    if (token_type != Type::Character) {
        output.appendff("@{}:{}-{}:{}", m_start_position.line, m_start_position.column, m_end_position.line, m_end_position.column);
    } else {
        output.appendff("@{}:{}", m_start_position.line, m_start_position.column);
    }

    return MUST(output.to_string());
}
void HTMLToken::normalize_attributes()
{
// From AttributeNameState: https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
//
// When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
// the complete attribute's name must be compared to the other attributes on the same token;
// if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
// parse error and the new attribute must be removed from the token.
// NOTE: If an attribute is so removed from a token, it, and the value that gets associated with it, if any,
// are never subsequently used by the parser, and are therefore effectively discarded. Removing the attribute
// in this way does not change its status as the "current attribute" for the purposes of the tokenizer, however.
HashTable<FlyString> seen_attributes;
auto* ptr = tag_attributes();
if (!ptr)
return;
auto& tag_attributes = *ptr;
for (size_t i = 0; i < tag_attributes.size(); ++i) {
auto& attribute = tag_attributes[i];
if (seen_attributes.set(attribute.local_name, AK::HashSetExistingEntryBehavior::Keep) == AK::HashSetResult::KeptExistingEntry) {
// This is a duplicate attribute, remove it.
tag_attributes.remove(i);
--i;
}
}
}
}