LibWeb: Switch HTMLToken and HTMLTokenizer to String & FlyString

This commit is contained in:
Andreas Kling 2023-11-04 10:08:07 +01:00
parent 0902f552a3
commit b341aeb5c1
Notes: sideshowbarker 2024-07-16 22:26:05 +09:00
4 changed files with 89 additions and 86 deletions

View file

@ -454,29 +454,29 @@ DOM::QuirksMode HTMLParser::which_quirks_mode(HTMLToken const& doctype_token) co
return DOM::QuirksMode::Yes;
for (auto& public_id : s_quirks_public_ids) {
if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes(public_id, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes;
}
if (doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes;
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes;
}
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
if (!doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
}
@ -490,16 +490,16 @@ void HTMLParser::handle_initial(HTMLToken& token)
}
if (token.is_comment()) {
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment())));
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment));
return;
}
if (token.is_doctype()) {
auto doctype = realm().heap().allocate<DOM::DocumentType>(realm(), document());
doctype->set_name(String::from_deprecated_string(token.doctype_data().name).release_value());
doctype->set_public_id(String::from_deprecated_string(token.doctype_data().public_identifier).release_value());
doctype->set_system_id(String::from_deprecated_string(token.doctype_data().system_identifier).release_value());
doctype->set_name(token.doctype_data().name);
doctype->set_public_id(token.doctype_data().public_identifier);
doctype->set_system_id(token.doctype_data().system_identifier);
MUST(document().append_child(*doctype));
document().set_quirks_mode(which_quirks_mode(token));
m_insertion_mode = InsertionMode::BeforeHTML;
@ -525,7 +525,7 @@ void HTMLParser::handle_before_html(HTMLToken& token)
// -> A comment token
if (token.is_comment()) {
// Insert a comment as the last child of the Document object.
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment())));
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment));
return;
}
@ -822,7 +822,7 @@ AnythingElse:
// Creates a DOM::Comment from the comment token's data and inserts it under the parent
// returned by find_appropriate_place_for_inserting_node(), before insert_before_sibling.
void HTMLParser::insert_comment(HTMLToken& token)
{
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
// NOTE(review): the two statements below appear to be the before/after forms of a single line
// in this diff listing. The first converts token.comment() via String::from_deprecated_string;
// the second passes token.comment() straight to the DOM::Comment allocation.
adjusted_insertion_location.parent->insert_before(realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment()))), adjusted_insertion_location.insert_before_sibling);
adjusted_insertion_location.parent->insert_before(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()), adjusted_insertion_location.insert_before_sibling);
}
void HTMLParser::handle_in_head(HTMLToken& token)
@ -1142,7 +1142,7 @@ void HTMLParser::handle_after_body(HTMLToken& token)
if (token.is_comment()) {
auto& insertion_location = m_stack_of_open_elements.first();
MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment())))));
MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment())));
return;
}
@ -1178,7 +1178,7 @@ void HTMLParser::handle_after_body(HTMLToken& token)
void HTMLParser::handle_after_after_body(HTMLToken& token)
{
if (token.is_comment()) {
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment())));
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment));
return;
}
@ -2025,7 +2025,7 @@ void HTMLParser::handle_in_body(HTMLToken& token)
if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) {
// Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.)
log_parse_error();
token.set_tag_name("img");
token.set_tag_name("img"_fly_string);
process_using_the_rules_for(m_insertion_mode, token);
return;
}
@ -2194,41 +2194,41 @@ void HTMLParser::adjust_mathml_attributes(HTMLToken& token)
// Gives the token's tag name its mixed-case SVG spelling. Every call passes an all-lowercase
// name and the camel-cased name to substitute; HTMLToken::adjust_tag_name (shown elsewhere in
// this listing) only replaces the tag name when it equals the first argument.
void HTMLParser::adjust_svg_tag_names(HTMLToken& token)
{
// NOTE(review): this first run of calls passes plain string literals; it looks like the
// pre-change side of the diff, superseded by the `_fly_string` run further down.
token.adjust_tag_name("altglyph", "altGlyph");
token.adjust_tag_name("altglyphdef", "altGlyphDef");
token.adjust_tag_name("altglyphitem", "altGlyphItem");
token.adjust_tag_name("animatecolor", "animateColor");
token.adjust_tag_name("animatemotion", "animateMotion");
token.adjust_tag_name("animatetransform", "animateTransform");
token.adjust_tag_name("clippath", "clipPath");
token.adjust_tag_name("feblend", "feBlend");
token.adjust_tag_name("fecolormatrix", "feColorMatrix");
token.adjust_tag_name("fecomponenttransfer", "feComponentTransfer");
token.adjust_tag_name("fecomposite", "feComposite");
token.adjust_tag_name("feconvolvematrix", "feConvolveMatrix");
token.adjust_tag_name("fediffuselighting", "feDiffuseLighting");
token.adjust_tag_name("fedisplacementmap", "feDisplacementMap");
token.adjust_tag_name("fedistantlight", "feDistantLight");
token.adjust_tag_name("fedropshadow", "feDropShadow");
token.adjust_tag_name("feflood", "feFlood");
token.adjust_tag_name("fefunca", "feFuncA");
token.adjust_tag_name("fefuncb", "feFuncB");
token.adjust_tag_name("fefuncg", "feFuncG");
token.adjust_tag_name("fefuncr", "feFuncR");
token.adjust_tag_name("fegaussianblur", "feGaussianBlur");
token.adjust_tag_name("feimage", "feImage");
token.adjust_tag_name("femerge", "feMerge");
token.adjust_tag_name("femergenode", "feMergeNode");
token.adjust_tag_name("femorphology", "feMorphology");
token.adjust_tag_name("feoffset", "feOffset");
token.adjust_tag_name("fepointlight", "fePointLight");
token.adjust_tag_name("fespecularlighting", "feSpecularLighting");
token.adjust_tag_name("fespotlight", "feSpotlight");
token.adjust_tag_name("foreignobject", "foreignObject");
token.adjust_tag_name("glyphref", "glyphRef");
token.adjust_tag_name("lineargradient", "linearGradient");
token.adjust_tag_name("radialgradient", "radialGradient");
token.adjust_tag_name("textpath", "textPath");
// Same name pairs as above, spelled with `_fly_string` literals to match the
// FlyString-taking adjust_tag_name signature.
token.adjust_tag_name("altglyph"_fly_string, "altGlyph"_fly_string);
token.adjust_tag_name("altglyphdef"_fly_string, "altGlyphDef"_fly_string);
token.adjust_tag_name("altglyphitem"_fly_string, "altGlyphItem"_fly_string);
token.adjust_tag_name("animatecolor"_fly_string, "animateColor"_fly_string);
token.adjust_tag_name("animatemotion"_fly_string, "animateMotion"_fly_string);
token.adjust_tag_name("animatetransform"_fly_string, "animateTransform"_fly_string);
token.adjust_tag_name("clippath"_fly_string, "clipPath"_fly_string);
token.adjust_tag_name("feblend"_fly_string, "feBlend"_fly_string);
token.adjust_tag_name("fecolormatrix"_fly_string, "feColorMatrix"_fly_string);
token.adjust_tag_name("fecomponenttransfer"_fly_string, "feComponentTransfer"_fly_string);
token.adjust_tag_name("fecomposite"_fly_string, "feComposite"_fly_string);
token.adjust_tag_name("feconvolvematrix"_fly_string, "feConvolveMatrix"_fly_string);
token.adjust_tag_name("fediffuselighting"_fly_string, "feDiffuseLighting"_fly_string);
token.adjust_tag_name("fedisplacementmap"_fly_string, "feDisplacementMap"_fly_string);
token.adjust_tag_name("fedistantlight"_fly_string, "feDistantLight"_fly_string);
token.adjust_tag_name("fedropshadow"_fly_string, "feDropShadow"_fly_string);
token.adjust_tag_name("feflood"_fly_string, "feFlood"_fly_string);
token.adjust_tag_name("fefunca"_fly_string, "feFuncA"_fly_string);
token.adjust_tag_name("fefuncb"_fly_string, "feFuncB"_fly_string);
token.adjust_tag_name("fefuncg"_fly_string, "feFuncG"_fly_string);
token.adjust_tag_name("fefuncr"_fly_string, "feFuncR"_fly_string);
token.adjust_tag_name("fegaussianblur"_fly_string, "feGaussianBlur"_fly_string);
token.adjust_tag_name("feimage"_fly_string, "feImage"_fly_string);
token.adjust_tag_name("femerge"_fly_string, "feMerge"_fly_string);
token.adjust_tag_name("femergenode"_fly_string, "feMergeNode"_fly_string);
token.adjust_tag_name("femorphology"_fly_string, "feMorphology"_fly_string);
token.adjust_tag_name("feoffset"_fly_string, "feOffset"_fly_string);
token.adjust_tag_name("fepointlight"_fly_string, "fePointLight"_fly_string);
token.adjust_tag_name("fespecularlighting"_fly_string, "feSpecularLighting"_fly_string);
token.adjust_tag_name("fespotlight"_fly_string, "feSpotlight"_fly_string);
token.adjust_tag_name("foreignobject"_fly_string, "foreignObject"_fly_string);
token.adjust_tag_name("glyphref"_fly_string, "glyphRef"_fly_string);
token.adjust_tag_name("lineargradient"_fly_string, "linearGradient"_fly_string);
token.adjust_tag_name("radialgradient"_fly_string, "radialGradient"_fly_string);
token.adjust_tag_name("textpath"_fly_string, "textPath"_fly_string);
}
void HTMLParser::adjust_svg_attributes(HTMLToken& token)
@ -3407,7 +3407,7 @@ void HTMLParser::handle_after_frameset(HTMLToken& token)
void HTMLParser::handle_after_after_frameset(HTMLToken& token)
{
if (token.is_comment()) {
auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), MUST(String::from_deprecated_string(token.comment())));
auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), token.comment());
MUST(document().append_child(comment));
return;
}

View file

@ -54,9 +54,9 @@ public:
struct DoctypeData {
// NOTE: "Missing" is a distinct state from the empty string.
DeprecatedString name;
DeprecatedString public_identifier;
DeprecatedString system_identifier;
String name;
String public_identifier;
String system_identifier;
bool missing_name { true };
bool missing_public_identifier { true };
bool missing_system_identifier { true };
@ -73,7 +73,7 @@ public:
// Builds a Type::StartTag token, sets its tag name to `tag_name`, and returns it.
static HTMLToken make_start_tag(FlyString const& tag_name)
{
HTMLToken token { Type::StartTag };
// NOTE(review): the two set_tag_name calls look like the before/after forms of one diff line;
// the first converts to a deprecated fly string, the second passes the FlyString as-is.
token.set_tag_name(tag_name.to_deprecated_fly_string());
token.set_tag_name(tag_name);
return token;
}
@ -134,25 +134,25 @@ public:
m_data.get<u32>() = code_point;
}
// Returns a reference to the comment data held in m_comment_data.
// Guarded by VERIFY(is_comment()), so it is only meant to be called on comment tokens.
// NOTE(review): two signature lines appear because this is a diff listing; the
// `String const&` form matches the String-typed m_comment_data member declared in this listing.
DeprecatedFlyString const& comment() const
String const& comment() const
{
VERIFY(is_comment());
return m_comment_data;
}
// Stores `comment` as this token's comment data by moving it into m_comment_data.
// Guarded by VERIFY(is_comment()).
void set_comment(String comment)
{
VERIFY(is_comment());
m_comment_data = move(comment);
}
// Returns a reference to the tag name stored in m_string_data.
// Guarded by VERIFY(is_start_tag() || is_end_tag()).
FlyString const& tag_name() const
{
VERIFY(is_start_tag() || is_end_tag());
return m_string_data;
}
// NOTE(review): presumably the pre-change overload shown by this diff — it takes a
// DeprecatedString and moves it into m_string_data rather than m_comment_data. Confirm it is
// no longer present in the current header.
void set_comment(DeprecatedString comment)
{
VERIFY(is_comment());
m_string_data = move(comment);
}
// NOTE(review): presumably the pre-change accessor shown by this diff — it returns the tag
// name by value, converting m_string_data with FlyString::from_deprecated_fly_string on every
// call. Confirm it is no longer present in the current header.
FlyString tag_name() const
{
VERIFY(is_start_tag() || is_end_tag());
return MUST(FlyString::from_deprecated_fly_string(m_string_data));
}
void set_tag_name(DeprecatedString name)
void set_tag_name(FlyString name)
{
VERIFY(is_start_tag() || is_end_tag());
m_string_data = move(name);
@ -273,10 +273,10 @@ public:
return !attribute(attribute_name).is_null();
}
// Replaces the token's tag name with `new_name` when the current tag name equals `old_name`;
// otherwise leaves the token untouched. Guarded by VERIFY(is_start_tag() || is_end_tag()).
// NOTE(review): the doubled signature and doubled `if` are the before/after forms from this
// diff listing; the FlyString forms compare against tag_name() directly, without a conversion.
void adjust_tag_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name)
void adjust_tag_name(FlyString const& old_name, FlyString const& new_name)
{
VERIFY(is_start_tag() || is_end_tag());
if (old_name == tag_name().to_deprecated_fly_string())
if (old_name == tag_name())
set_tag_name(new_name);
}
@ -356,8 +356,11 @@ private:
bool m_tag_self_closing { false };
bool m_tag_self_closing_acknowledged { false };
// Type::Comment (comment data), Type::StartTag and Type::EndTag (tag name)
DeprecatedFlyString m_string_data;
// Type::StartTag and Type::EndTag (tag name)
FlyString m_string_data;
// Type::Comment (comment data)
String m_comment_data;
Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {};

View file

@ -1108,31 +1108,31 @@ _StartOfFunction:
ON_WHITESPACE
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder()));
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON('/')
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder()));
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON('>')
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder()));
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON_EOF
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder()));
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON('=')
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder()));
m_current_token.last_attribute().local_name = consume_current_builder();
SWITCH_TO(BeforeAttributeValue);
}
ON_ASCII_UPPER_ALPHA
@ -1238,7 +1238,7 @@ _StartOfFunction:
{
ON('"')
{
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder()));
m_current_token.last_attribute().value = consume_current_builder();
SWITCH_TO(AfterAttributeValueQuoted);
}
ON('&')
@ -1270,7 +1270,7 @@ _StartOfFunction:
{
ON('\'')
{
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder()));
m_current_token.last_attribute().value = consume_current_builder();
SWITCH_TO(AfterAttributeValueQuoted);
}
ON('&')
@ -1302,7 +1302,7 @@ _StartOfFunction:
{
ON_WHITESPACE
{
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder()));
m_current_token.last_attribute().value = consume_current_builder();
m_current_token.last_attribute().value_end_position = nth_last_position(1);
SWITCH_TO(BeforeAttributeName);
}
@ -1313,7 +1313,7 @@ _StartOfFunction:
}
ON('>')
{
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder()));
m_current_token.last_attribute().value = consume_current_builder();
m_current_token.last_attribute().value_end_position = nth_last_position(1);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
@ -2879,9 +2879,9 @@ void HTMLTokenizer::restore_to(Utf8CodePointIterator const& new_iterator)
m_utf8_iterator = new_iterator;
}
// Returns the contents of m_current_builder as a string and then clears the builder.
// NOTE(review): the doubled signature and doubled `auto string` line are the before/after
// forms from this diff listing; the String form wraps m_current_builder.to_string() in MUST
// (presumably aborting on failure — confirm against the MUST macro's definition).
DeprecatedString HTMLTokenizer::consume_current_builder()
String HTMLTokenizer::consume_current_builder()
{
auto string = m_current_builder.to_deprecated_string();
auto string = MUST(m_current_builder.to_string());
m_current_builder.clear();
return string;
}

View file

@ -154,7 +154,7 @@ private:
bool consume_next_if_match(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive);
void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const;
DeprecatedString consume_current_builder();
String consume_current_builder();
static char const* state_name(State state)
{