mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibXML: Read code points when parsing names
This commit is contained in:
parent
42b31820a6
commit
453e034801
Notes:
github-actions[bot]
2024-11-06 09:09:03 +00:00
Author: https://github.com/Gingeh Commit: https://github.com/LadybirdBrowser/ladybird/commit/453e0348010 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2192 Reviewed-by: https://github.com/alimpfard
2 changed files with 26 additions and 6 deletions
|
@ -41,3 +41,9 @@ TEST_CASE(predefined_character_reference)
|
||||||
auto const& content = node.children[0]->content.get<XML::Node::Text>();
|
auto const& content = node.children[0]->content.get<XML::Node::Text>();
|
||||||
EXPECT_EQ(content.builder.string_view(), "Well hello &, <, >, ', and \"!");
|
EXPECT_EQ(content.builder.string_view(), "Well hello &, <, >, ', and \"!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test for names containing multi-byte (non-ASCII) characters: builds a parser
// over a <div> element whose attribute name is "中文" (with an empty value) and
// runs parse() on it. Only the outcome of parse() is checked; the resulting
// document is not inspected.
// NOTE(review): TRY_OR_FAIL is defined elsewhere; presumably it fails the test
// case when parse() returns an error — confirm against the test framework.
TEST_CASE(unicode_name)
|
||||||
|
{
|
||||||
|
XML::Parser parser("<div 中文=\"\"></div>"sv);
|
||||||
|
TRY_OR_FAIL(parser.parse());
|
||||||
|
}
|
||||||
|
|
|
@ -545,16 +545,30 @@ ErrorOr<Name, ParseError> Parser::parse_name()
|
||||||
auto rule = enter_rule();
|
auto rule = enter_rule();
|
||||||
|
|
||||||
// Name ::= NameStartChar (NameChar)*
|
// Name ::= NameStartChar (NameChar)*
|
||||||
auto start = TRY(expect(s_name_start_characters, "a NameStartChar"sv));
|
|
||||||
|
// FIXME: This is a hacky workaround to read code points instead of bytes.
|
||||||
|
// Replace this once we have a unicode-aware lexer.
|
||||||
|
auto start = m_lexer.tell();
|
||||||
|
StringView remaining = m_lexer.input().substring_view(start);
|
||||||
|
Utf8View view { remaining };
|
||||||
|
auto code_points = view.begin();
|
||||||
|
if (code_points.done() || !s_name_start_characters.contains(*code_points)) {
|
||||||
|
if (m_options.treat_errors_as_fatal)
|
||||||
|
return parse_error(m_lexer.current_position(), Expectation { "a NameStartChar"sv });
|
||||||
|
}
|
||||||
|
|
||||||
|
m_lexer.ignore(code_points.underlying_code_point_length_in_bytes());
|
||||||
|
++code_points;
|
||||||
|
|
||||||
auto accept = accept_rule();
|
auto accept = accept_rule();
|
||||||
|
|
||||||
auto rest = m_lexer.consume_while(s_name_characters);
|
while (!code_points.done() && s_name_characters.contains(*code_points)) {
|
||||||
StringBuilder builder;
|
m_lexer.ignore(code_points.underlying_code_point_length_in_bytes());
|
||||||
builder.append(start);
|
++code_points;
|
||||||
builder.append(rest);
|
}
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return builder.to_byte_string();
|
return remaining.substring_view(0, m_lexer.tell() - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.8.28. doctypedecl, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-doctypedecl
|
// 2.8.28. doctypedecl, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-doctypedecl
|
||||||
|
|
Loading…
Reference in a new issue