HTMLToken.cpp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/HashTable.h>
  7. #include <LibWeb/HTML/Parser/HTMLToken.h>
  8. namespace Web::HTML {
  9. String HTMLToken::to_string() const
  10. {
  11. StringBuilder builder;
  12. switch (type()) {
  13. case HTMLToken::Type::DOCTYPE:
  14. builder.append("DOCTYPE"sv);
  15. builder.append(" { name: '"sv);
  16. builder.append(doctype_data().name);
  17. builder.append("' }"sv);
  18. break;
  19. case HTMLToken::Type::StartTag:
  20. builder.append("StartTag"sv);
  21. break;
  22. case HTMLToken::Type::EndTag:
  23. builder.append("EndTag"sv);
  24. break;
  25. case HTMLToken::Type::Comment:
  26. builder.append("Comment"sv);
  27. break;
  28. case HTMLToken::Type::Character:
  29. builder.append("Character"sv);
  30. break;
  31. case HTMLToken::Type::EndOfFile:
  32. builder.append("EndOfFile"sv);
  33. break;
  34. case HTMLToken::Type::Invalid:
  35. VERIFY_NOT_REACHED();
  36. }
  37. if (type() == HTMLToken::Type::StartTag || type() == HTMLToken::Type::EndTag) {
  38. builder.append(" { name: '"sv);
  39. builder.append(tag_name());
  40. builder.append("', { "sv);
  41. for_each_attribute([&](auto& attribute) {
  42. builder.append(attribute.local_name);
  43. builder.append("=\""sv);
  44. builder.append(attribute.value);
  45. builder.append("\" "sv);
  46. return IterationDecision::Continue;
  47. });
  48. builder.append("} }"sv);
  49. }
  50. if (is_comment()) {
  51. builder.append(" { data: '"sv);
  52. builder.append(comment());
  53. builder.append("' }"sv);
  54. }
  55. if (is_character()) {
  56. builder.append(" { data: '"sv);
  57. builder.append_code_point(code_point());
  58. builder.append("' }"sv);
  59. }
  60. if (type() == HTMLToken::Type::Character) {
  61. builder.appendff("@{}:{}", m_start_position.line, m_start_position.column);
  62. } else {
  63. builder.appendff("@{}:{}-{}:{}", m_start_position.line, m_start_position.column, m_end_position.line, m_end_position.column);
  64. }
  65. return MUST(builder.to_string());
  66. }
  67. void HTMLToken::normalize_attributes()
  68. {
  69. // From AttributeNameState: https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
  70. //
  71. // When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
  72. // the complete attribute's name must be compared to the other attributes on the same token;
  73. // if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
  74. // parse error and the new attribute must be removed from the token.
  75. // NOTE: If an attribute is so removed from a token, it, and the value that gets associated with it, if any,
  76. // are never subsequently used by the parser, and are therefore effectively discarded. Removing the attribute
  77. // in this way does not change its status as the "current attribute" for the purposes of the tokenizer, however.
  78. HashTable<FlyString> seen_attributes;
  79. auto* ptr = tag_attributes();
  80. if (!ptr)
  81. return;
  82. auto& tag_attributes = *ptr;
  83. for (size_t i = 0; i < tag_attributes.size(); ++i) {
  84. auto& attribute = tag_attributes[i];
  85. if (seen_attributes.set(attribute.local_name, AK::HashSetExistingEntryBehavior::Keep) == AK::HashSetResult::KeptExistingEntry) {
  86. // This is a duplicate attribute, remove it.
  87. tag_attributes.remove(i);
  88. --i;
  89. }
  90. }
  91. }
  92. }