HTMLToken.h 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <max.wipfli@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/FlyString.h>
  9. #include <AK/Function.h>
  10. #include <AK/OwnPtr.h>
  11. #include <AK/Types.h>
  12. #include <AK/Variant.h>
  13. #include <AK/Vector.h>
  14. namespace Web::HTML {
  15. class HTMLTokenizer;
  16. class HTMLToken {
  17. AK_MAKE_NONCOPYABLE(HTMLToken);
  18. AK_MAKE_DEFAULT_MOVABLE(HTMLToken);
  19. public:
  20. enum class Type : u8 {
  21. Invalid,
  22. DOCTYPE,
  23. StartTag,
  24. EndTag,
  25. Comment,
  26. Character,
  27. EndOfFile,
  28. };
  29. struct Position {
  30. size_t line { 0 };
  31. size_t column { 0 };
  32. size_t byte_offset { 0 };
  33. };
  34. struct Attribute {
  35. Optional<FlyString> prefix;
  36. FlyString local_name;
  37. Optional<FlyString> namespace_;
  38. String value;
  39. Position name_start_position;
  40. Position value_start_position;
  41. Position name_end_position;
  42. Position value_end_position;
  43. };
  44. struct DoctypeData {
  45. // NOTE: "Missing" is a distinct state from the empty string.
  46. String name;
  47. String public_identifier;
  48. String system_identifier;
  49. bool missing_name { true };
  50. bool missing_public_identifier { true };
  51. bool missing_system_identifier { true };
  52. bool force_quirks { false };
  53. };
  54. static HTMLToken make_character(u32 code_point)
  55. {
  56. HTMLToken token { Type::Character };
  57. token.set_code_point(code_point);
  58. return token;
  59. }
  60. static HTMLToken make_start_tag(FlyString const& tag_name)
  61. {
  62. HTMLToken token { Type::StartTag };
  63. token.set_tag_name(tag_name);
  64. return token;
  65. }
  66. HTMLToken() = default;
  67. HTMLToken(Type type)
  68. : m_type(type)
  69. {
  70. switch (m_type) {
  71. case Type::Character:
  72. m_data.set(0u);
  73. break;
  74. case Type::DOCTYPE:
  75. m_data.set(OwnPtr<DoctypeData> {});
  76. break;
  77. case Type::StartTag:
  78. case Type::EndTag:
  79. m_data.set(OwnPtr<Vector<Attribute>>());
  80. break;
  81. default:
  82. break;
  83. }
  84. }
  85. bool is_doctype() const { return m_type == Type::DOCTYPE; }
  86. bool is_start_tag() const { return m_type == Type::StartTag; }
  87. bool is_end_tag() const { return m_type == Type::EndTag; }
  88. bool is_comment() const { return m_type == Type::Comment; }
  89. bool is_character() const { return m_type == Type::Character; }
  90. bool is_end_of_file() const { return m_type == Type::EndOfFile; }
  91. u32 code_point() const
  92. {
  93. VERIFY(is_character());
  94. return m_data.get<u32>();
  95. }
  96. bool is_parser_whitespace() const
  97. {
  98. // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
  99. if (!is_character())
  100. return false;
  101. switch (code_point()) {
  102. case '\t':
  103. case '\n':
  104. case '\f':
  105. case '\r':
  106. case ' ':
  107. return true;
  108. default:
  109. return false;
  110. }
  111. }
  112. void set_code_point(u32 code_point)
  113. {
  114. VERIFY(is_character());
  115. m_data.get<u32>() = code_point;
  116. }
  117. String const& comment() const
  118. {
  119. VERIFY(is_comment());
  120. return m_comment_data;
  121. }
  122. void set_comment(String comment)
  123. {
  124. VERIFY(is_comment());
  125. m_comment_data = move(comment);
  126. }
  127. FlyString const& tag_name() const
  128. {
  129. VERIFY(is_start_tag() || is_end_tag());
  130. return m_string_data;
  131. }
  132. void set_tag_name(FlyString name)
  133. {
  134. VERIFY(is_start_tag() || is_end_tag());
  135. m_string_data = move(name);
  136. }
  137. bool is_self_closing() const
  138. {
  139. VERIFY(is_start_tag() || is_end_tag());
  140. return m_tag_self_closing;
  141. }
  142. void set_self_closing(bool self_closing)
  143. {
  144. VERIFY(is_start_tag() || is_end_tag());
  145. m_tag_self_closing = self_closing;
  146. }
  147. bool has_acknowledged_self_closing_flag() const
  148. {
  149. VERIFY(is_self_closing());
  150. return m_tag_self_closing_acknowledged;
  151. }
  152. void acknowledge_self_closing_flag_if_set()
  153. {
  154. if (is_self_closing())
  155. m_tag_self_closing_acknowledged = true;
  156. }
  157. bool has_attributes() const
  158. {
  159. VERIFY(is_start_tag() || is_end_tag());
  160. auto* ptr = tag_attributes();
  161. return ptr && !ptr->is_empty();
  162. }
  163. size_t attribute_count() const
  164. {
  165. VERIFY(is_start_tag() || is_end_tag());
  166. if (auto* ptr = tag_attributes())
  167. return ptr->size();
  168. return 0;
  169. }
  170. void add_attribute(Attribute attribute)
  171. {
  172. VERIFY(is_start_tag() || is_end_tag());
  173. ensure_tag_attributes().append(move(attribute));
  174. }
  175. Attribute const& last_attribute() const
  176. {
  177. VERIFY(is_start_tag() || is_end_tag());
  178. VERIFY(has_attributes());
  179. return tag_attributes()->last();
  180. }
  181. Attribute& last_attribute()
  182. {
  183. VERIFY(is_start_tag() || is_end_tag());
  184. VERIFY(has_attributes());
  185. return tag_attributes()->last();
  186. }
  187. void drop_attributes()
  188. {
  189. VERIFY(is_start_tag() || is_end_tag());
  190. m_data.get<OwnPtr<Vector<Attribute>>>().clear();
  191. }
  192. void for_each_attribute(NOESCAPE Function<IterationDecision(Attribute const&)> callback) const
  193. {
  194. VERIFY(is_start_tag() || is_end_tag());
  195. auto* ptr = tag_attributes();
  196. if (!ptr)
  197. return;
  198. for (auto& attribute : *ptr) {
  199. if (callback(attribute) == IterationDecision::Break)
  200. break;
  201. }
  202. }
  203. void for_each_attribute(NOESCAPE Function<IterationDecision(Attribute&)> callback)
  204. {
  205. VERIFY(is_start_tag() || is_end_tag());
  206. auto* ptr = tag_attributes();
  207. if (!ptr)
  208. return;
  209. for (auto& attribute : *ptr) {
  210. if (callback(attribute) == IterationDecision::Break)
  211. break;
  212. }
  213. }
  214. Optional<String> attribute(FlyString const& attribute_name) const
  215. {
  216. if (auto result = raw_attribute(attribute_name); result.has_value())
  217. return result->value;
  218. return {};
  219. }
  220. Optional<Attribute const&> raw_attribute(FlyString const& attribute_name) const
  221. {
  222. VERIFY(is_start_tag() || is_end_tag());
  223. auto* ptr = tag_attributes();
  224. if (!ptr)
  225. return {};
  226. for (auto const& attribute : *ptr) {
  227. if (attribute_name == attribute.local_name)
  228. return attribute;
  229. }
  230. return {};
  231. }
  232. bool has_attribute(FlyString const& attribute_name) const
  233. {
  234. return attribute(attribute_name).has_value();
  235. }
  236. void adjust_tag_name(FlyString const& old_name, FlyString const& new_name)
  237. {
  238. VERIFY(is_start_tag() || is_end_tag());
  239. if (old_name == tag_name())
  240. set_tag_name(new_name);
  241. }
  242. void adjust_attribute_name(FlyString const& old_name, FlyString const& new_name)
  243. {
  244. VERIFY(is_start_tag() || is_end_tag());
  245. for_each_attribute([&](Attribute& attribute) {
  246. if (old_name == attribute.local_name)
  247. attribute.local_name = new_name;
  248. return IterationDecision::Continue;
  249. });
  250. }
  251. void adjust_foreign_attribute(FlyString const& old_name, Optional<FlyString> const& prefix, FlyString const& local_name, Optional<FlyString> const& namespace_)
  252. {
  253. VERIFY(is_start_tag() || is_end_tag());
  254. for_each_attribute([&](Attribute& attribute) {
  255. if (old_name == attribute.local_name) {
  256. attribute.prefix = prefix;
  257. attribute.local_name = local_name;
  258. attribute.namespace_ = namespace_;
  259. }
  260. return IterationDecision::Continue;
  261. });
  262. }
  263. DoctypeData const& doctype_data() const
  264. {
  265. VERIFY(is_doctype());
  266. auto* ptr = m_data.get<OwnPtr<DoctypeData>>().ptr();
  267. VERIFY(ptr);
  268. return *ptr;
  269. }
  270. DoctypeData& ensure_doctype_data()
  271. {
  272. VERIFY(is_doctype());
  273. auto& ptr = m_data.get<OwnPtr<DoctypeData>>();
  274. if (!ptr)
  275. ptr = make<DoctypeData>();
  276. return *ptr;
  277. }
  278. Type type() const { return m_type; }
  279. String to_string() const;
  280. Position const& start_position() const { return m_start_position; }
  281. Position const& end_position() const { return m_end_position; }
  282. void set_start_position(Badge<HTMLTokenizer>, Position start_position) { m_start_position = start_position; }
  283. void set_end_position(Badge<HTMLTokenizer>, Position end_position) { m_end_position = end_position; }
  284. private:
  285. Vector<Attribute> const* tag_attributes() const
  286. {
  287. return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
  288. }
  289. Vector<Attribute>* tag_attributes()
  290. {
  291. return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
  292. }
  293. Vector<Attribute>& ensure_tag_attributes()
  294. {
  295. VERIFY(is_start_tag() || is_end_tag());
  296. auto& ptr = m_data.get<OwnPtr<Vector<Attribute>>>();
  297. if (!ptr)
  298. ptr = make<Vector<Attribute>>();
  299. return *ptr;
  300. }
  301. Type m_type { Type::Invalid };
  302. // Type::StartTag and Type::EndTag
  303. bool m_tag_self_closing { false };
  304. bool m_tag_self_closing_acknowledged { false };
  305. // Type::StartTag and Type::EndTag (tag name)
  306. FlyString m_string_data;
  307. // Type::Comment (comment data)
  308. String m_comment_data;
  309. Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {};
  310. Position m_start_position;
  311. Position m_end_position;
  312. };
  313. }