HTMLDocumentParser.cpp 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <LibWeb/DOM/Document.h>
  27. #include <LibWeb/DOM/DocumentType.h>
  28. #include <LibWeb/DOM/ElementFactory.h>
  29. #include <LibWeb/DOM/HTMLFormElement.h>
  30. #include <LibWeb/DOM/HTMLHeadElement.h>
  31. #include <LibWeb/Parser/HTMLDocumentParser.h>
  32. #include <LibWeb/Parser/HTMLToken.h>
  33. namespace Web {
  34. HTMLDocumentParser::HTMLDocumentParser(const StringView& input)
  35. : m_tokenizer(input)
  36. {
  37. }
  38. HTMLDocumentParser::~HTMLDocumentParser()
  39. {
  40. }
  41. void HTMLDocumentParser::run()
  42. {
  43. m_document = adopt(*new Document);
  44. for (;;) {
  45. auto optional_token = m_tokenizer.next_token();
  46. if (!optional_token.has_value())
  47. return;
  48. auto& token = optional_token.value();
  49. dbg() << "[" << insertion_mode_name() << "] " << token.to_string();
  50. if (token.type() == HTMLToken::Type::EndOfFile)
  51. return;
  52. switch (m_insertion_mode) {
  53. case InsertionMode::Initial:
  54. handle_initial(token);
  55. break;
  56. case InsertionMode::BeforeHTML:
  57. handle_before_html(token);
  58. break;
  59. case InsertionMode::BeforeHead:
  60. handle_before_head(token);
  61. break;
  62. case InsertionMode::InHead:
  63. handle_in_head(token);
  64. break;
  65. case InsertionMode::InHeadNoscript:
  66. handle_in_head_noscript(token);
  67. break;
  68. case InsertionMode::AfterHead:
  69. handle_after_head(token);
  70. break;
  71. case InsertionMode::InBody:
  72. handle_in_body(token);
  73. break;
  74. case InsertionMode::Text:
  75. handle_text(token);
  76. break;
  77. default:
  78. ASSERT_NOT_REACHED();
  79. }
  80. }
  81. }
  82. void HTMLDocumentParser::handle_initial(HTMLToken& token)
  83. {
  84. if (token.type() == HTMLToken::Type::DOCTYPE) {
  85. auto doctype = adopt(*new DocumentType(document()));
  86. doctype->set_name(token.m_doctype.name.to_string());
  87. document().append_child(move(doctype));
  88. m_insertion_mode = InsertionMode::BeforeHTML;
  89. return;
  90. }
  91. ASSERT_NOT_REACHED();
  92. }
  93. void HTMLDocumentParser::handle_before_html(HTMLToken& token)
  94. {
  95. if (token.is_start_tag() && token.tag_name() == "html") {
  96. auto element = create_element_for(token);
  97. document().append_child(element);
  98. m_stack_of_open_elements.append(element);
  99. m_insertion_mode = InsertionMode::BeforeHead;
  100. return;
  101. }
  102. ASSERT_NOT_REACHED();
  103. }
  104. NonnullRefPtr<Node> HTMLDocumentParser::current_node()
  105. {
  106. return m_stack_of_open_elements.last();
  107. }
  108. RefPtr<Node> HTMLDocumentParser::find_appropriate_place_for_inserting_node()
  109. {
  110. auto target = current_node();
  111. if (m_foster_parenting) {
  112. ASSERT_NOT_REACHED();
  113. }
  114. return target;
  115. }
  116. NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token)
  117. {
  118. auto element = create_element(document(), token.tag_name());
  119. for (auto& attribute : token.m_tag.attributes) {
  120. element->set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string());
  121. }
  122. return element;
  123. }
  124. RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
  125. {
  126. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
  127. auto element = create_element_for(token);
  128. // FIXME: Check if it's possible to insert `element` at `adjusted_insertion_location`
  129. adjusted_insertion_location->append_child(element);
  130. m_stack_of_open_elements.append(element);
  131. return element;
  132. }
  133. void HTMLDocumentParser::handle_before_head(HTMLToken& token)
  134. {
  135. if (token.is_start_tag() && token.tag_name() == "head") {
  136. auto element = insert_html_element(token);
  137. m_head_element = to<HTMLHeadElement>(element);
  138. m_insertion_mode = InsertionMode::InHead;
  139. return;
  140. }
  141. ASSERT_NOT_REACHED();
  142. }
  143. void HTMLDocumentParser::handle_in_head(HTMLToken& token)
  144. {
  145. if (token.is_start_tag() && token.tag_name() == "meta") {
  146. auto element = insert_html_element(token);
  147. m_stack_of_open_elements.take_last();
  148. if (token.is_self_closing()) {
  149. ASSERT_NOT_REACHED();
  150. }
  151. return;
  152. }
  153. if (token.is_end_tag() && token.tag_name() == "head") {
  154. m_stack_of_open_elements.take_last();
  155. m_insertion_mode = InsertionMode::AfterHead;
  156. return;
  157. }
  158. ASSERT_NOT_REACHED();
  159. }
  160. void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&)
  161. {
  162. ASSERT_NOT_REACHED();
  163. }
  164. void HTMLDocumentParser::handle_after_head(HTMLToken& token)
  165. {
  166. if (token.is_character()) {
  167. ASSERT_NOT_REACHED();
  168. }
  169. if (token.is_comment()) {
  170. ASSERT_NOT_REACHED();
  171. }
  172. if (token.is_doctype()) {
  173. ASSERT_NOT_REACHED();
  174. }
  175. if (token.is_start_tag() && token.tag_name() == "html") {
  176. ASSERT_NOT_REACHED();
  177. }
  178. if (token.is_start_tag() && token.tag_name() == "body") {
  179. ASSERT_NOT_REACHED();
  180. }
  181. if (token.is_start_tag() && token.tag_name() == "frameset") {
  182. ASSERT_NOT_REACHED();
  183. }
  184. {
  185. Vector<String> names = { "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "template", "title" };
  186. if (token.is_end_tag() && names.contains_slow(token.tag_name())) {
  187. ASSERT_NOT_REACHED();
  188. }
  189. }
  190. if (token.is_end_tag() && token.tag_name() == "template") {
  191. ASSERT_NOT_REACHED();
  192. }
  193. if (token.is_end_tag() && (token.tag_name() == "body" || token.tag_name() == "html" || token.tag_name() == "br")) {
  194. goto AnythingElse;
  195. }
  196. if ((token.is_start_tag() && token.tag_name() == "head") || token.is_end_tag()) {
  197. ASSERT_NOT_REACHED();
  198. }
  199. AnythingElse:
  200. HTMLToken fake_body_token;
  201. fake_body_token.m_type = HTMLToken::Type::StartTag;
  202. fake_body_token.m_tag.tag_name.append("body");
  203. insert_html_element(fake_body_token);
  204. m_insertion_mode = InsertionMode::InBody;
  205. }
  206. void HTMLDocumentParser::handle_in_body(HTMLToken&)
  207. {
  208. ASSERT_NOT_REACHED();
  209. }
  210. void HTMLDocumentParser::handle_text(HTMLToken&)
  211. {
  212. ASSERT_NOT_REACHED();
  213. }
  214. const char* HTMLDocumentParser::insertion_mode_name() const
  215. {
  216. switch (m_insertion_mode) {
  217. #define __ENUMERATE_INSERTION_MODE(mode) \
  218. case InsertionMode::mode: \
  219. return #mode;
  220. ENUMERATE_INSERTION_MODES
  221. #undef __ENUMERATE_INSERTION_MODE
  222. }
  223. ASSERT_NOT_REACHED();
  224. }
  225. Document& HTMLDocumentParser::document()
  226. {
  227. return *m_document;
  228. }
  229. }