HTMLDocumentParser.cpp 76 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. //#define PARSER_DEBUG
  27. #include <AK/Utf32View.h>
  28. #include <LibWeb/DOM/Comment.h>
  29. #include <LibWeb/DOM/Document.h>
  30. #include <LibWeb/DOM/DocumentType.h>
  31. #include <LibWeb/DOM/ElementFactory.h>
  32. #include <LibWeb/DOM/Event.h>
  33. #include <LibWeb/DOM/HTMLFormElement.h>
  34. #include <LibWeb/DOM/HTMLHeadElement.h>
  35. #include <LibWeb/DOM/HTMLScriptElement.h>
  36. #include <LibWeb/DOM/Text.h>
  37. #include <LibWeb/Parser/HTMLDocumentParser.h>
  38. #include <LibWeb/Parser/HTMLToken.h>
  // Logs a parse error: streams "Parse error! ", the enclosing function's
  // __PRETTY_FUNCTION__ and the current __LINE__ to dbg().
  // The macro only logs; it does not return from, or otherwise alter the control
  // flow of, the function that uses it. The do/while(0) wrapper makes it behave
  // like a single statement when followed by a semicolon.
  39. #define PARSE_ERROR() \
  40. do { \
  41. dbg() << "Parse error! " << __PRETTY_FUNCTION__ << " @ " << __LINE__; \
  42. } while (0)
  43. namespace Web {
  // Constructs a parser whose tokenizer is initialized from `input` and `encoding`.
  // No parsing happens here; the document is created and populated by run().
  44. HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
  45. : m_tokenizer(input, encoding)
  46. {
  47. }
  48. HTMLDocumentParser::~HTMLDocumentParser()
  49. {
  50. }
  51. void HTMLDocumentParser::run(const URL& url)
  52. {
  53. m_document = adopt(*new Document);
  54. m_document->set_url(url);
  55. m_document->set_source(m_tokenizer.source());
  56. for (;;) {
  57. auto optional_token = m_tokenizer.next_token();
  58. if (!optional_token.has_value())
  59. break;
  60. auto& token = optional_token.value();
  61. #ifdef PARSER_DEBUG
  62. dbg() << "[" << insertion_mode_name() << "] " << token.to_string();
  63. #endif
  64. process_using_the_rules_for(m_insertion_mode, token);
  65. if (m_stop_parsing) {
  66. dbg() << "Stop parsing! :^)";
  67. break;
  68. }
  69. }
  70. flush_character_insertions();
  71. // "The end"
  72. auto scripts_to_execute_when_parsing_has_finished = m_document->take_scripts_to_execute_when_parsing_has_finished({});
  73. for (auto& script : scripts_to_execute_when_parsing_has_finished) {
  74. script.execute_script();
  75. }
  76. m_document->dispatch_event(Event::create("DOMContentLoaded"));
  77. auto scripts_to_execute_as_soon_as_possible = m_document->take_scripts_to_execute_as_soon_as_possible({});
  78. for (auto& script : scripts_to_execute_as_soon_as_possible) {
  79. script.execute_script();
  80. }
  81. }
  82. void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token)
  83. {
  84. switch (mode) {
  85. case InsertionMode::Initial:
  86. handle_initial(token);
  87. break;
  88. case InsertionMode::BeforeHTML:
  89. handle_before_html(token);
  90. break;
  91. case InsertionMode::BeforeHead:
  92. handle_before_head(token);
  93. break;
  94. case InsertionMode::InHead:
  95. handle_in_head(token);
  96. break;
  97. case InsertionMode::InHeadNoscript:
  98. handle_in_head_noscript(token);
  99. break;
  100. case InsertionMode::AfterHead:
  101. handle_after_head(token);
  102. break;
  103. case InsertionMode::InBody:
  104. handle_in_body(token);
  105. break;
  106. case InsertionMode::AfterBody:
  107. handle_after_body(token);
  108. break;
  109. case InsertionMode::AfterAfterBody:
  110. handle_after_after_body(token);
  111. break;
  112. case InsertionMode::Text:
  113. handle_text(token);
  114. break;
  115. case InsertionMode::InTable:
  116. handle_in_table(token);
  117. break;
  118. case InsertionMode::InTableBody:
  119. handle_in_table_body(token);
  120. break;
  121. case InsertionMode::InRow:
  122. handle_in_row(token);
  123. break;
  124. case InsertionMode::InCell:
  125. handle_in_cell(token);
  126. break;
  127. case InsertionMode::InTableText:
  128. handle_in_table_text(token);
  129. break;
  130. case InsertionMode::InSelectInTable:
  131. handle_in_select_in_table(token);
  132. break;
  133. case InsertionMode::InSelect:
  134. handle_in_select(token);
  135. break;
  136. case InsertionMode::InCaption:
  137. handle_in_caption(token);
  138. break;
  139. case InsertionMode::InColumnGroup:
  140. handle_in_column_group(token);
  141. break;
  142. default:
  143. ASSERT_NOT_REACHED();
  144. }
  145. }
  146. void HTMLDocumentParser::handle_initial(HTMLToken& token)
  147. {
  148. if (token.is_character() && token.is_parser_whitespace()) {
  149. return;
  150. }
  151. if (token.is_comment()) {
  152. auto comment = adopt(*new Comment(document(), token.m_comment_or_character.data.to_string()));
  153. document().append_child(move(comment));
  154. return;
  155. }
  156. if (token.is_doctype()) {
  157. auto doctype = adopt(*new DocumentType(document()));
  158. doctype->set_name(token.m_doctype.name.to_string());
  159. document().append_child(move(doctype));
  160. document().set_quirks_mode(token.m_doctype.force_quirks);
  161. m_insertion_mode = InsertionMode::BeforeHTML;
  162. return;
  163. }
  164. PARSE_ERROR();
  165. document().set_quirks_mode(true);
  166. m_insertion_mode = InsertionMode::BeforeHTML;
  167. process_using_the_rules_for(InsertionMode::BeforeHTML, token);
  168. }
  169. void HTMLDocumentParser::handle_before_html(HTMLToken& token)
  170. {
  171. if (token.is_doctype()) {
  172. PARSE_ERROR();
  173. return;
  174. }
  175. if (token.is_comment()) {
  176. auto comment = adopt(*new Comment(document(), token.m_comment_or_character.data.to_string()));
  177. document().append_child(move(comment));
  178. return;
  179. }
  180. if (token.is_character() && token.is_parser_whitespace()) {
  181. return;
  182. }
  183. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  184. auto element = create_element_for(token);
  185. document().append_child(element);
  186. m_stack_of_open_elements.push(move(element));
  187. m_insertion_mode = InsertionMode::BeforeHead;
  188. return;
  189. }
  190. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
  191. goto AnythingElse;
  192. }
  193. if (token.is_end_tag()) {
  194. PARSE_ERROR();
  195. return;
  196. }
  197. AnythingElse:
  198. auto element = create_element(document(), HTML::TagNames::html);
  199. document().append_child(element);
  200. m_stack_of_open_elements.push(element);
  201. // FIXME: If the Document is being loaded as part of navigation of a browsing context, then: run the application cache selection algorithm with no manifest, passing it the Document object.
  202. m_insertion_mode = InsertionMode::BeforeHead;
  203. process_using_the_rules_for(InsertionMode::BeforeHead, token);
  204. return;
  205. }
  206. Element& HTMLDocumentParser::current_node()
  207. {
  208. return m_stack_of_open_elements.current_node();
  209. }
  210. Element& HTMLDocumentParser::node_before_current_node()
  211. {
  212. return m_stack_of_open_elements.elements().at(m_stack_of_open_elements.elements().size() - 2);
  213. }
  214. RefPtr<Node> HTMLDocumentParser::find_appropriate_place_for_inserting_node()
  215. {
  216. auto& target = current_node();
  217. if (m_foster_parenting) {
  218. TODO();
  219. }
  220. return target;
  221. }
  222. NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token)
  223. {
  224. auto element = create_element(document(), token.tag_name());
  225. for (auto& attribute : token.m_tag.attributes) {
  226. element->set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string());
  227. }
  228. return element;
  229. }
  230. RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
  231. {
  232. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
  233. auto element = create_element_for(token);
  234. // FIXME: Check if it's possible to insert `element` at `adjusted_insertion_location`
  235. adjusted_insertion_location->append_child(element);
  236. m_stack_of_open_elements.push(element);
  237. return element;
  238. }
  239. void HTMLDocumentParser::handle_before_head(HTMLToken& token)
  240. {
  241. if (token.is_character() && token.is_parser_whitespace()) {
  242. return;
  243. }
  244. if (token.is_comment()) {
  245. insert_comment(token);
  246. return;
  247. }
  248. if (token.is_doctype()) {
  249. PARSE_ERROR();
  250. return;
  251. }
  252. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  253. process_using_the_rules_for(InsertionMode::InBody, token);
  254. return;
  255. }
  256. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::head) {
  257. auto element = insert_html_element(token);
  258. m_head_element = to<HTMLHeadElement>(element);
  259. m_insertion_mode = InsertionMode::InHead;
  260. return;
  261. }
  262. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
  263. goto AnythingElse;
  264. }
  265. if (token.is_end_tag()) {
  266. PARSE_ERROR();
  267. return;
  268. }
  269. AnythingElse:
  270. HTMLToken fake_head_token;
  271. fake_head_token.m_type = HTMLToken::Type::StartTag;
  272. fake_head_token.m_tag.tag_name.append(HTML::TagNames::head);
  273. m_head_element = to<HTMLHeadElement>(insert_html_element(fake_head_token));
  274. m_insertion_mode = InsertionMode::InHead;
  275. process_using_the_rules_for(InsertionMode::InHead, token);
  276. return;
  277. }
  278. void HTMLDocumentParser::insert_comment(HTMLToken& token)
  279. {
  280. auto data = token.m_comment_or_character.data.to_string();
  281. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
  282. adjusted_insertion_location->append_child(adopt(*new Comment(document(), data)));
  283. }
  284. void HTMLDocumentParser::handle_in_head(HTMLToken& token)
  285. {
  286. if (token.is_parser_whitespace()) {
  287. insert_character(token.codepoint());
  288. return;
  289. }
  290. if (token.is_comment()) {
  291. insert_comment(token);
  292. return;
  293. }
  294. if (token.is_doctype()) {
  295. PARSE_ERROR();
  296. return;
  297. }
  298. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  299. process_using_the_rules_for(InsertionMode::InBody, token);
  300. return;
  301. }
  302. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link)) {
  303. insert_html_element(token);
  304. m_stack_of_open_elements.pop();
  305. token.acknowledge_self_closing_flag_if_set();
  306. return;
  307. }
  308. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::meta) {
  309. auto element = insert_html_element(token);
  310. m_stack_of_open_elements.pop();
  311. token.acknowledge_self_closing_flag_if_set();
  312. return;
  313. }
  314. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::title) {
  315. insert_html_element(token);
  316. m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
  317. m_original_insertion_mode = m_insertion_mode;
  318. m_insertion_mode = InsertionMode::Text;
  319. return;
  320. }
  321. if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled) || token.tag_name() == HTML::TagNames::noframes || token.tag_name() == HTML::TagNames::style)) {
  322. parse_generic_raw_text_element(token);
  323. return;
  324. }
  325. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) {
  326. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
  327. auto element = create_element_for(token);
  328. auto& script_element = to<HTMLScriptElement>(*element);
  329. script_element.set_parser_document({}, document());
  330. script_element.set_non_blocking({}, false);
  331. if (m_parsing_fragment) {
  332. TODO();
  333. }
  334. if (m_invoked_via_document_write) {
  335. TODO();
  336. }
  337. adjusted_insertion_location->append_child(element, false);
  338. m_stack_of_open_elements.push(element);
  339. m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
  340. m_original_insertion_mode = m_insertion_mode;
  341. m_insertion_mode = InsertionMode::Text;
  342. return;
  343. }
  344. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::head) {
  345. m_stack_of_open_elements.pop();
  346. m_insertion_mode = InsertionMode::AfterHead;
  347. return;
  348. }
  349. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
  350. TODO();
  351. }
  352. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) {
  353. // FIXME: Support this properly
  354. insert_html_element(token);
  355. return;
  356. }
  357. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
  358. // FIXME: Support this properly
  359. ASSERT(current_node().tag_name() == HTML::TagNames::template_);
  360. m_stack_of_open_elements.pop();
  361. return;
  362. }
  363. if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) {
  364. PARSE_ERROR();
  365. return;
  366. }
  367. m_stack_of_open_elements.pop();
  368. m_insertion_mode = InsertionMode::AfterHead;
  369. process_using_the_rules_for(m_insertion_mode, token);
  370. }
  // Handler for InsertionMode::InHeadNoscript (see the dispatch in
  // process_using_the_rules_for). Not implemented yet: the token is ignored and
  // every call ends in TODO().
  371. void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&)
  372. {
  373. TODO();
  374. }
  375. void HTMLDocumentParser::parse_generic_raw_text_element(HTMLToken& token)
  376. {
  377. insert_html_element(token);
  378. m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
  379. m_original_insertion_mode = m_insertion_mode;
  380. m_insertion_mode = InsertionMode::Text;
  381. }
  382. Text* HTMLDocumentParser::find_character_insertion_node()
  383. {
  384. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
  385. if (adjusted_insertion_location->is_document())
  386. return nullptr;
  387. if (adjusted_insertion_location->last_child() && adjusted_insertion_location->last_child()->is_text())
  388. return to<Text>(adjusted_insertion_location->last_child());
  389. auto new_text_node = adopt(*new Text(document(), ""));
  390. adjusted_insertion_location->append_child(new_text_node);
  391. return new_text_node;
  392. }
  393. void HTMLDocumentParser::flush_character_insertions()
  394. {
  395. if (m_character_insertion_builder.is_empty())
  396. return;
  397. m_character_insertion_node->set_data(m_character_insertion_builder.to_string());
  398. m_character_insertion_node->parent()->children_changed();
  399. m_character_insertion_builder.clear();
  400. }
  401. void HTMLDocumentParser::insert_character(u32 data)
  402. {
  403. auto node = find_character_insertion_node();
  404. if (node == m_character_insertion_node) {
  405. m_character_insertion_builder.append(Utf32View { &data, 1 });
  406. return;
  407. }
  408. if (!m_character_insertion_node) {
  409. m_character_insertion_node = node;
  410. m_character_insertion_builder.append(Utf32View { &data, 1 });
  411. return;
  412. }
  413. flush_character_insertions();
  414. m_character_insertion_node = node;
  415. m_character_insertion_builder.append(Utf32View { &data, 1 });
  416. }
  417. void HTMLDocumentParser::handle_after_head(HTMLToken& token)
  418. {
  419. if (token.is_character() && token.is_parser_whitespace()) {
  420. insert_character(token.codepoint());
  421. return;
  422. }
  423. if (token.is_comment()) {
  424. insert_comment(token);
  425. return;
  426. }
  427. if (token.is_doctype()) {
  428. PARSE_ERROR();
  429. return;
  430. }
  431. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  432. process_using_the_rules_for(InsertionMode::InBody, token);
  433. return;
  434. }
  435. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) {
  436. insert_html_element(token);
  437. m_frameset_ok = false;
  438. m_insertion_mode = InsertionMode::InBody;
  439. return;
  440. }
  441. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) {
  442. insert_html_element(token);
  443. m_insertion_mode = InsertionMode::InFrameset;
  444. return;
  445. }
  446. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) {
  447. PARSE_ERROR();
  448. m_stack_of_open_elements.push(*m_head_element);
  449. process_using_the_rules_for(InsertionMode::InHead, token);
  450. m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) {
  451. return entry.ptr() == m_head_element;
  452. });
  453. return;
  454. }
  455. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
  456. TODO();
  457. }
  458. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
  459. goto AnythingElse;
  460. }
  461. if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) {
  462. PARSE_ERROR();
  463. return;
  464. }
  465. AnythingElse:
  466. HTMLToken fake_body_token;
  467. fake_body_token.m_type = HTMLToken::Type::StartTag;
  468. fake_body_token.m_tag.tag_name.append(HTML::TagNames::body);
  469. insert_html_element(fake_body_token);
  470. m_insertion_mode = InsertionMode::InBody;
  471. process_using_the_rules_for(m_insertion_mode, token);
  472. }
  473. void HTMLDocumentParser::generate_implied_end_tags(const FlyString& exception)
  474. {
  475. while (current_node().tag_name() != exception && current_node().tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc))
  476. m_stack_of_open_elements.pop();
  477. }
  478. void HTMLDocumentParser::close_a_p_element()
  479. {
  480. generate_implied_end_tags(HTML::TagNames::p);
  481. if (current_node().tag_name() != HTML::TagNames::p) {
  482. PARSE_ERROR();
  483. }
  484. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::p);
  485. }
  486. void HTMLDocumentParser::handle_after_body(HTMLToken& token)
  487. {
  488. if (token.is_character() && token.is_parser_whitespace()) {
  489. process_using_the_rules_for(InsertionMode::InBody, token);
  490. return;
  491. }
  492. if (token.is_comment()) {
  493. TODO();
  494. }
  495. if (token.is_doctype()) {
  496. PARSE_ERROR();
  497. return;
  498. }
  499. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  500. process_using_the_rules_for(InsertionMode::InBody, token);
  501. return;
  502. }
  503. if (token.is_end_of_file()) {
  504. stop_parsing();
  505. return;
  506. }
  507. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) {
  508. if (m_parsing_fragment) {
  509. TODO();
  510. }
  511. m_insertion_mode = InsertionMode::AfterAfterBody;
  512. return;
  513. }
  514. PARSE_ERROR();
  515. m_insertion_mode = InsertionMode::InBody;
  516. process_using_the_rules_for(InsertionMode::InBody, token);
  517. }
  518. void HTMLDocumentParser::handle_after_after_body(HTMLToken& token)
  519. {
  520. if (token.is_comment()) {
  521. auto comment = adopt(*new Comment(document(), token.m_comment_or_character.data.to_string()));
  522. document().append_child(move(comment));
  523. return;
  524. }
  525. if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) {
  526. process_using_the_rules_for(InsertionMode::InBody, token);
  527. return;
  528. }
  529. if (token.is_end_of_file()) {
  530. stop_parsing();
  531. return;
  532. }
  533. PARSE_ERROR();
  534. m_insertion_mode = InsertionMode::InBody;
  535. process_using_the_rules_for(m_insertion_mode, token);
  536. }
  537. void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
  538. {
  539. // FIXME: This needs to care about "markers"
  540. if (m_list_of_active_formatting_elements.is_empty())
  541. return;
  542. if (m_list_of_active_formatting_elements.entries().last().is_marker())
  543. return;
  544. if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
  545. return;
  546. ssize_t index = m_list_of_active_formatting_elements.entries().size() - 1;
  547. RefPtr<Element> entry = m_list_of_active_formatting_elements.entries().at(index).element;
  548. ASSERT(entry);
  549. Rewind:
  550. if (index == 0) {
  551. goto Create;
  552. }
  553. --index;
  554. entry = m_list_of_active_formatting_elements.entries().at(index).element;
  555. ASSERT(entry);
  556. if (!m_stack_of_open_elements.contains(*entry))
  557. goto Rewind;
  558. Advance:
  559. ++index;
  560. entry = m_list_of_active_formatting_elements.entries().at(index).element;
  561. ASSERT(entry);
  562. Create:
  563. // FIXME: Hold on to the real token!
  564. HTMLToken fake_token;
  565. fake_token.m_type = HTMLToken::Type::StartTag;
  566. fake_token.m_tag.tag_name.append(entry->tag_name());
  567. auto new_element = insert_html_element(fake_token);
  568. m_list_of_active_formatting_elements.entries().at(index).element = *new_element;
  569. if (index != (ssize_t)m_list_of_active_formatting_elements.entries().size() - 1)
  570. goto Advance;
  571. }
  572. HTMLDocumentParser::AdoptionAgencyAlgorithmOutcome HTMLDocumentParser::run_the_adoption_agency_algorithm(HTMLToken& token)
  573. {
  574. auto subject = token.tag_name();
  575. // If the current node is an HTML element whose tag name is subject,
  576. // and the current node is not in the list of active formatting elements,
  577. // then pop the current node off the stack of open elements, and return.
  578. if (current_node().tag_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) {
  579. m_stack_of_open_elements.pop();
  580. return AdoptionAgencyAlgorithmOutcome::DoNothing;
  581. }
  582. size_t outer_loop_counter = 0;
  583. //OuterLoop:
  584. if (outer_loop_counter >= 8)
  585. return AdoptionAgencyAlgorithmOutcome::DoNothing;
  586. ++outer_loop_counter;
  587. auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject);
  588. if (!formatting_element)
  589. return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps;
  590. if (!m_stack_of_open_elements.contains(*formatting_element)) {
  591. PARSE_ERROR();
  592. // FIXME: If formatting element is not in the stack of open elements,
  593. // then this is a parse error; remove the element from the list, and return.
  594. TODO();
  595. }
  596. if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) {
  597. PARSE_ERROR();
  598. return AdoptionAgencyAlgorithmOutcome::DoNothing;
  599. }
  600. if (formatting_element != &current_node()) {
  601. PARSE_ERROR();
  602. }
  603. RefPtr<Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element);
  604. if (!furthest_block) {
  605. while (&current_node() != formatting_element)
  606. m_stack_of_open_elements.pop();
  607. m_stack_of_open_elements.pop();
  608. m_list_of_active_formatting_elements.remove(*formatting_element);
  609. return AdoptionAgencyAlgorithmOutcome::DoNothing;
  610. }
  611. // FIXME: Implement the rest of the AAA :^)
  612. TODO();
  613. }
  614. bool HTMLDocumentParser::is_special_tag(const FlyString& tag_name)
  615. {
  616. return tag_name.is_one_of(
  617. HTML::TagNames::address,
  618. HTML::TagNames::applet,
  619. HTML::TagNames::area,
  620. HTML::TagNames::article,
  621. HTML::TagNames::aside,
  622. HTML::TagNames::base,
  623. HTML::TagNames::basefont,
  624. HTML::TagNames::bgsound,
  625. HTML::TagNames::blockquote,
  626. HTML::TagNames::body,
  627. HTML::TagNames::br,
  628. HTML::TagNames::button,
  629. HTML::TagNames::caption,
  630. HTML::TagNames::center,
  631. HTML::TagNames::col,
  632. HTML::TagNames::colgroup,
  633. HTML::TagNames::dd,
  634. HTML::TagNames::details,
  635. HTML::TagNames::dir,
  636. HTML::TagNames::div,
  637. HTML::TagNames::dl,
  638. HTML::TagNames::dt,
  639. HTML::TagNames::embed,
  640. HTML::TagNames::fieldset,
  641. HTML::TagNames::figcaption,
  642. HTML::TagNames::figure,
  643. HTML::TagNames::footer,
  644. HTML::TagNames::form,
  645. HTML::TagNames::frame,
  646. HTML::TagNames::frameset,
  647. HTML::TagNames::h1,
  648. HTML::TagNames::h2,
  649. HTML::TagNames::h3,
  650. HTML::TagNames::h4,
  651. HTML::TagNames::h5,
  652. HTML::TagNames::h6,
  653. HTML::TagNames::head,
  654. HTML::TagNames::header,
  655. HTML::TagNames::hgroup,
  656. HTML::TagNames::hr,
  657. HTML::TagNames::html,
  658. HTML::TagNames::iframe,
  659. HTML::TagNames::img,
  660. HTML::TagNames::input,
  661. HTML::TagNames::keygen,
  662. HTML::TagNames::li,
  663. HTML::TagNames::link,
  664. HTML::TagNames::listing,
  665. HTML::TagNames::main,
  666. HTML::TagNames::marquee,
  667. HTML::TagNames::menu,
  668. HTML::TagNames::meta,
  669. HTML::TagNames::nav,
  670. HTML::TagNames::noembed,
  671. HTML::TagNames::noframes,
  672. HTML::TagNames::noscript,
  673. HTML::TagNames::object,
  674. HTML::TagNames::ol,
  675. HTML::TagNames::p,
  676. HTML::TagNames::param,
  677. HTML::TagNames::plaintext,
  678. HTML::TagNames::pre,
  679. HTML::TagNames::script,
  680. HTML::TagNames::section,
  681. HTML::TagNames::select,
  682. HTML::TagNames::source,
  683. HTML::TagNames::style,
  684. HTML::TagNames::summary,
  685. HTML::TagNames::table,
  686. HTML::TagNames::tbody,
  687. HTML::TagNames::td,
  688. HTML::TagNames::template_,
  689. HTML::TagNames::textarea,
  690. HTML::TagNames::tfoot,
  691. HTML::TagNames::th,
  692. HTML::TagNames::thead,
  693. HTML::TagNames::title,
  694. HTML::TagNames::tr,
  695. HTML::TagNames::track,
  696. HTML::TagNames::ul,
  697. HTML::TagNames::wbr,
  698. HTML::TagNames::xmp);
  699. }
  700. void HTMLDocumentParser::handle_in_body(HTMLToken& token)
  701. {
  702. if (token.is_character()) {
  703. if (token.codepoint() == 0) {
  704. PARSE_ERROR();
  705. return;
  706. }
  707. if (token.is_parser_whitespace()) {
  708. reconstruct_the_active_formatting_elements();
  709. insert_character(token.codepoint());
  710. return;
  711. }
  712. reconstruct_the_active_formatting_elements();
  713. insert_character(token.codepoint());
  714. m_frameset_ok = false;
  715. return;
  716. }
  717. if (token.is_comment()) {
  718. insert_comment(token);
  719. return;
  720. }
  721. if (token.is_doctype()) {
  722. PARSE_ERROR();
  723. return;
  724. }
  725. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  726. PARSE_ERROR();
  727. if (m_stack_of_open_elements.contains(HTML::TagNames::template_))
  728. return;
  729. for (auto& attribute : token.m_tag.attributes) {
  730. if (current_node().has_attribute(attribute.name_builder.string_view()))
  731. continue;
  732. current_node().set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string());
  733. }
  734. return;
  735. }
  736. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) {
  737. process_using_the_rules_for(InsertionMode::InHead, token);
  738. return;
  739. }
  740. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
  741. process_using_the_rules_for(InsertionMode::InHead, token);
  742. return;
  743. }
  744. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) {
  745. PARSE_ERROR();
  746. if (m_stack_of_open_elements.elements().size() == 1
  747. || node_before_current_node().tag_name() != HTML::TagNames::body
  748. || m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
  749. return;
  750. }
  751. m_frameset_ok = false;
  752. for (auto& attribute : token.m_tag.attributes) {
  753. if (node_before_current_node().has_attribute(attribute.name_builder.string_view()))
  754. continue;
  755. node_before_current_node().set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string());
  756. }
  757. return;
  758. }
  759. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) {
  760. TODO();
  761. }
  762. if (token.is_end_of_file()) {
  763. // FIXME: If the stack of template insertion modes is not empty,
  764. // then process the token using the rules for the "in template" insertion mode.
  765. // FIXME: If there is a node in the stack of open elements that is not either
  766. // a dd element, a dt element, an li element, an optgroup element, an option element,
  767. // a p element, an rb element, an rp element, an rt element, an rtc element,
  768. // a tbody element, a td element, a tfoot element, a th element, a thead element,
  769. // a tr element, the body element, or the html element, then this is a parse error.
  770. stop_parsing();
  771. return;
  772. }
  773. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::body) {
  774. if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) {
  775. PARSE_ERROR();
  776. return;
  777. }
  778. for (auto& node : m_stack_of_open_elements.elements()) {
  779. if (!node.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) {
  780. PARSE_ERROR();
  781. break;
  782. }
  783. }
  784. m_insertion_mode = InsertionMode::AfterBody;
  785. return;
  786. }
  787. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) {
  788. if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) {
  789. PARSE_ERROR();
  790. return;
  791. }
  792. for (auto& node : m_stack_of_open_elements.elements()) {
  793. if (!node.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) {
  794. PARSE_ERROR();
  795. break;
  796. }
  797. }
  798. m_insertion_mode = InsertionMode::AfterBody;
  799. process_using_the_rules_for(m_insertion_mode, token);
  800. return;
  801. }
  802. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) {
  803. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  804. close_a_p_element();
  805. insert_html_element(token);
  806. return;
  807. }
  808. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
  809. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  810. close_a_p_element();
  811. if (current_node().tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
  812. PARSE_ERROR();
  813. m_stack_of_open_elements.pop();
  814. }
  815. insert_html_element(token);
  816. return;
  817. }
  818. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::pre, HTML::TagNames::listing)) {
  819. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  820. close_a_p_element();
  821. insert_html_element(token);
  822. m_frameset_ok = false;
  823. // If the next token is a U+000A LINE FEED (LF) character token,
  824. // then ignore that token and move on to the next one.
  825. // (Newlines at the start of pre blocks are ignored as an authoring convenience.)
  826. auto next_token = m_tokenizer.next_token();
  827. if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') {
  828. // Ignore it.
  829. } else {
  830. process_using_the_rules_for(m_insertion_mode, next_token.value());
  831. }
  832. return;
  833. }
  834. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) {
  835. if (m_form_element && m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
  836. PARSE_ERROR();
  837. return;
  838. }
  839. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  840. close_a_p_element();
  841. auto element = insert_html_element(token);
  842. if (!m_stack_of_open_elements.contains(HTML::TagNames::template_))
  843. m_form_element = to<HTMLFormElement>(*element);
  844. return;
  845. }
  846. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::li) {
  847. m_frameset_ok = false;
  848. for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
  849. RefPtr<Element> node = m_stack_of_open_elements.elements()[i];
  850. if (node->tag_name() == HTML::TagNames::li) {
  851. generate_implied_end_tags(HTML::TagNames::li);
  852. if (current_node().tag_name() != HTML::TagNames::li) {
  853. PARSE_ERROR();
  854. }
  855. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li);
  856. break;
  857. }
  858. if (is_special_tag(node->tag_name()) && !node->tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p))
  859. break;
  860. }
  861. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  862. close_a_p_element();
  863. insert_html_element(token);
  864. return;
  865. }
  866. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) {
  867. m_frameset_ok = false;
  868. for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
  869. RefPtr<Element> node = m_stack_of_open_elements.elements()[i];
  870. if (node->tag_name() == HTML::TagNames::dd) {
  871. generate_implied_end_tags(HTML::TagNames::dd);
  872. if (current_node().tag_name() != HTML::TagNames::dd) {
  873. PARSE_ERROR();
  874. }
  875. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dd);
  876. break;
  877. }
  878. if (node->tag_name() == HTML::TagNames::dt) {
  879. generate_implied_end_tags(HTML::TagNames::dt);
  880. if (current_node().tag_name() != HTML::TagNames::dt) {
  881. PARSE_ERROR();
  882. }
  883. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dt);
  884. break;
  885. }
  886. if (is_special_tag(node->tag_name()) && !node->tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p))
  887. break;
  888. }
  889. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  890. close_a_p_element();
  891. insert_html_element(token);
  892. return;
  893. }
  894. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::plaintext) {
  895. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  896. close_a_p_element();
  897. insert_html_element(token);
  898. m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
  899. return;
  900. }
  901. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::button) {
  902. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::button)) {
  903. PARSE_ERROR();
  904. generate_implied_end_tags();
  905. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::button);
  906. }
  907. reconstruct_the_active_formatting_elements();
  908. insert_html_element(token);
  909. m_frameset_ok = false;
  910. return;
  911. }
  912. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::button, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::pre, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) {
  913. if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
  914. PARSE_ERROR();
  915. return;
  916. }
  917. generate_implied_end_tags();
  918. if (current_node().tag_name() != token.tag_name()) {
  919. PARSE_ERROR();
  920. }
  921. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
  922. return;
  923. }
  924. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::form) {
  925. if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
  926. auto node = m_form_element;
  927. m_form_element = nullptr;
  928. if (!node || m_stack_of_open_elements.has_in_scope(*node)) {
  929. PARSE_ERROR();
  930. return;
  931. }
  932. generate_implied_end_tags();
  933. if (&current_node() != node) {
  934. PARSE_ERROR();
  935. }
  936. m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == node.ptr(); });
  937. } else {
  938. if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::form)) {
  939. PARSE_ERROR();
  940. return;
  941. }
  942. generate_implied_end_tags();
  943. if (current_node().tag_name() != HTML::TagNames::form) {
  944. PARSE_ERROR();
  945. }
  946. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::form);
  947. }
  948. return;
  949. }
  950. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::p) {
  951. if (!m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) {
  952. PARSE_ERROR();
  953. HTMLToken fake_p_token;
  954. fake_p_token.m_type = HTMLToken::Type::StartTag;
  955. fake_p_token.m_tag.tag_name.append(HTML::TagNames::p);
  956. insert_html_element(fake_p_token);
  957. }
  958. close_a_p_element();
  959. return;
  960. }
  961. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::li) {
  962. if (!m_stack_of_open_elements.has_in_list_item_scope(HTML::TagNames::li)) {
  963. PARSE_ERROR();
  964. return;
  965. }
  966. generate_implied_end_tags(HTML::TagNames::li);
  967. if (current_node().tag_name() != HTML::TagNames::li) {
  968. PARSE_ERROR();
  969. dbg() << "Expected <li> current node, but had <" << current_node().tag_name() << ">";
  970. }
  971. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li);
  972. return;
  973. }
  974. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) {
  975. if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
  976. PARSE_ERROR();
  977. return;
  978. }
  979. generate_implied_end_tags(token.tag_name());
  980. if (current_node().tag_name() != token.tag_name()) {
  981. PARSE_ERROR();
  982. }
  983. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
  984. return;
  985. }
  986. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
  987. if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::h1)
  988. && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h2)
  989. && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h3)
  990. && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h4)
  991. && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h5)
  992. && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h6)) {
  993. PARSE_ERROR();
  994. return;
  995. }
  996. generate_implied_end_tags();
  997. if (current_node().tag_name() != token.tag_name()) {
  998. PARSE_ERROR();
  999. }
  1000. for (;;) {
  1001. auto popped_element = m_stack_of_open_elements.pop();
  1002. if (popped_element->tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6))
  1003. break;
  1004. }
  1005. return;
  1006. }
  1007. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::a) {
  1008. if (auto* element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(HTML::TagNames::a)) {
  1009. PARSE_ERROR();
  1010. if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps)
  1011. goto AnyOtherEndTag;
  1012. m_list_of_active_formatting_elements.remove(*element);
  1013. m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) {
  1014. return entry.ptr() == element;
  1015. });
  1016. }
  1017. reconstruct_the_active_formatting_elements();
  1018. auto element = insert_html_element(token);
  1019. m_list_of_active_formatting_elements.add(*element);
  1020. return;
  1021. }
  1022. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) {
  1023. reconstruct_the_active_formatting_elements();
  1024. auto element = insert_html_element(token);
  1025. m_list_of_active_formatting_elements.add(*element);
  1026. return;
  1027. }
  1028. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::nobr) {
  1029. reconstruct_the_active_formatting_elements();
  1030. if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::nobr)) {
  1031. PARSE_ERROR();
  1032. run_the_adoption_agency_algorithm(token);
  1033. reconstruct_the_active_formatting_elements();
  1034. }
  1035. auto element = insert_html_element(token);
  1036. m_list_of_active_formatting_elements.add(*element);
  1037. return;
  1038. }
  1039. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::a, HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::nobr, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) {
  1040. if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps)
  1041. goto AnyOtherEndTag;
  1042. return;
  1043. }
  1044. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) {
  1045. reconstruct_the_active_formatting_elements();
  1046. insert_html_element(token);
  1047. m_list_of_active_formatting_elements.add_marker();
  1048. m_frameset_ok = false;
  1049. return;
  1050. }
  1051. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) {
  1052. if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
  1053. PARSE_ERROR();
  1054. return;
  1055. }
  1056. generate_implied_end_tags();
  1057. if (current_node().tag_name() != token.tag_name()) {
  1058. PARSE_ERROR();
  1059. }
  1060. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
  1061. m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
  1062. return;
  1063. }
  1064. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) {
  1065. if (!document().in_quirks_mode()) {
  1066. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  1067. close_a_p_element();
  1068. }
  1069. insert_html_element(token);
  1070. m_frameset_ok = false;
  1071. m_insertion_mode = InsertionMode::InTable;
  1072. return;
  1073. }
  1074. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) {
  1075. token.drop_attributes();
  1076. goto BRStartTag;
  1077. }
  1078. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::area, HTML::TagNames::br, HTML::TagNames::embed, HTML::TagNames::img, HTML::TagNames::keygen, HTML::TagNames::wbr)) {
  1079. BRStartTag:
  1080. reconstruct_the_active_formatting_elements();
  1081. insert_html_element(token);
  1082. m_stack_of_open_elements.pop();
  1083. token.acknowledge_self_closing_flag_if_set();
  1084. m_frameset_ok = false;
  1085. return;
  1086. }
  1087. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) {
  1088. reconstruct_the_active_formatting_elements();
  1089. insert_html_element(token);
  1090. m_stack_of_open_elements.pop();
  1091. token.acknowledge_self_closing_flag_if_set();
  1092. auto type_attribute = token.attribute(HTML::AttributeNames::type);
  1093. if (type_attribute.is_null() || type_attribute != "hidden") {
  1094. m_frameset_ok = false;
  1095. }
  1096. return;
  1097. }
  1098. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track)) {
  1099. insert_html_element(token);
  1100. m_stack_of_open_elements.pop();
  1101. token.acknowledge_self_closing_flag_if_set();
  1102. return;
  1103. }
  1104. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::hr) {
  1105. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
  1106. close_a_p_element();
  1107. insert_html_element(token);
  1108. m_stack_of_open_elements.pop();
  1109. token.acknowledge_self_closing_flag_if_set();
  1110. m_frameset_ok = false;
  1111. return;
  1112. }
  1113. if (token.is_start_tag() && token.tag_name().equals_ignoring_case("image")) {
  1114. // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.)
  1115. PARSE_ERROR();
  1116. token.m_tag.tag_name.clear();
  1117. token.m_tag.tag_name.append(HTML::TagNames::img);
  1118. process_using_the_rules_for(m_insertion_mode, token);
  1119. return;
  1120. }
  1121. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::textarea) {
  1122. insert_html_element(token);
  1123. m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
  1124. // If the next token is a U+000A LINE FEED (LF) character token,
  1125. // then ignore that token and move on to the next one.
  1126. // (Newlines at the start of pre blocks are ignored as an authoring convenience.)
  1127. auto next_token = m_tokenizer.next_token();
  1128. m_original_insertion_mode = m_insertion_mode;
  1129. m_frameset_ok = false;
  1130. m_insertion_mode = InsertionMode::Text;
  1131. if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') {
  1132. // Ignore it.
  1133. } else {
  1134. process_using_the_rules_for(m_insertion_mode, next_token.value());
  1135. }
  1136. return;
  1137. }
  1138. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::xmp) {
  1139. if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) {
  1140. close_a_p_element();
  1141. }
  1142. reconstruct_the_active_formatting_elements();
  1143. m_frameset_ok = false;
  1144. parse_generic_raw_text_element(token);
  1145. return;
  1146. }
  1147. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::iframe) {
  1148. m_frameset_ok = false;
  1149. parse_generic_raw_text_element(token);
  1150. return;
  1151. }
  1152. if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noembed) || (token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled))) {
  1153. parse_generic_raw_text_element(token);
  1154. return;
  1155. }
  1156. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) {
  1157. reconstruct_the_active_formatting_elements();
  1158. insert_html_element(token);
  1159. m_frameset_ok = false;
  1160. switch (m_insertion_mode) {
  1161. case InsertionMode::InTable:
  1162. case InsertionMode::InCaption:
  1163. case InsertionMode::InTableBody:
  1164. case InsertionMode::InRow:
  1165. case InsertionMode::InCell:
  1166. m_insertion_mode = InsertionMode::InSelectInTable;
  1167. break;
  1168. default:
  1169. m_insertion_mode = InsertionMode::InSelect;
  1170. break;
  1171. }
  1172. return;
  1173. }
  1174. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::optgroup, HTML::TagNames::option)) {
  1175. if (current_node().tag_name() == HTML::TagNames::option)
  1176. m_stack_of_open_elements.pop();
  1177. reconstruct_the_active_formatting_elements();
  1178. insert_html_element(token);
  1179. return;
  1180. }
  1181. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) {
  1182. TODO();
  1183. }
  1184. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) {
  1185. TODO();
  1186. }
  1187. if (token.is_start_tag() && token.tag_name() == "math") {
  1188. dbg() << "<math> element encountered.";
  1189. reconstruct_the_active_formatting_elements();
  1190. insert_html_element(token);
  1191. return;
  1192. }
  1193. if (token.is_start_tag() && token.tag_name() == "svg") {
  1194. dbg() << "<svg> element encountered.";
  1195. reconstruct_the_active_formatting_elements();
  1196. insert_html_element(token);
  1197. return;
  1198. }
  1199. if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::frame, HTML::TagNames::head, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))) {
  1200. PARSE_ERROR();
  1201. return;
  1202. }
  1203. // Any other start tag
  1204. if (token.is_start_tag()) {
  1205. reconstruct_the_active_formatting_elements();
  1206. insert_html_element(token);
  1207. return;
  1208. }
  1209. if (token.is_end_tag()) {
  1210. AnyOtherEndTag:
  1211. RefPtr<Element> node;
  1212. for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
  1213. node = m_stack_of_open_elements.elements()[i];
  1214. if (node->tag_name() == token.tag_name()) {
  1215. generate_implied_end_tags(token.tag_name());
  1216. if (node != current_node()) {
  1217. PARSE_ERROR();
  1218. }
  1219. while (&current_node() != node) {
  1220. m_stack_of_open_elements.pop();
  1221. }
  1222. m_stack_of_open_elements.pop();
  1223. break;
  1224. }
  1225. if (is_special_tag(node->tag_name())) {
  1226. PARSE_ERROR();
  1227. return;
  1228. }
  1229. }
  1230. return;
  1231. }
  1232. TODO();
  1233. }
  1234. void HTMLDocumentParser::increment_script_nesting_level()
  1235. {
  1236. ++m_script_nesting_level;
  1237. }
  1238. void HTMLDocumentParser::decrement_script_nesting_level()
  1239. {
  1240. ASSERT(m_script_nesting_level);
  1241. --m_script_nesting_level;
  1242. }
  1243. void HTMLDocumentParser::handle_text(HTMLToken& token)
  1244. {
  1245. if (token.is_character()) {
  1246. insert_character(token.codepoint());
  1247. return;
  1248. }
  1249. if (token.is_end_of_file()) {
  1250. PARSE_ERROR();
  1251. if (current_node().tag_name() == HTML::TagNames::script)
  1252. to<HTMLScriptElement>(current_node()).set_already_started({}, true);
  1253. m_stack_of_open_elements.pop();
  1254. m_insertion_mode = m_original_insertion_mode;
  1255. process_using_the_rules_for(m_insertion_mode, token);
  1256. return;
  1257. }
  1258. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::script) {
  1259. NonnullRefPtr<HTMLScriptElement> script = to<HTMLScriptElement>(current_node());
  1260. m_stack_of_open_elements.pop();
  1261. m_insertion_mode = m_original_insertion_mode;
  1262. // FIXME: Handle tokenizer insertion point stuff here.
  1263. increment_script_nesting_level();
  1264. script->prepare_script({});
  1265. decrement_script_nesting_level();
  1266. if (script_nesting_level() == 0)
  1267. m_parser_pause_flag = false;
  1268. // FIXME: Handle tokenizer insertion point stuff here too.
  1269. while (document().pending_parsing_blocking_script()) {
  1270. if (script_nesting_level() != 0) {
  1271. m_parser_pause_flag = true;
  1272. // FIXME: Abort the processing of any nested invocations of the tokenizer,
  1273. // yielding control back to the caller. (Tokenization will resume when
  1274. // the caller returns to the "outer" tree construction stage.)
  1275. TODO();
  1276. } else {
  1277. auto the_script = document().take_pending_parsing_blocking_script({});
  1278. m_tokenizer.set_blocked(true);
  1279. // FIXME: If the parser's Document has a style sheet that is blocking scripts
  1280. // or the script's "ready to be parser-executed" flag is not set:
  1281. // spin the event loop until the parser's Document has no style sheet
  1282. // that is blocking scripts and the script's "ready to be parser-executed"
  1283. // flag is set.
  1284. ASSERT(the_script->is_ready_to_be_parser_executed());
  1285. if (m_aborted)
  1286. return;
  1287. m_tokenizer.set_blocked(false);
  1288. // FIXME: Handle tokenizer insertion point stuff here too.
  1289. ASSERT(script_nesting_level() == 0);
  1290. increment_script_nesting_level();
  1291. the_script->execute_script();
  1292. decrement_script_nesting_level();
  1293. ASSERT(script_nesting_level() == 0);
  1294. m_parser_pause_flag = false;
  1295. // FIXME: Handle tokenizer insertion point stuff here too.
  1296. }
  1297. }
  1298. return;
  1299. }
  1300. if (token.is_end_tag()) {
  1301. m_stack_of_open_elements.pop();
  1302. m_insertion_mode = m_original_insertion_mode;
  1303. return;
  1304. }
  1305. TODO();
  1306. }
  1307. void HTMLDocumentParser::clear_the_stack_back_to_a_table_context()
  1308. {
  1309. while (!current_node().tag_name().is_one_of(HTML::TagNames::table, HTML::TagNames::template_, HTML::TagNames::html))
  1310. m_stack_of_open_elements.pop();
  1311. }
  1312. void HTMLDocumentParser::clear_the_stack_back_to_a_table_row_context()
  1313. {
  1314. while (!current_node().tag_name().is_one_of(HTML::TagNames::tr, HTML::TagNames::template_, HTML::TagNames::html))
  1315. m_stack_of_open_elements.pop();
  1316. }
  1317. void HTMLDocumentParser::clear_the_stack_back_to_a_table_body_context()
  1318. {
  1319. while (!current_node().tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::template_, HTML::TagNames::html))
  1320. m_stack_of_open_elements.pop();
  1321. }
  1322. void HTMLDocumentParser::handle_in_row(HTMLToken& token)
  1323. {
  1324. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) {
  1325. clear_the_stack_back_to_a_table_row_context();
  1326. insert_html_element(token);
  1327. m_insertion_mode = InsertionMode::InCell;
  1328. m_list_of_active_formatting_elements.add_marker();
  1329. return;
  1330. }
  1331. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::tr) {
  1332. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
  1333. PARSE_ERROR();
  1334. return;
  1335. }
  1336. clear_the_stack_back_to_a_table_row_context();
  1337. m_stack_of_open_elements.pop();
  1338. m_insertion_mode = InsertionMode::InTableBody;
  1339. return;
  1340. }
  1341. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
  1342. if (m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
  1343. PARSE_ERROR();
  1344. return;
  1345. }
  1346. clear_the_stack_back_to_a_table_row_context();
  1347. m_stack_of_open_elements.pop();
  1348. m_insertion_mode = InsertionMode::InTableBody;
  1349. process_using_the_rules_for(m_insertion_mode, token);
  1350. return;
  1351. }
  1352. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
  1353. if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
  1354. PARSE_ERROR();
  1355. return;
  1356. }
  1357. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
  1358. return;
  1359. }
  1360. clear_the_stack_back_to_a_table_row_context();
  1361. m_stack_of_open_elements.pop();
  1362. m_insertion_mode = InsertionMode::InTableBody;
  1363. process_using_the_rules_for(m_insertion_mode, token);
  1364. return;
  1365. }
  1366. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th)) {
  1367. PARSE_ERROR();
  1368. return;
  1369. }
  1370. process_using_the_rules_for(InsertionMode::InTable, token);
  1371. }
  1372. void HTMLDocumentParser::close_the_cell()
  1373. {
  1374. generate_implied_end_tags();
  1375. if (!current_node().tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
  1376. PARSE_ERROR();
  1377. }
  1378. while (!current_node().tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th))
  1379. m_stack_of_open_elements.pop();
  1380. m_stack_of_open_elements.pop();
  1381. m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
  1382. m_insertion_mode = InsertionMode::InRow;
  1383. }
  1384. void HTMLDocumentParser::handle_in_cell(HTMLToken& token)
  1385. {
  1386. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
  1387. if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
  1388. PARSE_ERROR();
  1389. return;
  1390. }
  1391. generate_implied_end_tags();
  1392. if (current_node().tag_name() != token.tag_name()) {
  1393. PARSE_ERROR();
  1394. }
  1395. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
  1396. m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
  1397. m_insertion_mode = InsertionMode::InRow;
  1398. return;
  1399. }
  1400. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) {
  1401. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::td) && m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::th)) {
  1402. PARSE_ERROR();
  1403. return;
  1404. }
  1405. close_the_cell();
  1406. process_using_the_rules_for(m_insertion_mode, token);
  1407. return;
  1408. }
  1409. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html)) {
  1410. PARSE_ERROR();
  1411. return;
  1412. }
  1413. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
  1414. if (m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
  1415. PARSE_ERROR();
  1416. return;
  1417. }
  1418. close_the_cell();
  1419. // Reprocess the token.
  1420. process_using_the_rules_for(m_insertion_mode, token);
  1421. return;
  1422. }
  1423. process_using_the_rules_for(InsertionMode::InBody, token);
  1424. }
  1425. void HTMLDocumentParser::handle_in_table_text(HTMLToken& token)
  1426. {
  1427. if (token.is_character()) {
  1428. if (token.codepoint() == 0) {
  1429. PARSE_ERROR();
  1430. return;
  1431. }
  1432. m_pending_table_character_tokens.append(token);
  1433. return;
  1434. }
  1435. for (auto& pending_token : m_pending_table_character_tokens) {
  1436. ASSERT(pending_token.is_character());
  1437. if (!pending_token.is_parser_whitespace()) {
  1438. // FIXME: If any of the tokens in the pending table character tokens list
  1439. // are character tokens that are not ASCII whitespace, then this is a parse error:
  1440. // reprocess the character tokens in the pending table character tokens list using
  1441. // the rules given in the "anything else" entry in the "in table" insertion mode.
  1442. TODO();
  1443. }
  1444. }
  1445. for (auto& pending_token : m_pending_table_character_tokens) {
  1446. insert_character(pending_token.codepoint());
  1447. }
  1448. m_insertion_mode = m_original_insertion_mode;
  1449. process_using_the_rules_for(m_insertion_mode, token);
  1450. }
  1451. void HTMLDocumentParser::handle_in_table_body(HTMLToken& token)
  1452. {
  1453. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) {
  1454. clear_the_stack_back_to_a_table_body_context();
  1455. insert_html_element(token);
  1456. m_insertion_mode = InsertionMode::InRow;
  1457. return;
  1458. }
  1459. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) {
  1460. PARSE_ERROR();
  1461. clear_the_stack_back_to_a_table_body_context();
  1462. HTMLToken fake_tr_token;
  1463. fake_tr_token.m_type = HTMLToken::Type::StartTag;
  1464. fake_tr_token.m_tag.tag_name.append(HTML::TagNames::tr);
  1465. insert_html_element(fake_tr_token);
  1466. m_insertion_mode = InsertionMode::InRow;
  1467. process_using_the_rules_for(m_insertion_mode, token);
  1468. return;
  1469. }
  1470. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
  1471. if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
  1472. PARSE_ERROR();
  1473. return;
  1474. }
  1475. clear_the_stack_back_to_a_table_body_context();
  1476. m_stack_of_open_elements.pop();
  1477. m_insertion_mode = InsertionMode::InTable;
  1478. return;
  1479. }
  1480. if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead))
  1481. || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) {
  1482. // FIXME: If the stack of open elements does not have a tbody, thead, or tfoot element in table scope, this is a parse error; ignore the token.
  1483. clear_the_stack_back_to_a_table_body_context();
  1484. m_stack_of_open_elements.pop();
  1485. m_insertion_mode = InsertionMode::InTable;
  1486. process_using_the_rules_for(InsertionMode::InTable, token);
  1487. return;
  1488. }
  1489. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) {
  1490. PARSE_ERROR();
  1491. return;
  1492. }
  1493. process_using_the_rules_for(InsertionMode::InTable, token);
  1494. }
  1495. void HTMLDocumentParser::handle_in_table(HTMLToken& token)
  1496. {
  1497. if (token.is_character() && current_node().tag_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
  1498. m_pending_table_character_tokens.clear();
  1499. m_original_insertion_mode = m_insertion_mode;
  1500. m_insertion_mode = InsertionMode::InTableText;
  1501. process_using_the_rules_for(InsertionMode::InTableText, token);
  1502. return;
  1503. }
  1504. if (token.is_comment()) {
  1505. insert_comment(token);
  1506. return;
  1507. }
  1508. if (token.is_doctype()) {
  1509. PARSE_ERROR();
  1510. return;
  1511. }
  1512. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::caption) {
  1513. clear_the_stack_back_to_a_table_context();
  1514. m_list_of_active_formatting_elements.add_marker();
  1515. insert_html_element(token);
  1516. m_insertion_mode = InsertionMode::InCaption;
  1517. return;
  1518. }
  1519. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::colgroup) {
  1520. clear_the_stack_back_to_a_table_context();
  1521. insert_html_element(token);
  1522. m_insertion_mode = InsertionMode::InColumnGroup;
  1523. return;
  1524. }
  1525. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) {
  1526. clear_the_stack_back_to_a_table_context();
  1527. HTMLToken fake_colgroup_token;
  1528. fake_colgroup_token.m_type = HTMLToken::Type::StartTag;
  1529. fake_colgroup_token.m_tag.tag_name.append(HTML::TagNames::colgroup);
  1530. insert_html_element(fake_colgroup_token);
  1531. m_insertion_mode = InsertionMode::InColumnGroup;
  1532. process_using_the_rules_for(m_insertion_mode, token);
  1533. return;
  1534. }
  1535. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
  1536. clear_the_stack_back_to_a_table_context();
  1537. insert_html_element(token);
  1538. m_insertion_mode = InsertionMode::InTableBody;
  1539. return;
  1540. }
  1541. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) {
  1542. clear_the_stack_back_to_a_table_context();
  1543. HTMLToken fake_tbody_token;
  1544. fake_tbody_token.m_type = HTMLToken::Type::StartTag;
  1545. fake_tbody_token.m_tag.tag_name.append(HTML::TagNames::tbody);
  1546. insert_html_element(fake_tbody_token);
  1547. m_insertion_mode = InsertionMode::InTableBody;
  1548. process_using_the_rules_for(InsertionMode::InTableBody, token);
  1549. return;
  1550. }
  1551. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) {
  1552. PARSE_ERROR();
  1553. TODO();
  1554. }
  1555. if (token.is_end_tag()) {
  1556. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::table)) {
  1557. PARSE_ERROR();
  1558. return;
  1559. }
  1560. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::table);
  1561. reset_the_insertion_mode_appropriately();
  1562. return;
  1563. }
  1564. TODO();
  1565. }
  1566. void HTMLDocumentParser::handle_in_select_in_table(HTMLToken& token)
  1567. {
  1568. (void)token;
  1569. TODO();
  1570. }
  1571. void HTMLDocumentParser::handle_in_select(HTMLToken& token)
  1572. {
  1573. if (token.is_character()) {
  1574. if (token.codepoint() == 0) {
  1575. PARSE_ERROR();
  1576. return;
  1577. }
  1578. insert_character(token.codepoint());
  1579. return;
  1580. }
  1581. if (token.is_comment()) {
  1582. insert_comment(token);
  1583. return;
  1584. }
  1585. if (token.is_doctype()) {
  1586. PARSE_ERROR();
  1587. return;
  1588. }
  1589. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  1590. process_using_the_rules_for(InsertionMode::InBody, token);
  1591. return;
  1592. }
  1593. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::option) {
  1594. if (current_node().tag_name() == HTML::TagNames::option) {
  1595. m_stack_of_open_elements.pop();
  1596. }
  1597. insert_html_element(token);
  1598. return;
  1599. }
  1600. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::optgroup) {
  1601. if (current_node().tag_name() == HTML::TagNames::option) {
  1602. m_stack_of_open_elements.pop();
  1603. }
  1604. if (current_node().tag_name() == HTML::TagNames::optgroup) {
  1605. m_stack_of_open_elements.pop();
  1606. }
  1607. insert_html_element(token);
  1608. return;
  1609. }
  1610. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::optgroup) {
  1611. if (current_node().tag_name() == HTML::TagNames::option && node_before_current_node().tag_name() == HTML::TagNames::optgroup)
  1612. m_stack_of_open_elements.pop();
  1613. if (current_node().tag_name() == HTML::TagNames::optgroup) {
  1614. m_stack_of_open_elements.pop();
  1615. } else {
  1616. PARSE_ERROR();
  1617. return;
  1618. }
  1619. return;
  1620. }
  1621. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::option) {
  1622. if (current_node().tag_name() == HTML::TagNames::option) {
  1623. m_stack_of_open_elements.pop();
  1624. } else {
  1625. PARSE_ERROR();
  1626. return;
  1627. }
  1628. return;
  1629. }
  1630. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::select) {
  1631. if (m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) {
  1632. PARSE_ERROR();
  1633. return;
  1634. }
  1635. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
  1636. reset_the_insertion_mode_appropriately();
  1637. return;
  1638. }
  1639. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) {
  1640. PARSE_ERROR();
  1641. if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select))
  1642. return;
  1643. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
  1644. reset_the_insertion_mode_appropriately();
  1645. return;
  1646. }
  1647. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::input, HTML::TagNames::keygen, HTML::TagNames::textarea)) {
  1648. PARSE_ERROR();
  1649. if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) {
  1650. return;
  1651. }
  1652. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
  1653. reset_the_insertion_mode_appropriately();
  1654. process_using_the_rules_for(m_insertion_mode, token);
  1655. return;
  1656. }
  1657. if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::script, HTML::TagNames::template_)) {
  1658. process_using_the_rules_for(InsertionMode::InHead, token);
  1659. return;
  1660. }
  1661. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
  1662. process_using_the_rules_for(InsertionMode::InHead, token);
  1663. return;
  1664. }
  1665. if (token.is_end_of_file()) {
  1666. process_using_the_rules_for(InsertionMode::InBody, token);
  1667. return;
  1668. }
  1669. PARSE_ERROR();
  1670. }
  1671. void HTMLDocumentParser::handle_in_caption(HTMLToken& token)
  1672. {
  1673. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::caption) {
  1674. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) {
  1675. PARSE_ERROR();
  1676. return;
  1677. }
  1678. generate_implied_end_tags();
  1679. if (current_node().tag_name() != HTML::TagNames::caption)
  1680. PARSE_ERROR();
  1681. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption);
  1682. m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
  1683. m_insertion_mode = InsertionMode::InTable;
  1684. return;
  1685. }
  1686. if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))
  1687. || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) {
  1688. if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) {
  1689. PARSE_ERROR();
  1690. return;
  1691. }
  1692. generate_implied_end_tags();
  1693. if (current_node().tag_name() != HTML::TagNames::caption)
  1694. PARSE_ERROR();
  1695. m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption);
  1696. m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
  1697. m_insertion_mode = InsertionMode::InTable;
  1698. process_using_the_rules_for(m_insertion_mode, token);
  1699. return;
  1700. }
  1701. if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) {
  1702. PARSE_ERROR();
  1703. return;
  1704. }
  1705. process_using_the_rules_for(InsertionMode::InBody, token);
  1706. }
  1707. void HTMLDocumentParser::handle_in_column_group(HTMLToken& token)
  1708. {
  1709. if (token.is_character() && token.is_parser_whitespace()) {
  1710. insert_character(token.codepoint());
  1711. return;
  1712. }
  1713. if (token.is_comment()) {
  1714. insert_comment(token);
  1715. return;
  1716. }
  1717. if (token.is_doctype()) {
  1718. PARSE_ERROR();
  1719. return;
  1720. }
  1721. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
  1722. process_using_the_rules_for(InsertionMode::InBody, token);
  1723. return;
  1724. }
  1725. if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) {
  1726. insert_html_element(token);
  1727. m_stack_of_open_elements.pop();
  1728. token.acknowledge_self_closing_flag_if_set();
  1729. return;
  1730. }
  1731. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::colgroup) {
  1732. if (current_node().tag_name() != HTML::TagNames::colgroup) {
  1733. PARSE_ERROR();
  1734. return;
  1735. }
  1736. m_stack_of_open_elements.pop();
  1737. m_insertion_mode = InsertionMode::InTable;
  1738. return;
  1739. }
  1740. if (token.is_end_tag() && token.tag_name() == HTML::TagNames::col) {
  1741. PARSE_ERROR();
  1742. return;
  1743. }
  1744. if ((token.is_start_tag() || token.is_end_tag()) && token.tag_name() == HTML::TagNames::template_) {
  1745. process_using_the_rules_for(InsertionMode::InHead, token);
  1746. return;
  1747. }
  1748. if (token.is_end_of_file()) {
  1749. process_using_the_rules_for(InsertionMode::InBody, token);
  1750. return;
  1751. }
  1752. if (current_node().tag_name() != HTML::TagNames::colgroup) {
  1753. PARSE_ERROR();
  1754. return;
  1755. }
  1756. m_stack_of_open_elements.pop();
  1757. m_insertion_mode = InsertionMode::InTable;
  1758. process_using_the_rules_for(m_insertion_mode, token);
  1759. }
  1760. void HTMLDocumentParser::reset_the_insertion_mode_appropriately()
  1761. {
  1762. for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
  1763. RefPtr<Element> node = m_stack_of_open_elements.elements().at(i);
  1764. if (node->tag_name() == HTML::TagNames::select) {
  1765. TODO();
  1766. }
  1767. if (node->tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
  1768. m_insertion_mode = InsertionMode::InCell;
  1769. return;
  1770. }
  1771. if (node->tag_name() == HTML::TagNames::tr) {
  1772. m_insertion_mode = InsertionMode::InRow;
  1773. return;
  1774. }
  1775. if (node->tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::thead, HTML::TagNames::tfoot)) {
  1776. m_insertion_mode = InsertionMode::InTableBody;
  1777. return;
  1778. }
  1779. if (node->tag_name() == HTML::TagNames::caption) {
  1780. m_insertion_mode = InsertionMode::InCaption;
  1781. return;
  1782. }
  1783. if (node->tag_name() == HTML::TagNames::colgroup) {
  1784. m_insertion_mode = InsertionMode::InColumnGroup;
  1785. return;
  1786. }
  1787. if (node->tag_name() == HTML::TagNames::table) {
  1788. m_insertion_mode = InsertionMode::InTable;
  1789. return;
  1790. }
  1791. if (node->tag_name() == HTML::TagNames::template_) {
  1792. TODO();
  1793. }
  1794. if (node->tag_name() == HTML::TagNames::body) {
  1795. m_insertion_mode = InsertionMode::InBody;
  1796. return;
  1797. }
  1798. if (node->tag_name() == HTML::TagNames::frameset) {
  1799. m_insertion_mode = InsertionMode::InFrameset;
  1800. if (m_parsing_fragment) {
  1801. TODO();
  1802. }
  1803. return;
  1804. }
  1805. if (node->tag_name() == HTML::TagNames::html) {
  1806. TODO();
  1807. }
  1808. }
  1809. m_insertion_mode = InsertionMode::InBody;
  1810. if (m_parsing_fragment) {
  1811. TODO();
  1812. }
  1813. }
  1814. const char* HTMLDocumentParser::insertion_mode_name() const
  1815. {
  1816. switch (m_insertion_mode) {
  1817. #define __ENUMERATE_INSERTION_MODE(mode) \
  1818. case InsertionMode::mode: \
  1819. return #mode;
  1820. ENUMERATE_INSERTION_MODES
  1821. #undef __ENUMERATE_INSERTION_MODE
  1822. }
  1823. ASSERT_NOT_REACHED();
  1824. }
  1825. Document& HTMLDocumentParser::document()
  1826. {
  1827. return *m_document;
  1828. }
  1829. }