Kaynağa Gözat

LibWeb: Handle more MathML/HTML integration points in the parser

Andreas Kling 9 ay önce
ebeveyn
işleme
0a47d8cb08

+ 3 - 4
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html-integration-point.txt

@@ -6,11 +6,10 @@ Rerun
 
 Found 5 tests
 
-3 Pass
-2 Fail
+5 Pass
 Details
-Result	Test Name	MessageFail	MathML annotation-xml with encoding=text/html should be an HTML integration point	
-Fail	MathML annotation-xml with encoding=application/xhtml+xml should be an HTML integration point	
+Result	Test Name	MessagePass	MathML annotation-xml with encoding=text/html should be an HTML integration point	
+Pass	MathML annotation-xml with encoding=application/xhtml+xml should be an HTML integration point	
 Pass	SVG foreignObject should be an HTML integration point	
 Pass	SVG desc should be an HTML integration point	
 Pass	SVG title should be an HTML integration point	

+ 3 - 3
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_innerHTML_math.txt

@@ -6,10 +6,10 @@ Rerun
 
 Found 8 tests
 
-3 Pass
-5 Fail
+4 Pass
+4 Fail
 Details
-Result	Test Name	MessageFail	html5lib_innerHTML_math.html 0e7e70d0dcf0c26593203b36cac4fa7f6325613e	
+Result	Test Name	MessagePass	html5lib_innerHTML_math.html 0e7e70d0dcf0c26593203b36cac4fa7f6325613e	
 Fail	html5lib_innerHTML_math.html fa7d4a31838dbcc16bf73672f2f4486cca185673	
 Fail	html5lib_innerHTML_math.html d9d2e4c0e926a91f5e704846cdbc855e3cb21949	
 Fail	html5lib_innerHTML_math.html c04b203803f6b3bec3db65db16854e7e624d13ef	

+ 3 - 2
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_tests12.txt

@@ -6,7 +6,8 @@ Rerun
 
 Found 2 tests
 
-2 Fail
+1 Pass
+1 Fail
 Details
 Result	Test Name	MessageFail	html5lib_tests12.html 95751b82f57d4feaaf06d208d57b7f6cc4d5fef5	
-Fail	html5lib_tests12.html 411c792cef85cbb029d5c91f4a2142751a319bc2	
+Pass	html5lib_tests12.html 411c792cef85cbb029d5c91f4a2142751a319bc2	

+ 4 - 4
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_tests26.txt

@@ -6,8 +6,8 @@ Rerun
 
 Found 20 tests
 
-16 Pass
-4 Fail
+18 Pass
+2 Fail
 Details
 Result	Test Name	MessageFail	html5lib_tests26.html 6232bd8c710002d3b3c375903a712d05163a821d	
 Pass	html5lib_tests26.html 5e4fff339b6d191d80311bfa258a9b62e063c6aa	
@@ -21,8 +21,8 @@ Pass	html5lib_tests26.html c085683feffd9da64f8782545042884173a1d1e3
 Fail	html5lib_tests26.html be84971ac4a451e91099d225d0315dd17a88b830	
 Pass	html5lib_tests26.html 9d63346ca23dc3ee41e29fe7d8403934bce8c610	
 Pass	html5lib_tests26.html 8434eb9c0c3daf656b3158f5955d07e39ddc6444	
-Fail	html5lib_tests26.html 6c95a99290e309b638b958272686a21486fa561e	
-Fail	html5lib_tests26.html 37d360649a2b092ad05e1be1c9ea65bfec83ca6d	
+Pass	html5lib_tests26.html 6c95a99290e309b638b958272686a21486fa561e	
+Pass	html5lib_tests26.html 37d360649a2b092ad05e1be1c9ea65bfec83ca6d	
 Pass	html5lib_tests26.html 8919621ebbd1f4df0fdaacee3e53dc959a2d9235	
 Pass	html5lib_tests26.html 3723d028349cbbc1a5c3f014987bb11c8ba804d0	
 Pass	html5lib_tests26.html e370b3102af9ba28e24f5c41a08443cdd9dd6d25	

+ 2 - 3
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_tests9.txt

@@ -6,13 +6,12 @@ Rerun
 
 Found 27 tests
 
-26 Pass
-1 Fail
+27 Pass
 Details
 Result	Test Name	MessagePass	html5lib_tests9.html cb005f4b2a248cc98dc153d7391715b8d113cd0d	
 Pass	html5lib_tests9.html 6b687e562bd878d3a6098f0a1b1c05b04dc8c02c	
 Pass	html5lib_tests9.html a28615629ac367bd8127ff3049e81b349e7ec7f6	
-Fail	html5lib_tests9.html d70e711bf9b7582d9b83488ab14f99b53a0f3a26	
+Pass	html5lib_tests9.html d70e711bf9b7582d9b83488ab14f99b53a0f3a26	
 Pass	html5lib_tests9.html b2a8131e72e53265479c08cd18d4f4663278a021	
 Pass	html5lib_tests9.html a45a8948b799dadc321a86ff0bebf13167b5f076	
 Pass	html5lib_tests9.html 9d6809ff0d5796525b655f44e8abe4267cfd84e1	

+ 2 - 3
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/math-parse01.txt

@@ -6,8 +6,7 @@ Rerun
 
 Found 9 tests
 
-8 Pass
-1 Fail
+9 Pass
 Details
 Result	Test Name	MessagePass	The id attribute should be recognised on math elements	
 Pass	The node name should be math	
@@ -17,4 +16,4 @@ Pass	Nested mrow elements should be parsed correctly
 Pass	Testing rang and lang entity code points	
 Pass	Testing Kopf (Plane 1) entity code point	
 Pass	Empty element tags in annotation-xml parsed as per XML.	
-Fail	html tags allowed in annotation-xml/@encoding='text/html'.	
+Pass	html tags allowed in annotation-xml/@encoding='text/html'.	

+ 11 - 12
Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/math-parse03.txt

@@ -6,25 +6,24 @@ Rerun
 
 Found 19 tests
 
-9 Pass
-10 Fail
+19 Pass
 Details
 Result	Test Name	MessagePass	MATH element name should be lowercased	
 Pass	MI element name and mathvariant attribute name should be lowercased, attribute value unchanged	
 Pass	DEFINITIONurl attribute markup should produce a definitionURL attribute, attribute value unchanged	
-Fail	html Span in mtext produces SPAN nodename in XHTML namespace	Cannot access property "nodeName" on null object ".firstChild.firstChild"
-Fail	html Span in mi produces SPAN nodename in XHTML namespace	Cannot access property "nodeName" on null object ".firstChild.firstChild"
-Fail	html Span in mrow produces SPAN nodename in XHTML namespace	Cannot access property "nodeName" on null object ".firstChild.firstChild"
-Fail	html P in mtext produces P nodename in XHTML namespace	Cannot access property "nodeName" on null object ".firstChild.firstChild"
-Fail	html P in mi produces P nodename in XHTML namespace	Cannot access property "nodeName" on null object ".firstChild.firstChild"
+Pass	html Span in mtext produces SPAN nodename in XHTML namespace	
+Pass	html Span in mi produces SPAN nodename in XHTML namespace	
+Pass	html Span in mrow produces SPAN nodename in XHTML namespace	
+Pass	html P in mtext produces P nodename in XHTML namespace	
+Pass	html P in mi produces P nodename in XHTML namespace	
 Pass	html P in mrow terminates the math: mrow,P,MI children of div	
 Pass	html P in mrow terminates the math: mrow child of math	
 Pass	html P in mrow terminates the math: mrow empty	
 Pass	html P in mrow terminates the math: math,P,MI children of div	
-Fail	Undefinedelement in mtext produces UNDEFINEDELEMENT nodename in XHTML namespace	
-Fail	mi in mtext produces MI nodename in XHTML namespace	
+Pass	Undefinedelement in mtext produces UNDEFINEDELEMENT nodename in XHTML namespace	
+Pass	mi in mtext produces MI nodename in XHTML namespace	
 Pass	p in annotation-xml moves to be child of DIV	
-Fail	p in annotation-xml encoding=text/html stays as child of annotation-xml	
-Fail	p in annotation-xml encoding=TEXT/HTML stays as child of annotation-xml	
-Fail	p in annotation-xml encoding=application/xhtml+xml stays as child of annotation-xml	
+Pass	p in annotation-xml encoding=text/html stays as child of annotation-xml	
+Pass	p in annotation-xml encoding=TEXT/HTML stays as child of annotation-xml	
+Pass	p in annotation-xml encoding=application/xhtml+xml stays as child of annotation-xml	
 Pass	p in annotation-xml encoding=foo moves to be child of DIV	

+ 26 - 12
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

@@ -114,22 +114,33 @@ static Vector<StringView> const s_quirks_public_ids = {
 };
 
 // https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
-static bool is_mathml_text_integration_point(DOM::Element const&)
+static bool is_mathml_text_integration_point(DOM::Element const& element)
 {
-    // FIXME: Implement.
-    return false;
+    // A node is a MathML text integration point if it is one of the following elements:
+    // - A MathML mi element
+    // - A MathML mo element
+    // - A MathML mn element
+    // - A MathML ms element
+    // - A MathML mtext element
+    return element.tag_name().is_one_of(MathML::TagNames::mi, MathML::TagNames::mo, MathML::TagNames::mn, MathML::TagNames::ms, MathML::TagNames::mtext);
 }
 
 // https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
 static bool is_html_integration_point(DOM::Element const& element)
 {
     // A node is an HTML integration point if it is one of the following elements:
-    // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
-    // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
-
-    // An SVG foreignObject element
-    // An SVG desc element
-    // An SVG title element
+    // - A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
+    // - A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
+    if (element.namespace_uri() == Namespace::MathML
+        && element.tag_name() == MathML::TagNames::annotation_xml) {
+        auto encoding = element.attribute("encoding"_fly_string);
+        if (encoding.has_value() && (encoding->equals_ignoring_ascii_case("text/html"sv) || encoding->equals_ignoring_ascii_case("application/xhtml+xml"sv)))
+            return true;
+    }
+
+    // - An SVG foreignObject element
+    // - An SVG desc element
+    // - An SVG title element
     if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title))
         return true;
 
@@ -191,13 +202,16 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point
         // As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher:
         if (m_stack_of_open_elements.is_empty()
             || adjusted_current_node().namespace_uri() == Namespace::HTML
+            || (is_mathml_text_integration_point(adjusted_current_node()) && token.is_start_tag() && token.tag_name() != MathML::TagNames::mglyph && token.tag_name() != MathML::TagNames::malignmark)
+            || (is_mathml_text_integration_point(adjusted_current_node()) && token.is_character())
+            || (adjusted_current_node().namespace_uri() == Namespace::MathML && adjusted_current_node().tag_name() == MathML::TagNames::annotation_xml && token.is_start_tag() && token.tag_name() == SVG::TagNames::svg)
             || (is_html_integration_point(adjusted_current_node()) && (token.is_start_tag() || token.is_character()))
             || token.is_end_of_file()) {
             // -> If the stack of open elements is empty
             // -> If the adjusted current node is an element in the HTML namespace
-            // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
-            // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a character token
-            // FIXME: -> If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
+            // -> If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
+            // -> If the adjusted current node is a MathML text integration point and the token is a character token
+            // -> If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
             // -> If the adjusted current node is an HTML integration point and the token is a start tag
             // -> If the adjusted current node is an HTML integration point and the token is a character token
             // -> If the token is an end-of-file token

+ 2 - 0
Userland/Libraries/LibWeb/MathML/TagNames.h

@@ -13,8 +13,10 @@ namespace Web::MathML::TagNames {
 #define ENUMERATE_MATHML_TAGS             \
     __ENUMERATE_MATHML_TAG(annotation)    \
     __ENUMERATE_MATHML_TAG(maction)       \
+    __ENUMERATE_MATHML_TAG(malignmark)    \
     __ENUMERATE_MATHML_TAG(math)          \
     __ENUMERATE_MATHML_TAG(merror)        \
+    __ENUMERATE_MATHML_TAG(mglyph)        \
     __ENUMERATE_MATHML_TAG(mfrac)         \
     __ENUMERATE_MATHML_TAG(mi)            \
     __ENUMERATE_MATHML_TAG(mmultiscripts) \