Ver código fonte

LibWeb: Run XML parser input through encoding decoder

Fixes an issue where the XML parser fails when the loader passes input
that is prefixed with a byte order mark.

It also generally makes sense to pass the text source through an encoding
decoder before parsing. We would probably even want to introduce a method
similar to `create_with_uncertain_encoding` in `HTMLParser`, but for
`XMLParser`, to make it harder to inadvertently pass non-UTF-8 input to the
XML parser.
Aliaksandr Kalenik 2 anos atrás
pai
commit
5b31d1208f
1 arquivos alterados com 7 adições e 2 exclusões
  1. 7 2
      Userland/Libraries/LibWeb/Loader/FrameLoader.cpp

+ 7 - 2
Userland/Libraries/LibWeb/Loader/FrameLoader.cpp

@@ -11,6 +11,7 @@
 #include <LibGemini/Document.h>
 #include <LibGfx/ImageFormats/ImageDecoder.h>
 #include <LibMarkdown/Document.h>
+#include <LibTextCodec/Decoder.h>
 #include <LibWeb/Bindings/MainThreadVM.h>
 #include <LibWeb/DOM/Document.h>
 #include <LibWeb/DOM/ElementFactory.h>
@@ -18,6 +19,7 @@
 #include <LibWeb/HTML/BrowsingContext.h>
 #include <LibWeb/HTML/HTMLIFrameElement.h>
 #include <LibWeb/HTML/NavigationParams.h>
+#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
 #include <LibWeb/HTML/Parser/HTMLParser.h>
 #include <LibWeb/Loader/FrameLoader.h>
 #include <LibWeb/Loader/ResourceLoader.h>
@@ -171,8 +173,11 @@ static bool build_gemini_document(DOM::Document& document, ByteBuffer const& dat
 
 static bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
 {
-
-    XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource });
+    auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
+    auto decoder = TextCodec::decoder_for(encoding);
+    VERIFY(decoder.has_value());
+    auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();
+    XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource });
     XMLDocumentBuilder builder { document };
     auto result = parser.parse_with_listener(builder);
     return !result.is_error() && !builder.has_error();