Sfoglia il codice sorgente

LibTextCodec: Add x-user-defined decoder

It's a pretty simple charset: the lower 128 byte values (0x00-0x7F) are
standard ASCII, while the upper 128 byte values (0x80-0xFF) are mapped to
a portion of the Unicode Private Use Area, specifically U+F780-U+F7FF.

This is used by Google Maps for certain blobs.
Luke Wilde 3 anni fa
parent
commit
0e0f98a45e

+ 25 - 0
Userland/Libraries/LibTextCodec/Decoder.cpp

@@ -20,6 +20,7 @@ CyrillicDecoder s_cyrillic_decoder;
 Koi8RDecoder s_koi8r_decoder;
 Latin9Decoder s_latin9_decoder;
 TurkishDecoder s_turkish_decoder;
+XUserDefinedDecoder s_x_user_defined_decoder;
 }
 
 Decoder* decoder_for(const String& a_encoding)
@@ -44,6 +45,8 @@ Decoder* decoder_for(const String& a_encoding)
             return &s_latin9_decoder;
         if (encoding.value().equals_ignoring_case("windows-1254"))
             return &s_turkish_decoder;
+        if (encoding.value().equals_ignoring_case("x-user-defined"))
+            return &s_x_user_defined_decoder;
     }
     dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
     return nullptr;
@@ -466,4 +469,26 @@ void TurkishDecoder::process(StringView input, Function<void(u32)> on_code_point
     }
 }
 
+// https://encoding.spec.whatwg.org/#x-user-defined-decoder
+// Decodes every byte of `input` as x-user-defined and reports each resulting code point through `on_code_point`.
+void XUserDefinedDecoder::process(StringView input, Function<void(u32)> on_code_point)
+{
+    // Maps one x-user-defined byte to its code point (a scalar value, not a UTF-8 sequence).
+    auto code_point_for_byte = [](u8 byte) -> u32 {
+        // 3. Return a code point whose value is 0xF780 + byte − 0x80.
+        // Every byte above the ASCII range (0x80-0xFF) takes this path.
+        if (byte > 0x7f)
+            return 0xF780 + (byte - 0x80);
+
+        // 2. If byte is an ASCII byte, return a code point whose value is byte.
+        // https://infra.spec.whatwg.org/#ascii-byte
+        // An ASCII byte is a byte in the range 0x00 (NUL) to 0x7F (DEL), inclusive.
+        // NOTE: No lower-bound check is needed, as an unsigned byte is always >= 0x00.
+        return byte;
+    };
+
+    // 1. If byte is end-of-queue, return finished.
+    // The loop ends once every byte of the input has been handed to the callback.
+    for (auto raw_byte : input)
+        on_code_point(code_point_for_byte(raw_byte));
+}
+
 }

+ 5 - 0
Userland/Libraries/LibTextCodec/Decoder.h

@@ -67,6 +67,11 @@ public:
     virtual void process(StringView, Function<void(u32)> on_code_point) override;
 };
 
+// Decoder for the "x-user-defined" encoding (https://encoding.spec.whatwg.org/#x-user-defined-decoder).
+// Bytes 0x00-0x7F decode to the code point of the same value; bytes 0x80-0xFF decode to
+// 0xF780 + byte - 0x80, i.e. the range 0xF780-0xF7FF.
+class XUserDefinedDecoder final : public Decoder {
+public:
+    // Decodes the given input, invoking on_code_point once per input byte with the decoded code point.
+    virtual void process(StringView, Function<void(u32)> on_code_point) override;
+};
+
 Decoder* decoder_for(String const& encoding);
 Optional<String> get_standardized_encoding(const String& encoding);