Browse Source

LibTextCodec: Return Optional<Decoder&> from `bom_sniff_to_decoder()`

Sam Atkins 2 years ago
parent
commit
3c5090e172

+ 16 - 10
Userland/Libraries/LibTextCodec/Decoder.cpp

@@ -157,7 +157,7 @@ Optional<StringView> get_standardized_encoding(StringView encoding)
 }
 }
 
 
 // https://encoding.spec.whatwg.org/#bom-sniff
 // https://encoding.spec.whatwg.org/#bom-sniff
-Decoder* bom_sniff_to_decoder(StringView input)
+Optional<Decoder&> bom_sniff_to_decoder(StringView input)
 {
 {
     // 1. Let BOM be the result of peeking 3 bytes from ioQueue, converted to a byte sequence.
     // 1. Let BOM be the result of peeking 3 bytes from ioQueue, converted to a byte sequence.
     // 2. For each of the rows in the table below, starting with the first one and going down,
     // 2. For each of the rows in the table below, starting with the first one and going down,
@@ -172,22 +172,28 @@ Decoder* bom_sniff_to_decoder(StringView input)
 
 
     auto bytes = input.bytes();
     auto bytes = input.bytes();
     if (bytes.size() < 2)
     if (bytes.size() < 2)
-        return nullptr;
+        return {};
 
 
     auto first_byte = bytes[0];
     auto first_byte = bytes[0];
 
 
     switch (first_byte) {
     switch (first_byte) {
     case 0xEF: // UTF-8
     case 0xEF: // UTF-8
         if (bytes.size() < 3)
         if (bytes.size() < 3)
-            return nullptr;
-        return bytes[1] == 0xBB && bytes[2] == 0xBF ? &s_utf8_decoder : nullptr;
+            return {};
+        if (bytes[1] == 0xBB && bytes[2] == 0xBF)
+            return s_utf8_decoder;
+        return {};
     case 0xFE: // UTF-16BE
     case 0xFE: // UTF-16BE
-        return bytes[1] == 0xFF ? &s_utf16be_decoder : nullptr;
+        if (bytes[1] == 0xFF)
+            return s_utf16be_decoder;
+        return {};
     case 0xFF: // UTF-16LE
     case 0xFF: // UTF-16LE
-        return bytes[1] == 0xFE ? &s_utf16le_decoder : nullptr;
+        if (bytes[1] == 0xFE)
+            return s_utf16le_decoder;
+        return {};
     }
     }
 
 
-    return nullptr;
+    return {};
 }
 }
 
 
 // https://encoding.spec.whatwg.org/#decode
 // https://encoding.spec.whatwg.org/#decode
@@ -197,13 +203,13 @@ DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byt
 
 
     // 1. Let BOMEncoding be the result of BOM sniffing ioQueue.
     // 1. Let BOMEncoding be the result of BOM sniffing ioQueue.
     // 2. If BOMEncoding is non-null:
     // 2. If BOMEncoding is non-null:
-    if (auto* unicode_decoder = bom_sniff_to_decoder(input); unicode_decoder) {
+    if (auto unicode_decoder = bom_sniff_to_decoder(input); unicode_decoder.has_value()) {
         // 1. Set encoding to BOMEncoding.
         // 1. Set encoding to BOMEncoding.
-        actual_decoder = unicode_decoder;
+        actual_decoder = &unicode_decoder.value();
 
 
         // 2. Read three bytes from ioQueue, if BOMEncoding is UTF-8; otherwise read two bytes. (Do nothing with those bytes.)
         // 2. Read three bytes from ioQueue, if BOMEncoding is UTF-8; otherwise read two bytes. (Do nothing with those bytes.)
         // FIXME: I imagine this will be pretty slow for large inputs, as it's regenerating the input without the first 2/3 bytes.
         // FIXME: I imagine this will be pretty slow for large inputs, as it's regenerating the input without the first 2/3 bytes.
-        input = input.substring_view(unicode_decoder == &s_utf8_decoder ? 3 : 2);
+        input = input.substring_view(&unicode_decoder.value() == &s_utf8_decoder ? 3 : 2);
     }
     }
 
 
     VERIFY(actual_decoder);
     VERIFY(actual_decoder);

+ 2 - 2
Userland/Libraries/LibTextCodec/Decoder.h

@@ -89,8 +89,8 @@ public:
 Optional<Decoder&> decoder_for(StringView encoding);
 Optional<Decoder&> decoder_for(StringView encoding);
 Optional<StringView> get_standardized_encoding(StringView encoding);
 Optional<StringView> get_standardized_encoding(StringView encoding);
 
 
-// This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
-Decoder* bom_sniff_to_decoder(StringView);
+// This returns the appropriate Unicode decoder for the sniffed BOM or nothing if there is no appropriate decoder.
+Optional<Decoder&> bom_sniff_to_decoder(StringView);
 
 
 // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
 // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
 // This will use the given decoder unless there is a byte order mark in the input, in which case we will instead use the appropriate Unicode decoder.
 // This will use the given decoder unless there is a byte order mark in the input, in which case we will instead use the appropriate Unicode decoder.