LibTextCodec: Use Optional<String> for get_standardized_encoding

This patch changes get_standardized_encoding to return Optional<String>.
Previously it returned the null string when the provided encoding could
not be matched to one of the canonical encoding names; that case is now
reported as an empty Optional instead.

This is part of an effort to move away from using null strings towards
explicitly using Optional<String> to indicate that the String may not
have a value.
This commit is contained in:
Max Wipfli 2021-05-11 15:52:25 +02:00 committed by Andreas Kling
parent f51b0729f5
commit d325403cb5
Notes: sideshowbarker 2024-07-18 17:48:52 +09:00
3 changed files with 21 additions and 16 deletions

View file

@ -64,24 +64,26 @@ CyrillicDecoder& cyrillic_decoder()
Decoder* decoder_for(const String& a_encoding)
{
auto encoding = get_standardized_encoding(a_encoding);
if (encoding.equals_ignoring_case("windows-1252"))
return &latin1_decoder();
if (encoding.equals_ignoring_case("utf-8"))
return &utf8_decoder();
if (encoding.equals_ignoring_case("utf-16be"))
return &utf16be_decoder();
if (encoding.equals_ignoring_case("iso-8859-2"))
return &latin2_decoder();
if (encoding.equals_ignoring_case("windows-1255"))
return &hebrew_decoder();
if (encoding.equals_ignoring_case("windows-1251"))
return &cyrillic_decoder();
if (encoding.has_value()) {
if (encoding.value().equals_ignoring_case("windows-1252"))
return &latin1_decoder();
if (encoding.value().equals_ignoring_case("utf-8"))
return &utf8_decoder();
if (encoding.value().equals_ignoring_case("utf-16be"))
return &utf16be_decoder();
if (encoding.value().equals_ignoring_case("iso-8859-2"))
return &latin2_decoder();
if (encoding.value().equals_ignoring_case("windows-1255"))
return &hebrew_decoder();
if (encoding.value().equals_ignoring_case("windows-1251"))
return &cyrillic_decoder();
}
dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
return nullptr;
}
// https://encoding.spec.whatwg.org/#concept-encoding-get
String get_standardized_encoding(const String& encoding)
Optional<String> get_standardized_encoding(const String& encoding)
{
String trimmed_lowercase_encoding = encoding.trim_whitespace().to_lowercase();
@ -172,7 +174,8 @@ String get_standardized_encoding(const String& encoding)
bool is_standardized_encoding(const String& encoding)
{
return encoding.equals_ignoring_case(get_standardized_encoding(encoding));
auto standardized_encoding = get_standardized_encoding(encoding);
return standardized_encoding.has_value() && encoding.equals_ignoring_case(standardized_encoding.value());
}
String UTF8Decoder::to_utf8(const StringView& input)

View file

@ -49,7 +49,7 @@ public:
};
Decoder* decoder_for(const String& encoding);
String get_standardized_encoding(const String& encoding);
Optional<String> get_standardized_encoding(const String& encoding);
bool is_standardized_encoding(const String& encoding);
}

View file

@ -104,7 +104,9 @@ HTMLDocumentParser::HTMLDocumentParser(DOM::Document& document, const StringView
, m_document(document)
{
m_document->set_should_invalidate_styles_on_attribute_changes(false);
m_document->set_encoding(TextCodec::get_standardized_encoding(encoding));
auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
VERIFY(standardized_encoding.has_value());
m_document->set_encoding(standardized_encoding.value());
}
HTMLDocumentParser::~HTMLDocumentParser()