mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
AK: Add BOM handling to String::from_utf8_with_replacement_character
This commit is contained in:
parent
1e8cc97b73
commit
b3bf5c4ea8
Notes:
github-actions[bot]
2024-08-12 10:39:57 +00:00
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/b3bf5c4ea84 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1038 Reviewed-by: https://github.com/trflynn89
4 changed files with 21 additions and 7 deletions
|
@ -20,8 +20,11 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
String String::from_utf8_with_replacement_character(StringView view)
|
||||
String String::from_utf8_with_replacement_character(StringView view, WithBOMHandling with_bom_handling)
|
||||
{
|
||||
if (auto bytes = view.bytes(); with_bom_handling == WithBOMHandling::Yes && bytes.size() >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
|
||||
view = view.substring_view(3);
|
||||
|
||||
if (Utf8View(view).validate())
|
||||
return String::from_utf8_without_validation(view.bytes());
|
||||
|
||||
|
|
|
@ -51,8 +51,13 @@ public:
|
|||
// Creates a new String from a sequence of UTF-8 encoded code points.
|
||||
static ErrorOr<String> from_utf8(StringView);
|
||||
|
||||
// Selects whether from_utf8_with_replacement_character() strips a leading UTF-8
// byte order mark (bytes EF BB BF) from its input.
enum class WithBOMHandling {
|
||||
Yes, // Drop a leading BOM, if present, before the input is validated/decoded.
|
||||
No, // Leave the input untouched; a leading BOM is preserved in the result.
|
||||
};
|
||||
|
||||
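// Creates a new String, using the replacement character (U+FFFD) for invalid bytes.
// With WithBOMHandling::Yes (the default), a leading UTF-8 BOM (EF BB BF) is skipped first.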
|
||||
[[nodiscard]] static String from_utf8_with_replacement_character(StringView);
|
||||
[[nodiscard]] static String from_utf8_with_replacement_character(StringView, WithBOMHandling = WithBOMHandling::Yes);
|
||||
|
||||
template<typename T>
|
||||
requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
|
||||
|
|
|
@ -175,15 +175,21 @@ TEST_CASE(invalid_utf8)
|
|||
|
||||
// Exercises String::from_utf8_with_replacement_character with invalid, valid,
// empty, and BOM-prefixed input.
TEST_CASE(with_replacement_character)
|
||||
{
|
||||
// The trailing four bytes are not valid UTF-8; the expected output below carries
// four U+FFFD sequences (EF BF BD) in their place.
auto string1 = String::from_utf8_with_replacement_character("long string \xf4\x8f\xbf\xc0"sv); // U+110000
|
||||
auto string1 = String::from_utf8_with_replacement_character("long string \xf4\x8f\xbf\xc0"sv, String::WithBOMHandling::No); // U+110000
|
||||
Array<u8, 24> string1_expected { 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x20, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd };
|
||||
EXPECT_EQ(string1.bytes(), string1_expected);
|
||||
|
||||
// Already-valid input is expected to come back unchanged.
auto string3 = String::from_utf8_with_replacement_character("A valid string!"sv);
|
||||
auto string3 = String::from_utf8_with_replacement_character("A valid string!"sv, String::WithBOMHandling::No);
|
||||
EXPECT_EQ(string3, "A valid string!"sv);
|
||||
|
||||
// Empty input yields an empty String.
auto string4 = String::from_utf8_with_replacement_character(""sv);
|
||||
auto string4 = String::from_utf8_with_replacement_character(""sv, String::WithBOMHandling::No);
|
||||
EXPECT_EQ(string4, ""sv);
|
||||
|
||||
// With BOM handling enabled, the leading EF BB BF is stripped from the result.
auto string5 = String::from_utf8_with_replacement_character("\xEF\xBB\xBFWHF!"sv, String::WithBOMHandling::Yes);
|
||||
EXPECT_EQ(string5, "WHF!"sv);
|
||||
|
||||
// With BOM handling disabled, the BOM bytes are kept as part of the string.
auto string6 = String::from_utf8_with_replacement_character("\xEF\xBB\xBFWHF!"sv, String::WithBOMHandling::No);
|
||||
EXPECT_EQ(string6, "\xEF\xBB\xBFWHF!"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(from_code_points)
|
||||
|
|
|
@ -116,8 +116,8 @@ ErrorOr<Vector<QueryParam>> url_decode(StringView input)
|
|||
auto space_decoded_value = value.replace("+"sv, " "sv, ReplaceMode::All);
|
||||
|
||||
// 5. Let nameString and valueString be the result of running UTF-8 decode without BOM on the percent-decoding of name and value, respectively.
|
||||
auto name_string = String::from_utf8_with_replacement_character(URL::percent_decode(space_decoded_name));
|
||||
auto value_string = String::from_utf8_with_replacement_character(URL::percent_decode(space_decoded_value));
|
||||
auto name_string = String::from_utf8_with_replacement_character(URL::percent_decode(space_decoded_name), String::WithBOMHandling::No);
|
||||
auto value_string = String::from_utf8_with_replacement_character(URL::percent_decode(space_decoded_value), String::WithBOMHandling::No);
|
||||
|
||||
TRY(output.try_empend(move(name_string), move(value_string)));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue