mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibTextCodec+AK: Don't validate UTF-8 strings twice
UTF8Decoder was already converting invalid data into replacement characters while converting, so we know for sure we have valid UTF-8 by the time conversion is finished. This patch adds a new StringBuilder::to_string_without_validation() and uses it to make UTF8Decoder avoid half the work it was doing.
This commit is contained in:
parent
a285e36041
commit
3c039903fb
Notes:
sideshowbarker
2024-07-16 17:12:03 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/3c039903fb Pull-request: https://github.com/SerenityOS/serenity/pull/22496 Reviewed-by: https://github.com/LucasChollet
5 changed files with 22 additions and 1 deletion
|
@ -233,6 +233,19 @@ void String::destroy_string()
|
||||||
m_data->unref();
|
m_data->unref();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a String directly from `bytes` without running a UTF-8 validation pass
// (contrast with String::from_utf8, which calls Utf8View::validate() first).
// NOTE(review): presumably the caller must already know `bytes` is well-formed UTF-8 — confirm at call sites.
String String::from_utf8_without_validation(ReadonlyBytes bytes)
|
||||||
|
{
|
||||||
|
// Inputs of at most MAX_SHORT_STRING_BYTE_COUNT bytes are stored inline in a ShortString.
if (bytes.size() <= MAX_SHORT_STRING_BYTE_COUNT) {
|
||||||
|
ShortString short_string;
|
||||||
|
// The copy is skipped entirely for empty input.
if (!bytes.is_empty())
|
||||||
|
memcpy(short_string.storage, bytes.data(), bytes.size());
|
||||||
|
// The byte count is shifted left by one bit and OR'd with SHORT_STRING_FLAG in the same field.
short_string.byte_count_and_short_string_flag = (bytes.size() << 1) | SHORT_STRING_FLAG;
|
||||||
|
return String { short_string };
|
||||||
|
}
|
||||||
|
// Longer inputs go through Detail::StringData::from_utf8; the result is unwrapped with MUST
// because this function returns a plain String rather than ErrorOr<String>.
auto data = MUST(Detail::StringData::from_utf8(reinterpret_cast<char const*>(bytes.data()), bytes.size()));
|
||||||
|
return String { move(data) };
|
||||||
|
}
|
||||||
|
|
||||||
ErrorOr<String> String::from_utf8(StringView view)
|
ErrorOr<String> String::from_utf8(StringView view)
|
||||||
{
|
{
|
||||||
if (!Utf8View { view }.validate())
|
if (!Utf8View { view }.validate())
|
||||||
|
|
|
@ -70,6 +70,8 @@ public:
|
||||||
requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
|
requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
|
||||||
static ErrorOr<String> from_utf8(T&&) = delete;
|
static ErrorOr<String> from_utf8(T&&) = delete;
|
||||||
|
|
||||||
|
[[nodiscard]] static String from_utf8_without_validation(ReadonlyBytes);
|
||||||
|
|
||||||
// Creates a new String by reading byte_count bytes from a UTF-8 encoded Stream.
|
// Creates a new String by reading byte_count bytes from a UTF-8 encoded Stream.
|
||||||
static ErrorOr<String> from_stream(Stream&, size_t byte_count);
|
static ErrorOr<String> from_stream(Stream&, size_t byte_count);
|
||||||
|
|
||||||
|
|
|
@ -156,6 +156,11 @@ ErrorOr<String> StringBuilder::to_string() const
|
||||||
return String::from_utf8(string_view());
|
return String::from_utf8(string_view());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the builder's current contents as a String by passing the bytes of string_view()
// to String::from_utf8_without_validation. Returns String directly, not ErrorOr<String>.
// NOTE(review): presumably only safe when the appended data is already known to be valid UTF-8 — confirm at call sites.
String StringBuilder::to_string_without_validation() const
|
||||||
|
{
|
||||||
|
return String::from_utf8_without_validation(string_view().bytes());
|
||||||
|
}
|
||||||
|
|
||||||
ErrorOr<FlyString> StringBuilder::to_fly_string() const
|
ErrorOr<FlyString> StringBuilder::to_fly_string() const
|
||||||
{
|
{
|
||||||
return FlyString::from_utf8(string_view());
|
return FlyString::from_utf8(string_view());
|
||||||
|
|
|
@ -73,6 +73,7 @@ public:
|
||||||
[[nodiscard]] ByteString to_byte_string() const;
|
[[nodiscard]] ByteString to_byte_string() const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
[[nodiscard]] String to_string_without_validation() const;
|
||||||
ErrorOr<String> to_string() const;
|
ErrorOr<String> to_string() const;
|
||||||
ErrorOr<FlyString> to_fly_string() const;
|
ErrorOr<FlyString> to_fly_string() const;
|
||||||
|
|
||||||
|
|
|
@ -247,7 +247,7 @@ ErrorOr<String> Decoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
StringBuilder builder(input.length());
|
StringBuilder builder(input.length());
|
||||||
TRY(process(input, [&builder](u32 c) { return builder.try_append_code_point(c); }));
|
TRY(process(input, [&builder](u32 c) { return builder.try_append_code_point(c); }));
|
||||||
return builder.to_string();
|
return builder.to_string_without_validation();
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
|
|
Loading…
Reference in a new issue