AK: Add a Base64 decoder to decode into an existing buffer

Some callers (LibJS) will want to control the size of the output buffer,
to decode up to a maximum length. They will also want to receive partial
results in the case of an error. This patch adds a method to provide
those capabilities, and makes the existing implementation use it.
This commit is contained in:
Timothy Flynn 2024-09-01 17:41:57 -04:00 committed by Andreas Kling
parent 35d8e7e63f
commit d265575269
Notes: github-actions[bot] 2024-09-03 15:47:17 +00:00
3 changed files with 85 additions and 9 deletions

View file

@ -17,21 +17,40 @@ size_t size_required_to_decode_base64(StringView input)
return simdutf::maximal_binary_length_from_base64(input.characters_without_null_termination(), input.length());
}
// Decodes `input` into the caller-provided `output` buffer, using at most output.size() bytes of it.
//
// If decoding succeeds, or stops only because `output` is too small, `output` is resized to the
// length reported back by simdutf and the number of consumed input bytes is returned (the full
// input length on complete success, simdutf's reported count otherwise).
//
// For any other simdutf error, `output` is resized to (count / 4) * 3 bytes, where `count` is the
// count reported by simdutf, and an InvalidBase64 carrying that count is returned.
static ErrorOr<size_t, InvalidBase64> decode_base64_into_impl(StringView input, ByteBuffer& output, simdutf::base64_options options)
{
    // Passed to simdutf as the available capacity; it is read back below as the decoded length.
    size_t decoded_size = output.size();

    auto const* encoded = input.characters_without_null_termination();
    auto* destination = reinterpret_cast<char*>(output.data());

    auto const outcome = simdutf::base64_to_binary_safe(encoded, input.length(), destination, decoded_size, options);

    bool const succeeded = outcome.error == simdutf::SUCCESS;
    bool const ran_out_of_space = outcome.error == simdutf::OUTPUT_BUFFER_TOO_SMALL;

    if (!succeeded && !ran_out_of_space) {
        // Keep only the bytes that correspond to complete 4-character groups before the failure.
        output.resize((outcome.count / 4) * 3);

        return InvalidBase64 {
            .error = Error::from_string_literal("Invalid base64-encoded data"),
            .valid_input_bytes = outcome.count,
        };
    }

    // The decoder must never claim to have written more than the buffer could hold.
    VERIFY(decoded_size <= output.size());
    output.resize(decoded_size);

    if (succeeded)
        return input.length();
    return outcome.count;
}
// Decodes all of `input` into a newly allocated ByteBuffer using the given simdutf options.
//
// Returns an Error if the output buffer cannot be allocated, or if decode_base64_into_impl()
// rejects the input.
static ErrorOr<ByteBuffer> decode_base64_impl(StringView input, simdutf::base64_options options)
{
    ByteBuffer output;
    TRY(output.try_resize(size_required_to_decode_base64(input)));

    // decode_base64_into_impl() performs the single decode pass and shrinks `output` to the number
    // of bytes actually written, so no second decode or resize is needed here. Callers of this
    // function only expect a plain Error, so the partial-result details of InvalidBase64 are dropped.
    if (auto result = decode_base64_into_impl(input, output, options); result.is_error())
        return result.release_error().error;

    return output;
}
@ -59,6 +78,16 @@ ErrorOr<ByteBuffer> decode_base64url(StringView input)
return decode_base64_impl(input, simdutf::base64_url);
}
// Decodes `input` into the existing `output` buffer with simdutf's default base64 options.
// Forwards to decode_base64_into_impl(), which defines the return value and how `output` is resized.
ErrorOr<size_t, InvalidBase64> decode_base64_into(StringView input, ByteBuffer& output)
{
return decode_base64_into_impl(input, output, simdutf::base64_default);
}
// Decodes `input` into the existing `output` buffer with simdutf's base64url options.
// Forwards to decode_base64_into_impl(), which defines the return value and how `output` is resized.
ErrorOr<size_t, InvalidBase64> decode_base64url_into(StringView input, ByteBuffer& output)
{
return decode_base64_into_impl(input, output, simdutf::base64_url);
}
ErrorOr<String> encode_base64(ReadonlyBytes input, OmitPadding omit_padding)
{
auto options = omit_padding == OmitPadding::Yes

View file

@ -18,6 +18,16 @@ size_t size_required_to_decode_base64(StringView);
// Decode the given string into a newly allocated ByteBuffer, or return an Error on failure.
// decode_base64url() uses simdutf's base64url options; decode_base64() presumably uses the
// default options (its definition is not shown alongside these declarations - confirm).
ErrorOr<ByteBuffer> decode_base64(StringView);
ErrorOr<ByteBuffer> decode_base64url(StringView);
// Error payload returned by the decode_base64*_into() functions when the input is rejected.
struct InvalidBase64 {
// The underlying error describing the failure.
Error error;
// Filled in from the count reported by the decoder at the point of failure; going by the name,
// this is how many leading input bytes were valid (NOTE(review): confirm against simdutf).
size_t valid_input_bytes { 0 };
};
// Decode into a caller-provided buffer, using at most the buffer's current size.
//
// On success, these return the number of input bytes that were decoded. This might be less than the
// string length if the output buffer was not large enough. The buffer is resized to the number of
// bytes that were written.
//
// On failure, these return an InvalidBase64; the buffer is still resized, so that callers can
// inspect the partial result.
ErrorOr<size_t, InvalidBase64> decode_base64_into(StringView, ByteBuffer&);
ErrorOr<size_t, InvalidBase64> decode_base64url_into(StringView, ByteBuffer&);
enum class OmitPadding {
No,
Yes,

View file

@ -29,6 +29,43 @@ TEST_CASE(test_decode)
decode_equal("aGVsbG8/d29ybGQ="sv, "hello?world"sv);
}
TEST_CASE(test_decode_into)
{
    ByteBuffer output;

    // Sizes `output` to `capacity` (or, when omitted, to what size_required_to_decode_base64()
    // reports for the input), decodes `encoded` into it, and compares the resulting contents.
    auto expect_decoded = [&output](StringView encoded, StringView expected, Optional<size_t> capacity = {}) {
        auto const size = capacity.has_value()
            ? capacity.value()
            : AK::size_required_to_decode_base64(encoded);
        output.resize(size);

        auto decoded = AK::decode_base64_into(encoded, output);
        VERIFY(!decoded.is_error());

        EXPECT_EQ(StringView { output }, expected);
    };

    // Buffer sized from the input: every padding variant decodes in full.
    expect_decoded(""sv, ""sv);
    expect_decoded("Zg=="sv, "f"sv);
    expect_decoded("Zm8="sv, "fo"sv);
    expect_decoded("Zm9v"sv, "foo"sv);
    expect_decoded("Zm9vYg=="sv, "foob"sv);
    expect_decoded("Zm9vYmE="sv, "fooba"sv);
    expect_decoded("Zm9vYmFy"sv, "foobar"sv);

    // Whitespace around and inside the input is tolerated.
    expect_decoded(" Zm9vYmFy "sv, "foobar"sv);
    expect_decoded(" \n\r \t Zm 9v \t YmFy \n"sv, "foobar"sv);

    expect_decoded("aGVsbG8/d29ybGQ="sv, "hello?world"sv);

    // Explicit buffer sizes: output only ever grows in whole 3-byte groups that fit.
    expect_decoded("Zm9vYmFy"sv, ""sv, 0);
    expect_decoded("Zm9vYmFy"sv, ""sv, 1);
    expect_decoded("Zm9vYmFy"sv, ""sv, 2);
    expect_decoded("Zm9vYmFy"sv, "foo"sv, 3);
    expect_decoded("Zm9vYmFy"sv, "foo"sv, 4);
    expect_decoded("Zm9vYmFy"sv, "foo"sv, 5);
    expect_decoded("Zm9vYmFy"sv, "foobar"sv, 6);
    expect_decoded("Zm9vYmFy"sv, "foobar"sv, 7);
}
TEST_CASE(test_decode_invalid)
{
EXPECT(decode_base64(("asdf\xffqwe"sv)).is_error());