AK: Remove whitespace skipping feature from AK's Base64 decoder

Whitespace skipping was added in commit f2663f477f as a
partial implementation of what is now LibWeb's forgiving Base64 decoder.
All use cases within LibWeb that require whitespace skipping now use
that implementation instead.

Removing this feature from AK allows us to know the exact output size of
a decoded Base64 string. We still trim whitespace at the start and end
of the input, which is useful, for example, when reading from a file
that may end with a newline.
This commit is contained in:
Timothy Flynn 2024-03-24 08:51:44 -04:00 committed by Andreas Kling
parent 690db10463
commit 754ff41b9c
Notes: sideshowbarker 2024-07-17 03:35:24 +09:00
2 changed files with 14 additions and 18 deletions

View file

@ -6,7 +6,6 @@
#include <AK/Assertions.h>
#include <AK/Base64.h>
#include <AK/CharacterTypes.h>
#include <AK/Error.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>
@ -26,19 +25,20 @@ size_t calculate_base64_encoded_length(ReadonlyBytes input)
static ErrorOr<ByteBuffer> decode_base64_impl(StringView input, ReadonlySpan<i16> alphabet_lookup_table)
{
auto get = [&](size_t& offset, bool* is_padding, bool& parsed_something) -> ErrorOr<u8> {
while (offset < input.length() && is_ascii_space(input[offset]))
++offset;
input = input.trim_whitespace();
auto get = [&](size_t offset, bool* is_padding) -> ErrorOr<u8> {
if (offset >= input.length())
return 0;
auto ch = static_cast<unsigned char>(input[offset++]);
parsed_something = true;
auto ch = static_cast<unsigned char>(input[offset]);
if (ch == '=') {
if (!is_padding)
return Error::from_string_literal("Invalid '=' character outside of padding in base64 data");
*is_padding = true;
return 0;
}
i16 result = alphabet_lookup_table[ch];
if (result < 0)
return Error::from_string_literal("Invalid character in base64 data");
@ -56,15 +56,10 @@ static ErrorOr<ByteBuffer> decode_base64_impl(StringView input, ReadonlySpan<i16
bool in2_is_padding = false;
bool in3_is_padding = false;
bool parsed_something = false;
u8 const in0 = TRY(get(input_offset, nullptr, parsed_something));
u8 const in1 = TRY(get(input_offset, nullptr, parsed_something));
u8 const in2 = TRY(get(input_offset, &in2_is_padding, parsed_something));
u8 const in3 = TRY(get(input_offset, &in3_is_padding, parsed_something));
if (!parsed_something)
break;
u8 const in0 = TRY(get(input_offset++, nullptr));
u8 const in1 = TRY(get(input_offset++, nullptr));
u8 const in2 = TRY(get(input_offset++, &in2_is_padding));
u8 const in3 = TRY(get(input_offset++, &in3_is_padding));
output[output_offset++] = (in0 << 2) | ((in1 >> 4) & 3);

View file

@ -25,8 +25,8 @@ TEST_CASE(test_decode)
decode_equal("Zm9vYg=="sv, "foob"sv);
decode_equal("Zm9vYmE="sv, "fooba"sv);
decode_equal("Zm9vYmFy"sv, "foobar"sv);
decode_equal("Z m\r9\n v\v Ym\tFy"sv, "foobar"sv);
EXPECT_EQ(decode_base64(" ZD Qg\r\nPS An Zm91cic\r\n 7"sv).value(), decode_base64("ZDQgPSAnZm91cic7"sv).value());
decode_equal(" Zm9vYmFy "sv, "foobar"sv);
decode_equal(" \n\r \t Zm9vYmFy \n"sv, "foobar"sv);
decode_equal("aGVsbG8/d29ybGQ="sv, "hello?world"sv);
}
@ -88,7 +88,8 @@ TEST_CASE(test_urldecode)
decode_equal("Zm9vYg=="sv, "foob"sv);
decode_equal("Zm9vYmE="sv, "fooba"sv);
decode_equal("Zm9vYmFy"sv, "foobar"sv);
decode_equal("Z m\r9\n v\v Ym\tFy"sv, "foobar"sv);
decode_equal(" Zm9vYmFy "sv, "foobar"sv);
decode_equal(" \n\r \t Zm9vYmFy \n"sv, "foobar"sv);
decode_equal("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEu"sv, "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."sv);
decode_equal("aGVsbG8_d29ybGQ="sv, "hello?world"sv);