AK: Avoid creating an intermediate buffer when decoding a Base64 string

There's no need to copy the result. We can also avoid increasing the size of the output buffer by 1 for each written byte. This reduces the runtime of `./bin/base64 -d enwik8.base64 >/dev/null` from 0.917s to 0.632s. (enwik8 is a 100MB test file from http://mattmahoney.net/dc/enwik8.zip)
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/81ad6de41b Pull-request: https://github.com/SerenityOS/serenity/pull/23655
2024-11-21 23:20:20 +00:00 · 2024-03-20 12:41:41 -04:00 · 2024-03-20 12:41:41 -04:00 · 81ad6de41b · 2024-07-17 22:01:16 +09:00
commit 81ad6de41b
parent 0fd7ad09a0
1 changed file with 18 additions and 15 deletions
--- a/AK/Base64.cpp
+++ b/AK/Base64.cpp
@@ -47,36 +47,39 @@ ErrorOr<ByteBuffer> decode_base64_impl(StringView input)
        return { result };
    };

-    Vector<u8> output;
-    output.ensure_capacity(calculate_base64_decoded_length(input));
+    ByteBuffer output;
+    TRY(output.try_resize(calculate_base64_decoded_length(input)));

-    size_t offset = 0;
-    while (offset < input.length()) {
+    size_t input_offset = 0;
+    size_t output_offset = 0;
+
+    while (input_offset < input.length()) {
        bool in2_is_padding = false;
        bool in3_is_padding = false;

        bool parsed_something = false;

-        const u8 in0 = TRY(get(offset, nullptr, parsed_something));
-        const u8 in1 = TRY(get(offset, nullptr, parsed_something));
-        const u8 in2 = TRY(get(offset, &in2_is_padding, parsed_something));
-        const u8 in3 = TRY(get(offset, &in3_is_padding, parsed_something));
+        const u8 in0 = TRY(get(input_offset, nullptr, parsed_something));
+        const u8 in1 = TRY(get(input_offset, nullptr, parsed_something));
+        const u8 in2 = TRY(get(input_offset, &in2_is_padding, parsed_something));
+        const u8 in3 = TRY(get(input_offset, &in3_is_padding, parsed_something));

        if (!parsed_something)
            break;

-        const u8 out0 = (in0 << 2) | ((in1 >> 4) & 3);
-        const u8 out1 = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
-        const u8 out2 = ((in2 & 0x3) << 6) | in3;
+        output[output_offset++] = (in0 << 2) | ((in1 >> 4) & 3);

-        output.append(out0);
        if (!in2_is_padding)
-            output.append(out1);
+            output[output_offset++] = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
+
        if (!in3_is_padding)
-            output.append(out2);
+            output[output_offset++] = ((in2 & 0x3) << 6) | in3;
    }

-    return ByteBuffer::copy(output);
+    if (output_offset < output.size())
+        output.trim(output_offset, false);
+
+    return output;
 }

 template<auto alphabet>