Base64.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * Copyright (c) 2022-2023, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Base64.h>
  7. #include <AK/ByteBuffer.h>
  8. #include <AK/CharacterTypes.h>
  9. #include <AK/Error.h>
  10. #include <AK/StringBuilder.h>
  11. #include <AK/StringView.h>
  12. #include <AK/Vector.h>
  13. #include <LibWeb/Infra/Base64.h>
  14. #include <LibWeb/Infra/CharacterTypes.h>
  15. namespace Web::Infra {
  16. // https://infra.spec.whatwg.org/#forgiving-base64
  17. ErrorOr<ByteBuffer> decode_forgiving_base64(StringView input)
  18. {
  19. // 1. Remove all ASCII whitespace from data.
  20. // FIXME: It is possible to avoid copying input here, it's just a bit tricky to remove the equal signs
  21. StringBuilder builder;
  22. for (auto character : input) {
  23. if (!is_ascii_whitespace(character))
  24. TRY(builder.try_append(character));
  25. }
  26. auto data = builder.string_view();
  27. // 2. If data’s code point length divides by 4 leaving no remainder, then:
  28. if (data.length() % 4 == 0) {
  29. // If data ends with one or two U+003D (=) code points, then remove them from data.
  30. if (data.ends_with("=="sv))
  31. data = data.substring_view(0, data.length() - 2);
  32. else if (data.ends_with('='))
  33. data = data.substring_view(0, data.length() - 1);
  34. }
  35. // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure.
  36. if (data.length() % 4 == 1)
  37. return Error::from_string_literal("Invalid input length in forgiving base64 decode");
  38. // 4. If data contains a code point that is not one of
  39. // U+002B (+), U+002F (/), ASCII alphanumeric
  40. // then return failure.
  41. for (auto point : data) {
  42. if (point != '+' && point != '/' && !is_ascii_alphanumeric(point))
  43. return Error::from_string_literal("Invalid character in forgiving base64 decode");
  44. }
  45. // 5. Let output be an empty byte sequence.
  46. // 6. Let buffer be an empty buffer that can have bits appended to it.
  47. Vector<u8> output;
  48. u32 buffer = 0;
  49. auto accumulated_bits = 0;
  50. auto add_to_buffer = [&](u8 number) {
  51. VERIFY(number < 64);
  52. u32 buffer_mask = number;
  53. if (accumulated_bits == 0)
  54. buffer_mask <<= 18;
  55. else if (accumulated_bits == 6)
  56. buffer_mask <<= 12;
  57. else if (accumulated_bits == 12)
  58. buffer_mask <<= 6;
  59. else if (accumulated_bits == 18)
  60. buffer_mask <<= 0;
  61. buffer |= buffer_mask;
  62. accumulated_bits += 6;
  63. };
  64. auto append_bytes = [&]() {
  65. output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
  66. output.append(static_cast<u8>((buffer & 0xff00) >> 8));
  67. output.append(static_cast<u8>(buffer & 0xff));
  68. buffer = 0;
  69. accumulated_bits = 0;
  70. };
  71. auto alphabet_lookup_table = AK::base64_lookup_table();
  72. // 7. Let position be a position variable for data, initially pointing at the start of data.
  73. // 8. While position does not point past the end of data:
  74. for (auto point : data) {
  75. // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648.
  76. // Let n be the number given in the first cell of the same row. [RFC4648]
  77. auto n = alphabet_lookup_table[point];
  78. VERIFY(n >= 0);
  79. // 2. Append the six bits corresponding to n, most significant bit first, to buffer.
  80. add_to_buffer(static_cast<u8>(n));
  81. // 3. buffer has accumulated 24 bits,
  82. if (accumulated_bits == 24) {
  83. // interpret them as three 8-bit big-endian numbers.
  84. // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer
  85. append_bytes();
  86. }
  87. }
  88. // 9. If buffer is not empty, it contains either 12 or 18 bits.
  89. VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18);
  90. // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number.
  91. if (accumulated_bits == 12)
  92. output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
  93. // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers.
  94. // Append the one or two bytes with values equal to those one or two numbers to output, in the same order.
  95. if (accumulated_bits == 18) {
  96. output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
  97. output.append(static_cast<u8>((buffer & 0xff00) >> 8));
  98. }
  99. return ByteBuffer::copy(output);
  100. }
  101. }