CRC32.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * Copyright (c) 2020-2022, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Array.h>
  7. #include <AK/NumericLimits.h>
  8. #include <AK/Span.h>
  9. #include <AK/Types.h>
  10. #include <LibCrypto/Checksum/CRC32.h>
  11. namespace Crypto::Checksum {
  12. #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
  13. void CRC32::update(ReadonlyBytes span)
  14. {
  15. // FIXME: Does this require runtime checking on rpi?
  16. // (Maybe the instruction is present on the rpi4 but not on the rpi3?)
  17. u8 const* data = span.data();
  18. size_t size = span.size();
  19. while (size > 0 && (reinterpret_cast<FlatPtr>(data) & 7) != 0) {
  20. m_state = __builtin_arm_crc32b(m_state, *data);
  21. ++data;
  22. --size;
  23. }
  24. auto* data64 = reinterpret_cast<u64 const*>(data);
  25. while (size >= 8) {
  26. m_state = __builtin_arm_crc32d(m_state, *data64);
  27. ++data64;
  28. size -= 8;
  29. }
  30. data = reinterpret_cast<u8 const*>(data64);
  31. while (size > 0) {
  32. m_state = __builtin_arm_crc32b(m_state, *data);
  33. ++data;
  34. --size;
  35. }
  36. }
  37. // FIXME: On Intel, use _mm_crc32_u8 / _mm_crc32_u64 if available (SSE 4.2).
  38. #else
  39. static constexpr size_t ethernet_polynomial = 0xEDB88320;
  40. # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  41. // This implements Intel's slicing-by-8 algorithm. Their original paper is no longer on their website,
  42. // but their source code is still available for reference:
  43. // https://sourceforge.net/projects/slicing-by-8/
  44. static constexpr auto generate_table()
  45. {
  46. Array<Array<u32, 256>, 8> data {};
  47. for (u32 i = 0; i < 256; ++i) {
  48. auto value = i;
  49. for (size_t j = 0; j < 8; ++j)
  50. value = (value >> 1) ^ ((value & 1) * ethernet_polynomial);
  51. data[0][i] = value;
  52. }
  53. for (u32 i = 0; i < 256; ++i) {
  54. for (size_t j = 1; j < 8; ++j)
  55. data[j][i] = (data[j - 1][i] >> 8) ^ data[0][data[j - 1][i] & 0xff];
  56. }
  57. return data;
  58. }
  59. static constexpr auto table = generate_table();
  60. struct AlignmentData {
  61. ReadonlyBytes misaligned;
  62. ReadonlyBytes aligned;
  63. };
  64. static AlignmentData split_bytes_for_alignment(ReadonlyBytes data, size_t alignment)
  65. {
  66. auto address = reinterpret_cast<uintptr_t>(data.data());
  67. auto offset = alignment - address % alignment;
  68. if (offset == alignment)
  69. return { {}, data };
  70. if (data.size() < alignment)
  71. return { data, {} };
  72. return { data.trim(offset), data.slice(offset) };
  73. }
  74. static constexpr u32 single_byte_crc(u32 crc, u8 byte)
  75. {
  76. return (crc >> 8) ^ table[0][(crc & 0xff) ^ byte];
  77. }
  78. void CRC32::update(ReadonlyBytes data)
  79. {
  80. // The provided data may not be aligned to a 4-byte boundary, required to reinterpret its address
  81. // into a u32 in the loop below. So we split the bytes into two segments: the misaligned bytes
  82. // (which undergo the standard 1-byte-at-a-time algorithm) and remaining aligned bytes.
  83. auto [misaligned_data, aligned_data] = split_bytes_for_alignment(data, alignof(u32));
  84. for (auto byte : misaligned_data)
  85. m_state = single_byte_crc(m_state, byte);
  86. while (aligned_data.size() >= 8) {
  87. auto const* segment = reinterpret_cast<u32 const*>(aligned_data.data());
  88. auto low = *segment ^ m_state;
  89. auto high = *(++segment);
  90. m_state = table[0][(high >> 24) & 0xff]
  91. ^ table[1][(high >> 16) & 0xff]
  92. ^ table[2][(high >> 8) & 0xff]
  93. ^ table[3][high & 0xff]
  94. ^ table[4][(low >> 24) & 0xff]
  95. ^ table[5][(low >> 16) & 0xff]
  96. ^ table[6][(low >> 8) & 0xff]
  97. ^ table[7][low & 0xff];
  98. aligned_data = aligned_data.slice(8);
  99. }
  100. for (auto byte : aligned_data)
  101. m_state = single_byte_crc(m_state, byte);
  102. }
  103. # else
  104. // FIXME: Implement the slicing-by-8 algorithm for big endian CPUs.
  105. static constexpr auto generate_table()
  106. {
  107. Array<u32, 256> data {};
  108. for (auto i = 0u; i < data.size(); i++) {
  109. u32 value = i;
  110. for (auto j = 0; j < 8; j++) {
  111. if (value & 1) {
  112. value = ethernet_polynomial ^ (value >> 1);
  113. } else {
  114. value = value >> 1;
  115. }
  116. }
  117. data[i] = value;
  118. }
  119. return data;
  120. }
  121. static constexpr auto table = generate_table();
  122. void CRC32::update(ReadonlyBytes data)
  123. {
  124. for (size_t i = 0; i < data.size(); i++) {
  125. m_state = table[(m_state ^ data.at(i)) & 0xFF] ^ (m_state >> 8);
  126. }
  127. }
  128. # endif
  129. #endif
  130. u32 CRC32::digest()
  131. {
  132. return ~m_state;
  133. }
  134. }