Poly1305.cpp 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * Copyright (c) 2022, stelar7 <dudedbz@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteReader.h>
  7. #include <AK/Endian.h>
  8. #include <LibCrypto/Authentication/Poly1305.h>
  9. namespace Crypto::Authentication {
  10. Poly1305::Poly1305(ReadonlyBytes key)
  11. {
  12. for (size_t i = 0; i < 16; i += 4) {
  13. m_state.r[i / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(key.offset(i)));
  14. }
  15. // r[3], r[7], r[11], and r[15] are required to have their top four bits clear (be smaller than 16)
  16. // r[4], r[8], and r[12] are required to have their bottom two bits clear (be divisible by 4)
  17. m_state.r[0] &= 0x0FFFFFFF;
  18. m_state.r[1] &= 0x0FFFFFFC;
  19. m_state.r[2] &= 0x0FFFFFFC;
  20. m_state.r[3] &= 0x0FFFFFFC;
  21. for (size_t i = 16; i < 32; i += 4) {
  22. m_state.s[(i - 16) / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(key.offset(i)));
  23. }
  24. }
  25. void Poly1305::update(ReadonlyBytes message)
  26. {
  27. size_t offset = 0;
  28. while (offset < message.size()) {
  29. u32 n = min(message.size() - offset, 16 - m_state.block_count);
  30. memcpy(m_state.blocks + m_state.block_count, message.offset_pointer(offset), n);
  31. m_state.block_count += n;
  32. offset += n;
  33. if (m_state.block_count == 16) {
  34. process_block();
  35. m_state.block_count = 0;
  36. }
  37. }
  38. }
  39. void Poly1305::process_block()
  40. {
  41. u32 a[5];
  42. u8 n = m_state.block_count;
  43. // Add one bit beyond the number of octets. For a 16-byte block,
  44. // this is equivalent to adding 2^128 to the number. For the shorter
  45. // block, it can be 2^120, 2^112, or any power of two that is evenly
  46. // divisible by 8, all the way down to 2^8.
  47. m_state.blocks[n++] = 0x01;
  48. // If the block is not 17 bytes long (the last block), pad it with zeros.
  49. // This is meaningless if you are treating the blocks as numbers.
  50. while (n < 17) {
  51. m_state.blocks[n++] = 0x00;
  52. }
  53. // Read the block as a little-endian number.
  54. for (size_t i = 0; i < 16; i += 4) {
  55. a[i / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(m_state.blocks + i));
  56. }
  57. a[4] = m_state.blocks[16];
  58. // Add this number to the accumulator.
  59. m_state.a[0] += a[0];
  60. m_state.a[1] += a[1];
  61. m_state.a[2] += a[2];
  62. m_state.a[3] += a[3];
  63. m_state.a[4] += a[4];
  64. // Carry
  65. m_state.a[1] += m_state.a[0] >> 32;
  66. m_state.a[2] += m_state.a[1] >> 32;
  67. m_state.a[3] += m_state.a[2] >> 32;
  68. m_state.a[4] += m_state.a[3] >> 32;
  69. // Only consider the least significant bits
  70. a[0] = m_state.a[0] & 0xFFFFFFFF;
  71. a[1] = m_state.a[1] & 0xFFFFFFFF;
  72. a[2] = m_state.a[2] & 0xFFFFFFFF;
  73. a[3] = m_state.a[3] & 0xFFFFFFFF;
  74. a[4] = m_state.a[4] & 0xFFFFFFFF;
  75. // Multiply by r
  76. m_state.a[0] = (u64)a[0] * m_state.r[0];
  77. m_state.a[1] = (u64)a[0] * m_state.r[1] + (u64)a[1] * m_state.r[0];
  78. m_state.a[2] = (u64)a[0] * m_state.r[2] + (u64)a[1] * m_state.r[1] + (u64)a[2] * m_state.r[0];
  79. m_state.a[3] = (u64)a[0] * m_state.r[3] + (u64)a[1] * m_state.r[2] + (u64)a[2] * m_state.r[1] + (u64)a[3] * m_state.r[0];
  80. m_state.a[4] = (u64)a[1] * m_state.r[3] + (u64)a[2] * m_state.r[2] + (u64)a[3] * m_state.r[1] + (u64)a[4] * m_state.r[0];
  81. m_state.a[5] = (u64)a[2] * m_state.r[3] + (u64)a[3] * m_state.r[2] + (u64)a[4] * m_state.r[1];
  82. m_state.a[6] = (u64)a[3] * m_state.r[3] + (u64)a[4] * m_state.r[2];
  83. m_state.a[7] = (u64)a[4] * m_state.r[3];
  84. // Carry
  85. m_state.a[1] += m_state.a[0] >> 32;
  86. m_state.a[2] += m_state.a[1] >> 32;
  87. m_state.a[3] += m_state.a[2] >> 32;
  88. m_state.a[4] += m_state.a[3] >> 32;
  89. m_state.a[5] += m_state.a[4] >> 32;
  90. m_state.a[6] += m_state.a[5] >> 32;
  91. m_state.a[7] += m_state.a[6] >> 32;
  92. // Save the high part of the accumulator
  93. a[0] = m_state.a[4] & 0xFFFFFFFC;
  94. a[1] = m_state.a[5] & 0xFFFFFFFF;
  95. a[2] = m_state.a[6] & 0xFFFFFFFF;
  96. a[3] = m_state.a[7] & 0xFFFFFFFF;
  97. // Only consider the least significant bits
  98. m_state.a[0] &= 0xFFFFFFFF;
  99. m_state.a[1] &= 0xFFFFFFFF;
  100. m_state.a[2] &= 0xFFFFFFFF;
  101. m_state.a[3] &= 0xFFFFFFFF;
  102. m_state.a[4] &= 0x00000003;
  103. // Fast modular reduction (first pass)
  104. m_state.a[0] += a[0];
  105. m_state.a[0] += (a[0] >> 2) | (a[1] << 30);
  106. m_state.a[1] += a[1];
  107. m_state.a[1] += (a[1] >> 2) | (a[2] << 30);
  108. m_state.a[2] += a[2];
  109. m_state.a[2] += (a[2] >> 2) | (a[3] << 30);
  110. m_state.a[3] += a[3];
  111. m_state.a[3] += (a[3] >> 2);
  112. // Carry
  113. m_state.a[1] += m_state.a[0] >> 32;
  114. m_state.a[2] += m_state.a[1] >> 32;
  115. m_state.a[3] += m_state.a[2] >> 32;
  116. m_state.a[4] += m_state.a[3] >> 32;
  117. // Save the high part of the accumulator
  118. a[0] = m_state.a[4] & 0xFFFFFFFC;
  119. // Only consider the least significant bits
  120. m_state.a[0] &= 0xFFFFFFFF;
  121. m_state.a[1] &= 0xFFFFFFFF;
  122. m_state.a[2] &= 0xFFFFFFFF;
  123. m_state.a[3] &= 0xFFFFFFFF;
  124. m_state.a[4] &= 0x00000003;
  125. // Fast modular reduction (second pass)
  126. m_state.a[0] += a[0];
  127. m_state.a[0] += a[0] >> 2;
  128. // Carry
  129. m_state.a[1] += m_state.a[0] >> 32;
  130. m_state.a[2] += m_state.a[1] >> 32;
  131. m_state.a[3] += m_state.a[2] >> 32;
  132. m_state.a[4] += m_state.a[3] >> 32;
  133. // Only consider the least significant bits
  134. m_state.a[0] &= 0xFFFFFFFF;
  135. m_state.a[1] &= 0xFFFFFFFF;
  136. m_state.a[2] &= 0xFFFFFFFF;
  137. m_state.a[3] &= 0xFFFFFFFF;
  138. m_state.a[4] &= 0x00000003;
  139. }
  140. ErrorOr<ByteBuffer> Poly1305::digest()
  141. {
  142. if (m_state.block_count != 0)
  143. process_block();
  144. u32 b[4];
  145. // Save the accumulator
  146. b[0] = m_state.a[0] & 0xFFFFFFFF;
  147. b[1] = m_state.a[1] & 0xFFFFFFFF;
  148. b[2] = m_state.a[2] & 0xFFFFFFFF;
  149. b[3] = m_state.a[3] & 0xFFFFFFFF;
  150. // Compute a + 5
  151. m_state.a[0] += 5;
  152. // Carry
  153. m_state.a[1] += m_state.a[0] >> 32;
  154. m_state.a[2] += m_state.a[1] >> 32;
  155. m_state.a[3] += m_state.a[2] >> 32;
  156. m_state.a[4] += m_state.a[3] >> 32;
  157. // Select mask based on (a + 5) >= 2^130
  158. u32 mask = ((m_state.a[4] & 0x04) >> 2) - 1;
  159. // Select based on mask
  160. m_state.a[0] = (m_state.a[0] & ~mask) | (b[0] & mask);
  161. m_state.a[1] = (m_state.a[1] & ~mask) | (b[1] & mask);
  162. m_state.a[2] = (m_state.a[2] & ~mask) | (b[2] & mask);
  163. m_state.a[3] = (m_state.a[3] & ~mask) | (b[3] & mask);
  164. // Finally, the value of the secret key "s" is added to the accumulator,
  165. // and the 128 least significant bits are serialized in little-endian
  166. // order to form the tag.
  167. m_state.a[0] += m_state.s[0];
  168. m_state.a[1] += m_state.s[1];
  169. m_state.a[2] += m_state.s[2];
  170. m_state.a[3] += m_state.s[3];
  171. // Carry
  172. m_state.a[1] += m_state.a[0] >> 32;
  173. m_state.a[2] += m_state.a[1] >> 32;
  174. m_state.a[3] += m_state.a[2] >> 32;
  175. m_state.a[4] += m_state.a[3] >> 32;
  176. // Only consider the least significant bits
  177. b[0] = m_state.a[0] & 0xFFFFFFFF;
  178. b[1] = m_state.a[1] & 0xFFFFFFFF;
  179. b[2] = m_state.a[2] & 0xFFFFFFFF;
  180. b[3] = m_state.a[3] & 0xFFFFFFFF;
  181. ByteBuffer output = TRY(ByteBuffer::create_uninitialized(16));
  182. for (auto i = 0; i < 4; i++) {
  183. ByteReader::store(output.offset_pointer(i * 4), AK::convert_between_host_and_little_endian(b[i]));
  184. }
  185. return output;
  186. }
  187. }