sum_ppc64le.s 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Copyright 2019 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build gc && !purego
  5. // +build gc,!purego
  6. #include "textflag.h"
  7. // This was ported from the amd64 implementation.
  // POLY1305_ADD adds one full 16-byte message block to the accumulator.
  //   msg        - register holding the address of the block; advanced by 16
  //   h0, h1, h2 - accumulator limbs, updated in place (h0 lowest)
  //   t0, t1, t2 - scratch registers, overwritten
  // The two 8-byte words at (msg) and 8(msg) are added to h0 and h1 with
  // the carry propagated, then t2 (= 1) plus the carry is added to h2,
  // i.e. the block is treated as having an extra 1 bit just above its
  // 128 data bits. The carry bit is clobbered.
  8. #define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
  9. MOVD (msg), t0; \
  10. MOVD 8(msg), t1; \
  11. MOVD $1, t2; \
  12. ADDC t0, h0, h0; \
  13. ADDE t1, h1, h1; \
  14. ADDE t2, h2; \
  15. ADD $16, msg
  // POLY1305_MUL multiplies the accumulator (h2:h1:h0) by (r1:r0) and
  // partially reduces the result back into h0, h1, h2.
  //   h0, h1, h2 - accumulator limbs, read and then overwritten with the result
  //   r0, r1     - multiplier limbs, read only
  //   t0 .. t5   - scratch registers, overwritten
  // Outline of the steps, in order:
  //   1. MULLD/MULHDU pairs form the low/high halves of the partial products
  //      r0*h0, r0*h1, r1*h0, r1*h1, plus the low halves of r0*h2 and r1*h2;
  //      they are summed with ADDC/ADDE/ADDZE into the four limbs
  //      t0 (lowest), t1, t2, t3 (highest).
  //   2. h0 = t0, h1 = t1, h2 = t2 & 3 (only the low two bits of t2 are kept).
  //   3. (t3 : t2 & ~3) is added to (h1 : h0), and then (t3 : t2) >> 2 is
  //      added as well, each time carrying into h2. Together this adds
  //      4*c + c = 5*c, where c is the part of the product above bit 130;
  //      this is consistent with a reduction modulo 2^130 - 5.
  // The instruction order matters: every ADDE/ADDZE consumes the carry left
  // by the preceding ADDC/ADDE. The carry bit is clobbered.
  16. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
  17. MULLD r0, h0, t0; \
  18. MULLD r0, h1, t4; \
  19. MULHDU r0, h0, t1; \
  20. MULHDU r0, h1, t5; \
  21. ADDC t4, t1, t1; \
  22. MULLD r0, h2, t2; \
  23. ADDZE t5; \
  24. MULHDU r1, h0, t4; \
  25. MULLD r1, h0, h0; \
  26. ADD t5, t2, t2; \
  27. ADDC h0, t1, t1; \
  28. MULLD h2, r1, t3; \
  29. ADDZE t4, h0; \
  30. MULHDU r1, h1, t5; \
  31. MULLD r1, h1, t4; \
  32. ADDC t4, t2, t2; \
  33. ADDE t5, t3, t3; \
  34. ADDC h0, t2, t2; \
  35. MOVD $-4, t4; \
  36. MOVD t0, h0; \
  37. MOVD t1, h1; \
  38. ADDZE t3; \
  39. ANDCC $3, t2, h2; \
  40. AND t2, t4, t0; \
  41. ADDC t0, h0, h0; \
  42. ADDE t3, h1, h1; \
  43. SLD $62, t3, t4; \
  44. SRD $2, t2; \
  45. ADDZE h2; \
  46. OR t4, t2, t2; \
  47. SRD $2, t3; \
  48. ADDC t2, h0, h0; \
  49. ADDE t3, h1, h1; \
  50. ADDZE h2
  // poly1305Mask is a 16-byte read-only constant made of two 8-byte words.
  // NOTE(review): it is not referenced by the code visible in this file;
  // presumably it is the clamping mask for the Poly1305 r value -- confirm
  // against the callers before relying on it or removing it.
  51. DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  52. DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  53. GLOBL ·poly1305Mask<>(SB), RODATA, $16
  54. // func update(state *[7]uint64, msg []byte)
  //
  // update absorbs msg into the accumulator held in state.
  // Layout of state as used here: words 0..2 are h0, h1, h2 (read and
  // written back), words 3..4 are r0, r1 (read only). The remaining words
  // are not touched by this routine.
  //
  // Full 16-byte blocks are processed in the loop. A trailing partial block
  // of 1..15 bytes is assembled in R16 (low 8 bytes) and R17 (high 8 bytes)
  // with a single 1 bit placed immediately after the last message byte,
  // added to h, and then run through the same multiply step.
  //
  // Register roles: R3 = state, R4 = msg pointer, R5 = bytes remaining,
  // R8/R9/R10 = h0/h1/h2, R11/R12 = r0/r1, R14 and R16..R23 = scratch.
  55. TEXT ·update(SB), $0-32
  56. MOVD state+0(FP), R3 // R3 = state pointer
  57. MOVD msg_base+8(FP), R4 // R4 = address of the next message byte
  58. MOVD msg_len+16(FP), R5 // R5 = number of message bytes remaining
  59. MOVD 0(R3), R8 // h0
  60. MOVD 8(R3), R9 // h1
  61. MOVD 16(R3), R10 // h2
  62. MOVD 24(R3), R11 // r0
  63. MOVD 32(R3), R12 // r1
  64. CMP R5, $16
  65. BLT bytes_between_0_and_15 // fewer than 16 bytes: no full block to process
  66. loop: // one full 16-byte block per iteration
  67. POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
  68. multiply: // also entered from carry after the final partial block is added
  69. POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
  70. ADD $-16, R5
  71. CMP R5, $16
  72. BGE loop
  73. bytes_between_0_and_15:
  74. CMP R5, $0
  75. BEQ done
  76. MOVD $0, R16 // low 8 bytes of the padded final block (added into h0)
  77. MOVD $0, R17 // high 8 bytes of the padded final block (added into h1)
  78. flush_buffer:
  79. CMP R5, $8
  80. BLE just1
  81. MOVD $8, R21
  82. SUB R21, R5, R21 // R21 = R5 - 8 = number of bytes beyond the first 8
  83. // Greater than 8 -- load the rightmost remaining bytes in msg
  84. // and put into R17 (h1)
  85. MOVD (R4)(R21), R17 // 8-byte load ending at the last message byte
  86. MOVD $16, R22
  87. // Find the offset to those bytes
  88. SUB R5, R22, R22 // R22 = 16 - R5
  89. SLD $3, R22 // convert the byte count to a bit count
  90. // Shift to get only the bytes in msg
  91. SRD R22, R17, R17 // drop the bytes that overlap the first 8 bytes of the block
  92. // Put 1 at high end
  93. MOVD $1, R23
  94. SLD $3, R21 // R21 = (R5 - 8) * 8 = bit position just past the last byte
  95. SLD R21, R23, R23
  96. OR R23, R17, R17
  97. // Remainder is 8
  98. MOVD $8, R5
  99. just1:
  100. CMP R5, $8
  101. BLT less8
  102. // Exactly 8
  103. MOVD (R4), R16
  104. CMP R17, $0
  105. // Check if we've already set R17; if not
  106. // set 1 to indicate end of msg.
  107. BNE carry
  108. MOVD $1, R17
  109. BR carry
  110. less8: // 1..7 bytes remain; gather them into R16 in 4/2/1-byte pieces
  111. MOVD $0, R16 // low 8 bytes of the padded final block, built up below
  112. MOVD $0, R22 // shift count
  113. CMP R5, $4
  114. BLT less4
  115. MOVWZ (R4), R16 // take 4 bytes, zero-extended
  116. ADD $4, R4
  117. ADD $-4, R5
  118. MOVD $32, R22 // the next piece goes above these 32 bits
  119. less4:
  120. CMP R5, $2
  121. BLT less2
  122. MOVHZ (R4), R21 // take 2 bytes, zero-extended
  123. SLD R22, R21, R21
  124. OR R16, R21, R16
  125. ADD $16, R22
  126. ADD $-2, R5
  127. ADD $2, R4
  128. less2:
  129. CMP R5, $0
  130. BEQ insert1
  131. MOVBZ (R4), R21 // take the last single byte
  132. SLD R22, R21, R21
  133. OR R16, R21, R16
  134. ADD $8, R22
  135. insert1:
  136. // Insert 1 at end of msg
  137. MOVD $1, R21
  138. SLD R22, R21, R21 // R22 = bit position just past the bytes gathered above
  139. OR R16, R21, R16
  140. carry:
  141. // Add new values to h0, h1, h2
  142. ADDC R16, R8
  143. ADDE R17, R9
  144. ADDZE R10, R10 // only the carry goes into h2; the 1 bit is already in R16/R17
  145. MOVD $16, R5 // the multiply step subtracts 16, leaving 0 so the routine exits via done
  146. ADD R5, R4
  147. BR multiply
  148. done:
  149. // Save h0, h1, h2 in state
  150. MOVD R8, 0(R3)
  151. MOVD R9, 8(R3)
  152. MOVD R10, 16(R3)
  153. RET
  153. RET