// sum_ref.go
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64,!arm gccgo appengine

package poly1305

// Based on original, public domain implementation from NaCl by D. J.
// Bernstein.

import (
	"encoding/binary"
	"math"
)

  9. const (
  10. alpham80 = 0.00000000558793544769287109375
  11. alpham48 = 24.0
  12. alpham16 = 103079215104.0
  13. alpha0 = 6755399441055744.0
  14. alpha18 = 1770887431076116955136.0
  15. alpha32 = 29014219670751100192948224.0
  16. alpha50 = 7605903601369376408980219232256.0
  17. alpha64 = 124615124604835863084731911901282304.0
  18. alpha82 = 32667107224410092492483962313449748299776.0
  19. alpha96 = 535217884764734955396857238543560676143529984.0
  20. alpha112 = 35076039295941670036888435985190792471742381031424.0
  21. alpha130 = 9194973245195333150150082162901855101712434733101613056.0
  22. scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
  23. offset0 = 6755408030990331.0
  24. offset1 = 29014256564239239022116864.0
  25. offset2 = 124615283061160854719918951570079744.0
  26. offset3 = 535219245894202480694386063513315216128475136.0
  27. )
  28. // Sum generates an authenticator for m using a one-time key and puts the
  29. // 16-byte result into out. Authenticating two different messages with the same
  30. // key allows an attacker to forge messages at will.
  31. func Sum(out *[16]byte, m []byte, key *[32]byte) {
  32. r := key
  33. s := key[16:]
  34. var (
  35. y7 float64
  36. y6 float64
  37. y1 float64
  38. y0 float64
  39. y5 float64
  40. y4 float64
  41. x7 float64
  42. x6 float64
  43. x1 float64
  44. x0 float64
  45. y3 float64
  46. y2 float64
  47. x5 float64
  48. r3lowx0 float64
  49. x4 float64
  50. r0lowx6 float64
  51. x3 float64
  52. r3highx0 float64
  53. x2 float64
  54. r0highx6 float64
  55. r0lowx0 float64
  56. sr1lowx6 float64
  57. r0highx0 float64
  58. sr1highx6 float64
  59. sr3low float64
  60. r1lowx0 float64
  61. sr2lowx6 float64
  62. r1highx0 float64
  63. sr2highx6 float64
  64. r2lowx0 float64
  65. sr3lowx6 float64
  66. r2highx0 float64
  67. sr3highx6 float64
  68. r1highx4 float64
  69. r1lowx4 float64
  70. r0highx4 float64
  71. r0lowx4 float64
  72. sr3highx4 float64
  73. sr3lowx4 float64
  74. sr2highx4 float64
  75. sr2lowx4 float64
  76. r0lowx2 float64
  77. r0highx2 float64
  78. r1lowx2 float64
  79. r1highx2 float64
  80. r2lowx2 float64
  81. r2highx2 float64
  82. sr3lowx2 float64
  83. sr3highx2 float64
  84. z0 float64
  85. z1 float64
  86. z2 float64
  87. z3 float64
  88. m0 int64
  89. m1 int64
  90. m2 int64
  91. m3 int64
  92. m00 uint32
  93. m01 uint32
  94. m02 uint32
  95. m03 uint32
  96. m10 uint32
  97. m11 uint32
  98. m12 uint32
  99. m13 uint32
  100. m20 uint32
  101. m21 uint32
  102. m22 uint32
  103. m23 uint32
  104. m30 uint32
  105. m31 uint32
  106. m32 uint32
  107. m33 uint64
  108. lbelow2 int32
  109. lbelow3 int32
  110. lbelow4 int32
  111. lbelow5 int32
  112. lbelow6 int32
  113. lbelow7 int32
  114. lbelow8 int32
  115. lbelow9 int32
  116. lbelow10 int32
  117. lbelow11 int32
  118. lbelow12 int32
  119. lbelow13 int32
  120. lbelow14 int32
  121. lbelow15 int32
  122. s00 uint32
  123. s01 uint32
  124. s02 uint32
  125. s03 uint32
  126. s10 uint32
  127. s11 uint32
  128. s12 uint32
  129. s13 uint32
  130. s20 uint32
  131. s21 uint32
  132. s22 uint32
  133. s23 uint32
  134. s30 uint32
  135. s31 uint32
  136. s32 uint32
  137. s33 uint32
  138. bits32 uint64
  139. f uint64
  140. f0 uint64
  141. f1 uint64
  142. f2 uint64
  143. f3 uint64
  144. f4 uint64
  145. g uint64
  146. g0 uint64
  147. g1 uint64
  148. g2 uint64
  149. g3 uint64
  150. g4 uint64
  151. )
  152. var p int32
  153. l := int32(len(m))
  154. r00 := uint32(r[0])
  155. r01 := uint32(r[1])
  156. r02 := uint32(r[2])
  157. r0 := int64(2151)
  158. r03 := uint32(r[3])
  159. r03 &= 15
  160. r0 <<= 51
  161. r10 := uint32(r[4])
  162. r10 &= 252
  163. r01 <<= 8
  164. r0 += int64(r00)
  165. r11 := uint32(r[5])
  166. r02 <<= 16
  167. r0 += int64(r01)
  168. r12 := uint32(r[6])
  169. r03 <<= 24
  170. r0 += int64(r02)
  171. r13 := uint32(r[7])
  172. r13 &= 15
  173. r1 := int64(2215)
  174. r0 += int64(r03)
  175. d0 := r0
  176. r1 <<= 51
  177. r2 := int64(2279)
  178. r20 := uint32(r[8])
  179. r20 &= 252
  180. r11 <<= 8
  181. r1 += int64(r10)
  182. r21 := uint32(r[9])
  183. r12 <<= 16
  184. r1 += int64(r11)
  185. r22 := uint32(r[10])
  186. r13 <<= 24
  187. r1 += int64(r12)
  188. r23 := uint32(r[11])
  189. r23 &= 15
  190. r2 <<= 51
  191. r1 += int64(r13)
  192. d1 := r1
  193. r21 <<= 8
  194. r2 += int64(r20)
  195. r30 := uint32(r[12])
  196. r30 &= 252
  197. r22 <<= 16
  198. r2 += int64(r21)
  199. r31 := uint32(r[13])
  200. r23 <<= 24
  201. r2 += int64(r22)
  202. r32 := uint32(r[14])
  203. r2 += int64(r23)
  204. r3 := int64(2343)
  205. d2 := r2
  206. r3 <<= 51
  207. r33 := uint32(r[15])
  208. r33 &= 15
  209. r31 <<= 8
  210. r3 += int64(r30)
  211. r32 <<= 16
  212. r3 += int64(r31)
  213. r33 <<= 24
  214. r3 += int64(r32)
  215. r3 += int64(r33)
  216. h0 := alpha32 - alpha32
  217. d3 := r3
  218. h1 := alpha32 - alpha32
  219. h2 := alpha32 - alpha32
  220. h3 := alpha32 - alpha32
  221. h4 := alpha32 - alpha32
  222. r0low := math.Float64frombits(uint64(d0))
  223. h5 := alpha32 - alpha32
  224. r1low := math.Float64frombits(uint64(d1))
  225. h6 := alpha32 - alpha32
  226. r2low := math.Float64frombits(uint64(d2))
  227. h7 := alpha32 - alpha32
  228. r0low -= alpha0
  229. r1low -= alpha32
  230. r2low -= alpha64
  231. r0high := r0low + alpha18
  232. r3low := math.Float64frombits(uint64(d3))
  233. r1high := r1low + alpha50
  234. sr1low := scale * r1low
  235. r2high := r2low + alpha82
  236. sr2low := scale * r2low
  237. r0high -= alpha18
  238. r0high_stack := r0high
  239. r3low -= alpha96
  240. r1high -= alpha50
  241. r1high_stack := r1high
  242. sr1high := sr1low + alpham80
  243. r0low -= r0high
  244. r2high -= alpha82
  245. sr3low = scale * r3low
  246. sr2high := sr2low + alpham48
  247. r1low -= r1high
  248. r1low_stack := r1low
  249. sr1high -= alpham80
  250. sr1high_stack := sr1high
  251. r2low -= r2high
  252. r2low_stack := r2low
  253. sr2high -= alpham48
  254. sr2high_stack := sr2high
  255. r3high := r3low + alpha112
  256. r0low_stack := r0low
  257. sr1low -= sr1high
  258. sr1low_stack := sr1low
  259. sr3high := sr3low + alpham16
  260. r2high_stack := r2high
  261. sr2low -= sr2high
  262. sr2low_stack := sr2low
  263. r3high -= alpha112
  264. r3high_stack := r3high
  265. sr3high -= alpham16
  266. sr3high_stack := sr3high
  267. r3low -= r3high
  268. r3low_stack := r3low
  269. sr3low -= sr3high
  270. sr3low_stack := sr3low
  271. if l < 16 {
  272. goto addatmost15bytes
  273. }
  274. m00 = uint32(m[p+0])
  275. m0 = 2151
  276. m0 <<= 51
  277. m1 = 2215
  278. m01 = uint32(m[p+1])
  279. m1 <<= 51
  280. m2 = 2279
  281. m02 = uint32(m[p+2])
  282. m2 <<= 51
  283. m3 = 2343
  284. m03 = uint32(m[p+3])
  285. m10 = uint32(m[p+4])
  286. m01 <<= 8
  287. m0 += int64(m00)
  288. m11 = uint32(m[p+5])
  289. m02 <<= 16
  290. m0 += int64(m01)
  291. m12 = uint32(m[p+6])
  292. m03 <<= 24
  293. m0 += int64(m02)
  294. m13 = uint32(m[p+7])
  295. m3 <<= 51
  296. m0 += int64(m03)
  297. m20 = uint32(m[p+8])
  298. m11 <<= 8
  299. m1 += int64(m10)
  300. m21 = uint32(m[p+9])
  301. m12 <<= 16
  302. m1 += int64(m11)
  303. m22 = uint32(m[p+10])
  304. m13 <<= 24
  305. m1 += int64(m12)
  306. m23 = uint32(m[p+11])
  307. m1 += int64(m13)
  308. m30 = uint32(m[p+12])
  309. m21 <<= 8
  310. m2 += int64(m20)
  311. m31 = uint32(m[p+13])
  312. m22 <<= 16
  313. m2 += int64(m21)
  314. m32 = uint32(m[p+14])
  315. m23 <<= 24
  316. m2 += int64(m22)
  317. m33 = uint64(m[p+15])
  318. m2 += int64(m23)
  319. d0 = m0
  320. m31 <<= 8
  321. m3 += int64(m30)
  322. d1 = m1
  323. m32 <<= 16
  324. m3 += int64(m31)
  325. d2 = m2
  326. m33 += 256
  327. m33 <<= 24
  328. m3 += int64(m32)
  329. m3 += int64(m33)
  330. d3 = m3
  331. p += 16
  332. l -= 16
  333. z0 = math.Float64frombits(uint64(d0))
  334. z1 = math.Float64frombits(uint64(d1))
  335. z2 = math.Float64frombits(uint64(d2))
  336. z3 = math.Float64frombits(uint64(d3))
  337. z0 -= alpha0
  338. z1 -= alpha32
  339. z2 -= alpha64
  340. z3 -= alpha96
  341. h0 += z0
  342. h1 += z1
  343. h3 += z2
  344. h5 += z3
  345. if l < 16 {
  346. goto multiplyaddatmost15bytes
  347. }
  348. multiplyaddatleast16bytes:
  349. m2 = 2279
  350. m20 = uint32(m[p+8])
  351. y7 = h7 + alpha130
  352. m2 <<= 51
  353. m3 = 2343
  354. m21 = uint32(m[p+9])
  355. y6 = h6 + alpha130
  356. m3 <<= 51
  357. m0 = 2151
  358. m22 = uint32(m[p+10])
  359. y1 = h1 + alpha32
  360. m0 <<= 51
  361. m1 = 2215
  362. m23 = uint32(m[p+11])
  363. y0 = h0 + alpha32
  364. m1 <<= 51
  365. m30 = uint32(m[p+12])
  366. y7 -= alpha130
  367. m21 <<= 8
  368. m2 += int64(m20)
  369. m31 = uint32(m[p+13])
  370. y6 -= alpha130
  371. m22 <<= 16
  372. m2 += int64(m21)
  373. m32 = uint32(m[p+14])
  374. y1 -= alpha32
  375. m23 <<= 24
  376. m2 += int64(m22)
  377. m33 = uint64(m[p+15])
  378. y0 -= alpha32
  379. m2 += int64(m23)
  380. m00 = uint32(m[p+0])
  381. y5 = h5 + alpha96
  382. m31 <<= 8
  383. m3 += int64(m30)
  384. m01 = uint32(m[p+1])
  385. y4 = h4 + alpha96
  386. m32 <<= 16
  387. m02 = uint32(m[p+2])
  388. x7 = h7 - y7
  389. y7 *= scale
  390. m33 += 256
  391. m03 = uint32(m[p+3])
  392. x6 = h6 - y6
  393. y6 *= scale
  394. m33 <<= 24
  395. m3 += int64(m31)
  396. m10 = uint32(m[p+4])
  397. x1 = h1 - y1
  398. m01 <<= 8
  399. m3 += int64(m32)
  400. m11 = uint32(m[p+5])
  401. x0 = h0 - y0
  402. m3 += int64(m33)
  403. m0 += int64(m00)
  404. m12 = uint32(m[p+6])
  405. y5 -= alpha96
  406. m02 <<= 16
  407. m0 += int64(m01)
  408. m13 = uint32(m[p+7])
  409. y4 -= alpha96
  410. m03 <<= 24
  411. m0 += int64(m02)
  412. d2 = m2
  413. x1 += y7
  414. m0 += int64(m03)
  415. d3 = m3
  416. x0 += y6
  417. m11 <<= 8
  418. m1 += int64(m10)
  419. d0 = m0
  420. x7 += y5
  421. m12 <<= 16
  422. m1 += int64(m11)
  423. x6 += y4
  424. m13 <<= 24
  425. m1 += int64(m12)
  426. y3 = h3 + alpha64
  427. m1 += int64(m13)
  428. d1 = m1
  429. y2 = h2 + alpha64
  430. x0 += x1
  431. x6 += x7
  432. y3 -= alpha64
  433. r3low = r3low_stack
  434. y2 -= alpha64
  435. r0low = r0low_stack
  436. x5 = h5 - y5
  437. r3lowx0 = r3low * x0
  438. r3high = r3high_stack
  439. x4 = h4 - y4
  440. r0lowx6 = r0low * x6
  441. r0high = r0high_stack
  442. x3 = h3 - y3
  443. r3highx0 = r3high * x0
  444. sr1low = sr1low_stack
  445. x2 = h2 - y2
  446. r0highx6 = r0high * x6
  447. sr1high = sr1high_stack
  448. x5 += y3
  449. r0lowx0 = r0low * x0
  450. r1low = r1low_stack
  451. h6 = r3lowx0 + r0lowx6
  452. sr1lowx6 = sr1low * x6
  453. r1high = r1high_stack
  454. x4 += y2
  455. r0highx0 = r0high * x0
  456. sr2low = sr2low_stack
  457. h7 = r3highx0 + r0highx6
  458. sr1highx6 = sr1high * x6
  459. sr2high = sr2high_stack
  460. x3 += y1
  461. r1lowx0 = r1low * x0
  462. r2low = r2low_stack
  463. h0 = r0lowx0 + sr1lowx6
  464. sr2lowx6 = sr2low * x6
  465. r2high = r2high_stack
  466. x2 += y0
  467. r1highx0 = r1high * x0
  468. sr3low = sr3low_stack
  469. h1 = r0highx0 + sr1highx6
  470. sr2highx6 = sr2high * x6
  471. sr3high = sr3high_stack
  472. x4 += x5
  473. r2lowx0 = r2low * x0
  474. z2 = math.Float64frombits(uint64(d2))
  475. h2 = r1lowx0 + sr2lowx6
  476. sr3lowx6 = sr3low * x6
  477. x2 += x3
  478. r2highx0 = r2high * x0
  479. z3 = math.Float64frombits(uint64(d3))
  480. h3 = r1highx0 + sr2highx6
  481. sr3highx6 = sr3high * x6
  482. r1highx4 = r1high * x4
  483. z2 -= alpha64
  484. h4 = r2lowx0 + sr3lowx6
  485. r1lowx4 = r1low * x4
  486. r0highx4 = r0high * x4
  487. z3 -= alpha96
  488. h5 = r2highx0 + sr3highx6
  489. r0lowx4 = r0low * x4
  490. h7 += r1highx4
  491. sr3highx4 = sr3high * x4
  492. h6 += r1lowx4
  493. sr3lowx4 = sr3low * x4
  494. h5 += r0highx4
  495. sr2highx4 = sr2high * x4
  496. h4 += r0lowx4
  497. sr2lowx4 = sr2low * x4
  498. h3 += sr3highx4
  499. r0lowx2 = r0low * x2
  500. h2 += sr3lowx4
  501. r0highx2 = r0high * x2
  502. h1 += sr2highx4
  503. r1lowx2 = r1low * x2
  504. h0 += sr2lowx4
  505. r1highx2 = r1high * x2
  506. h2 += r0lowx2
  507. r2lowx2 = r2low * x2
  508. h3 += r0highx2
  509. r2highx2 = r2high * x2
  510. h4 += r1lowx2
  511. sr3lowx2 = sr3low * x2
  512. h5 += r1highx2
  513. sr3highx2 = sr3high * x2
  514. p += 16
  515. l -= 16
  516. h6 += r2lowx2
  517. h7 += r2highx2
  518. z1 = math.Float64frombits(uint64(d1))
  519. h0 += sr3lowx2
  520. z0 = math.Float64frombits(uint64(d0))
  521. h1 += sr3highx2
  522. z1 -= alpha32
  523. z0 -= alpha0
  524. h5 += z3
  525. h3 += z2
  526. h1 += z1
  527. h0 += z0
  528. if l >= 16 {
  529. goto multiplyaddatleast16bytes
  530. }
  531. multiplyaddatmost15bytes:
  532. y7 = h7 + alpha130
  533. y6 = h6 + alpha130
  534. y1 = h1 + alpha32
  535. y0 = h0 + alpha32
  536. y7 -= alpha130
  537. y6 -= alpha130
  538. y1 -= alpha32
  539. y0 -= alpha32
  540. y5 = h5 + alpha96
  541. y4 = h4 + alpha96
  542. x7 = h7 - y7
  543. y7 *= scale
  544. x6 = h6 - y6
  545. y6 *= scale
  546. x1 = h1 - y1
  547. x0 = h0 - y0
  548. y5 -= alpha96
  549. y4 -= alpha96
  550. x1 += y7
  551. x0 += y6
  552. x7 += y5
  553. x6 += y4
  554. y3 = h3 + alpha64
  555. y2 = h2 + alpha64
  556. x0 += x1
  557. x6 += x7
  558. y3 -= alpha64
  559. r3low = r3low_stack
  560. y2 -= alpha64
  561. r0low = r0low_stack
  562. x5 = h5 - y5
  563. r3lowx0 = r3low * x0
  564. r3high = r3high_stack
  565. x4 = h4 - y4
  566. r0lowx6 = r0low * x6
  567. r0high = r0high_stack
  568. x3 = h3 - y3
  569. r3highx0 = r3high * x0
  570. sr1low = sr1low_stack
  571. x2 = h2 - y2
  572. r0highx6 = r0high * x6
  573. sr1high = sr1high_stack
  574. x5 += y3
  575. r0lowx0 = r0low * x0
  576. r1low = r1low_stack
  577. h6 = r3lowx0 + r0lowx6
  578. sr1lowx6 = sr1low * x6
  579. r1high = r1high_stack
  580. x4 += y2
  581. r0highx0 = r0high * x0
  582. sr2low = sr2low_stack
  583. h7 = r3highx0 + r0highx6
  584. sr1highx6 = sr1high * x6
  585. sr2high = sr2high_stack
  586. x3 += y1
  587. r1lowx0 = r1low * x0
  588. r2low = r2low_stack
  589. h0 = r0lowx0 + sr1lowx6
  590. sr2lowx6 = sr2low * x6
  591. r2high = r2high_stack
  592. x2 += y0
  593. r1highx0 = r1high * x0
  594. sr3low = sr3low_stack
  595. h1 = r0highx0 + sr1highx6
  596. sr2highx6 = sr2high * x6
  597. sr3high = sr3high_stack
  598. x4 += x5
  599. r2lowx0 = r2low * x0
  600. h2 = r1lowx0 + sr2lowx6
  601. sr3lowx6 = sr3low * x6
  602. x2 += x3
  603. r2highx0 = r2high * x0
  604. h3 = r1highx0 + sr2highx6
  605. sr3highx6 = sr3high * x6
  606. r1highx4 = r1high * x4
  607. h4 = r2lowx0 + sr3lowx6
  608. r1lowx4 = r1low * x4
  609. r0highx4 = r0high * x4
  610. h5 = r2highx0 + sr3highx6
  611. r0lowx4 = r0low * x4
  612. h7 += r1highx4
  613. sr3highx4 = sr3high * x4
  614. h6 += r1lowx4
  615. sr3lowx4 = sr3low * x4
  616. h5 += r0highx4
  617. sr2highx4 = sr2high * x4
  618. h4 += r0lowx4
  619. sr2lowx4 = sr2low * x4
  620. h3 += sr3highx4
  621. r0lowx2 = r0low * x2
  622. h2 += sr3lowx4
  623. r0highx2 = r0high * x2
  624. h1 += sr2highx4
  625. r1lowx2 = r1low * x2
  626. h0 += sr2lowx4
  627. r1highx2 = r1high * x2
  628. h2 += r0lowx2
  629. r2lowx2 = r2low * x2
  630. h3 += r0highx2
  631. r2highx2 = r2high * x2
  632. h4 += r1lowx2
  633. sr3lowx2 = sr3low * x2
  634. h5 += r1highx2
  635. sr3highx2 = sr3high * x2
  636. h6 += r2lowx2
  637. h7 += r2highx2
  638. h0 += sr3lowx2
  639. h1 += sr3highx2
  640. addatmost15bytes:
  641. if l == 0 {
  642. goto nomorebytes
  643. }
  644. lbelow2 = l - 2
  645. lbelow3 = l - 3
  646. lbelow2 >>= 31
  647. lbelow4 = l - 4
  648. m00 = uint32(m[p+0])
  649. lbelow3 >>= 31
  650. p += lbelow2
  651. m01 = uint32(m[p+1])
  652. lbelow4 >>= 31
  653. p += lbelow3
  654. m02 = uint32(m[p+2])
  655. p += lbelow4
  656. m0 = 2151
  657. m03 = uint32(m[p+3])
  658. m0 <<= 51
  659. m1 = 2215
  660. m0 += int64(m00)
  661. m01 &^= uint32(lbelow2)
  662. m02 &^= uint32(lbelow3)
  663. m01 -= uint32(lbelow2)
  664. m01 <<= 8
  665. m03 &^= uint32(lbelow4)
  666. m0 += int64(m01)
  667. lbelow2 -= lbelow3
  668. m02 += uint32(lbelow2)
  669. lbelow3 -= lbelow4
  670. m02 <<= 16
  671. m03 += uint32(lbelow3)
  672. m03 <<= 24
  673. m0 += int64(m02)
  674. m0 += int64(m03)
  675. lbelow5 = l - 5
  676. lbelow6 = l - 6
  677. lbelow7 = l - 7
  678. lbelow5 >>= 31
  679. lbelow8 = l - 8
  680. lbelow6 >>= 31
  681. p += lbelow5
  682. m10 = uint32(m[p+4])
  683. lbelow7 >>= 31
  684. p += lbelow6
  685. m11 = uint32(m[p+5])
  686. lbelow8 >>= 31
  687. p += lbelow7
  688. m12 = uint32(m[p+6])
  689. m1 <<= 51
  690. p += lbelow8
  691. m13 = uint32(m[p+7])
  692. m10 &^= uint32(lbelow5)
  693. lbelow4 -= lbelow5
  694. m10 += uint32(lbelow4)
  695. lbelow5 -= lbelow6
  696. m11 &^= uint32(lbelow6)
  697. m11 += uint32(lbelow5)
  698. m11 <<= 8
  699. m1 += int64(m10)
  700. m1 += int64(m11)
  701. m12 &^= uint32(lbelow7)
  702. lbelow6 -= lbelow7
  703. m13 &^= uint32(lbelow8)
  704. m12 += uint32(lbelow6)
  705. lbelow7 -= lbelow8
  706. m12 <<= 16
  707. m13 += uint32(lbelow7)
  708. m13 <<= 24
  709. m1 += int64(m12)
  710. m1 += int64(m13)
  711. m2 = 2279
  712. lbelow9 = l - 9
  713. m3 = 2343
  714. lbelow10 = l - 10
  715. lbelow11 = l - 11
  716. lbelow9 >>= 31
  717. lbelow12 = l - 12
  718. lbelow10 >>= 31
  719. p += lbelow9
  720. m20 = uint32(m[p+8])
  721. lbelow11 >>= 31
  722. p += lbelow10
  723. m21 = uint32(m[p+9])
  724. lbelow12 >>= 31
  725. p += lbelow11
  726. m22 = uint32(m[p+10])
  727. m2 <<= 51
  728. p += lbelow12
  729. m23 = uint32(m[p+11])
  730. m20 &^= uint32(lbelow9)
  731. lbelow8 -= lbelow9
  732. m20 += uint32(lbelow8)
  733. lbelow9 -= lbelow10
  734. m21 &^= uint32(lbelow10)
  735. m21 += uint32(lbelow9)
  736. m21 <<= 8
  737. m2 += int64(m20)
  738. m2 += int64(m21)
  739. m22 &^= uint32(lbelow11)
  740. lbelow10 -= lbelow11
  741. m23 &^= uint32(lbelow12)
  742. m22 += uint32(lbelow10)
  743. lbelow11 -= lbelow12
  744. m22 <<= 16
  745. m23 += uint32(lbelow11)
  746. m23 <<= 24
  747. m2 += int64(m22)
  748. m3 <<= 51
  749. lbelow13 = l - 13
  750. lbelow13 >>= 31
  751. lbelow14 = l - 14
  752. lbelow14 >>= 31
  753. p += lbelow13
  754. lbelow15 = l - 15
  755. m30 = uint32(m[p+12])
  756. lbelow15 >>= 31
  757. p += lbelow14
  758. m31 = uint32(m[p+13])
  759. p += lbelow15
  760. m2 += int64(m23)
  761. m32 = uint32(m[p+14])
  762. m30 &^= uint32(lbelow13)
  763. lbelow12 -= lbelow13
  764. m30 += uint32(lbelow12)
  765. lbelow13 -= lbelow14
  766. m3 += int64(m30)
  767. m31 &^= uint32(lbelow14)
  768. m31 += uint32(lbelow13)
  769. m32 &^= uint32(lbelow15)
  770. m31 <<= 8
  771. lbelow14 -= lbelow15
  772. m3 += int64(m31)
  773. m32 += uint32(lbelow14)
  774. d0 = m0
  775. m32 <<= 16
  776. m33 = uint64(lbelow15 + 1)
  777. d1 = m1
  778. m33 <<= 24
  779. m3 += int64(m32)
  780. d2 = m2
  781. m3 += int64(m33)
  782. d3 = m3
  783. z3 = math.Float64frombits(uint64(d3))
  784. z2 = math.Float64frombits(uint64(d2))
  785. z1 = math.Float64frombits(uint64(d1))
  786. z0 = math.Float64frombits(uint64(d0))
  787. z3 -= alpha96
  788. z2 -= alpha64
  789. z1 -= alpha32
  790. z0 -= alpha0
  791. h5 += z3
  792. h3 += z2
  793. h1 += z1
  794. h0 += z0
  795. y7 = h7 + alpha130
  796. y6 = h6 + alpha130
  797. y1 = h1 + alpha32
  798. y0 = h0 + alpha32
  799. y7 -= alpha130
  800. y6 -= alpha130
  801. y1 -= alpha32
  802. y0 -= alpha32
  803. y5 = h5 + alpha96
  804. y4 = h4 + alpha96
  805. x7 = h7 - y7
  806. y7 *= scale
  807. x6 = h6 - y6
  808. y6 *= scale
  809. x1 = h1 - y1
  810. x0 = h0 - y0
  811. y5 -= alpha96
  812. y4 -= alpha96
  813. x1 += y7
  814. x0 += y6
  815. x7 += y5
  816. x6 += y4
  817. y3 = h3 + alpha64
  818. y2 = h2 + alpha64
  819. x0 += x1
  820. x6 += x7
  821. y3 -= alpha64
  822. r3low = r3low_stack
  823. y2 -= alpha64
  824. r0low = r0low_stack
  825. x5 = h5 - y5
  826. r3lowx0 = r3low * x0
  827. r3high = r3high_stack
  828. x4 = h4 - y4
  829. r0lowx6 = r0low * x6
  830. r0high = r0high_stack
  831. x3 = h3 - y3
  832. r3highx0 = r3high * x0
  833. sr1low = sr1low_stack
  834. x2 = h2 - y2
  835. r0highx6 = r0high * x6
  836. sr1high = sr1high_stack
  837. x5 += y3
  838. r0lowx0 = r0low * x0
  839. r1low = r1low_stack
  840. h6 = r3lowx0 + r0lowx6
  841. sr1lowx6 = sr1low * x6
  842. r1high = r1high_stack
  843. x4 += y2
  844. r0highx0 = r0high * x0
  845. sr2low = sr2low_stack
  846. h7 = r3highx0 + r0highx6
  847. sr1highx6 = sr1high * x6
  848. sr2high = sr2high_stack
  849. x3 += y1
  850. r1lowx0 = r1low * x0
  851. r2low = r2low_stack
  852. h0 = r0lowx0 + sr1lowx6
  853. sr2lowx6 = sr2low * x6
  854. r2high = r2high_stack
  855. x2 += y0
  856. r1highx0 = r1high * x0
  857. sr3low = sr3low_stack
  858. h1 = r0highx0 + sr1highx6
  859. sr2highx6 = sr2high * x6
  860. sr3high = sr3high_stack
  861. x4 += x5
  862. r2lowx0 = r2low * x0
  863. h2 = r1lowx0 + sr2lowx6
  864. sr3lowx6 = sr3low * x6
  865. x2 += x3
  866. r2highx0 = r2high * x0
  867. h3 = r1highx0 + sr2highx6
  868. sr3highx6 = sr3high * x6
  869. r1highx4 = r1high * x4
  870. h4 = r2lowx0 + sr3lowx6
  871. r1lowx4 = r1low * x4
  872. r0highx4 = r0high * x4
  873. h5 = r2highx0 + sr3highx6
  874. r0lowx4 = r0low * x4
  875. h7 += r1highx4
  876. sr3highx4 = sr3high * x4
  877. h6 += r1lowx4
  878. sr3lowx4 = sr3low * x4
  879. h5 += r0highx4
  880. sr2highx4 = sr2high * x4
  881. h4 += r0lowx4
  882. sr2lowx4 = sr2low * x4
  883. h3 += sr3highx4
  884. r0lowx2 = r0low * x2
  885. h2 += sr3lowx4
  886. r0highx2 = r0high * x2
  887. h1 += sr2highx4
  888. r1lowx2 = r1low * x2
  889. h0 += sr2lowx4
  890. r1highx2 = r1high * x2
  891. h2 += r0lowx2
  892. r2lowx2 = r2low * x2
  893. h3 += r0highx2
  894. r2highx2 = r2high * x2
  895. h4 += r1lowx2
  896. sr3lowx2 = sr3low * x2
  897. h5 += r1highx2
  898. sr3highx2 = sr3high * x2
  899. h6 += r2lowx2
  900. h7 += r2highx2
  901. h0 += sr3lowx2
  902. h1 += sr3highx2
  903. nomorebytes:
  904. y7 = h7 + alpha130
  905. y0 = h0 + alpha32
  906. y1 = h1 + alpha32
  907. y2 = h2 + alpha64
  908. y7 -= alpha130
  909. y3 = h3 + alpha64
  910. y4 = h4 + alpha96
  911. y5 = h5 + alpha96
  912. x7 = h7 - y7
  913. y7 *= scale
  914. y0 -= alpha32
  915. y1 -= alpha32
  916. y2 -= alpha64
  917. h6 += x7
  918. y3 -= alpha64
  919. y4 -= alpha96
  920. y5 -= alpha96
  921. y6 = h6 + alpha130
  922. x0 = h0 - y0
  923. x1 = h1 - y1
  924. x2 = h2 - y2
  925. y6 -= alpha130
  926. x0 += y7
  927. x3 = h3 - y3
  928. x4 = h4 - y4
  929. x5 = h5 - y5
  930. x6 = h6 - y6
  931. y6 *= scale
  932. x2 += y0
  933. x3 += y1
  934. x4 += y2
  935. x0 += y6
  936. x5 += y3
  937. x6 += y4
  938. x2 += x3
  939. x0 += x1
  940. x4 += x5
  941. x6 += y5
  942. x2 += offset1
  943. d1 = int64(math.Float64bits(x2))
  944. x0 += offset0
  945. d0 = int64(math.Float64bits(x0))
  946. x4 += offset2
  947. d2 = int64(math.Float64bits(x4))
  948. x6 += offset3
  949. d3 = int64(math.Float64bits(x6))
  950. f0 = uint64(d0)
  951. f1 = uint64(d1)
  952. bits32 = math.MaxUint64
  953. f2 = uint64(d2)
  954. bits32 >>= 32
  955. f3 = uint64(d3)
  956. f = f0 >> 32
  957. f0 &= bits32
  958. f &= 255
  959. f1 += f
  960. g0 = f0 + 5
  961. g = g0 >> 32
  962. g0 &= bits32
  963. f = f1 >> 32
  964. f1 &= bits32
  965. f &= 255
  966. g1 = f1 + g
  967. g = g1 >> 32
  968. f2 += f
  969. f = f2 >> 32
  970. g1 &= bits32
  971. f2 &= bits32
  972. f &= 255
  973. f3 += f
  974. g2 = f2 + g
  975. g = g2 >> 32
  976. g2 &= bits32
  977. f4 = f3 >> 32
  978. f3 &= bits32
  979. f4 &= 255
  980. g3 = f3 + g
  981. g = g3 >> 32
  982. g3 &= bits32
  983. g4 = f4 + g
  984. g4 = g4 - 4
  985. s00 = uint32(s[0])
  986. f = uint64(int64(g4) >> 63)
  987. s01 = uint32(s[1])
  988. f0 &= f
  989. g0 &^= f
  990. s02 = uint32(s[2])
  991. f1 &= f
  992. f0 |= g0
  993. s03 = uint32(s[3])
  994. g1 &^= f
  995. f2 &= f
  996. s10 = uint32(s[4])
  997. f3 &= f
  998. g2 &^= f
  999. s11 = uint32(s[5])
  1000. g3 &^= f
  1001. f1 |= g1
  1002. s12 = uint32(s[6])
  1003. f2 |= g2
  1004. f3 |= g3
  1005. s13 = uint32(s[7])
  1006. s01 <<= 8
  1007. f0 += uint64(s00)
  1008. s20 = uint32(s[8])
  1009. s02 <<= 16
  1010. f0 += uint64(s01)
  1011. s21 = uint32(s[9])
  1012. s03 <<= 24
  1013. f0 += uint64(s02)
  1014. s22 = uint32(s[10])
  1015. s11 <<= 8
  1016. f1 += uint64(s10)
  1017. s23 = uint32(s[11])
  1018. s12 <<= 16
  1019. f1 += uint64(s11)
  1020. s30 = uint32(s[12])
  1021. s13 <<= 24
  1022. f1 += uint64(s12)
  1023. s31 = uint32(s[13])
  1024. f0 += uint64(s03)
  1025. f1 += uint64(s13)
  1026. s32 = uint32(s[14])
  1027. s21 <<= 8
  1028. f2 += uint64(s20)
  1029. s33 = uint32(s[15])
  1030. s22 <<= 16
  1031. f2 += uint64(s21)
  1032. s23 <<= 24
  1033. f2 += uint64(s22)
  1034. s31 <<= 8
  1035. f3 += uint64(s30)
  1036. s32 <<= 16
  1037. f3 += uint64(s31)
  1038. s33 <<= 24
  1039. f3 += uint64(s32)
  1040. f2 += uint64(s23)
  1041. f3 += uint64(s33)
  1042. out[0] = byte(f0)
  1043. f0 >>= 8
  1044. out[1] = byte(f0)
  1045. f0 >>= 8
  1046. out[2] = byte(f0)
  1047. f0 >>= 8
  1048. out[3] = byte(f0)
  1049. f0 >>= 8
  1050. f1 += f0
  1051. out[4] = byte(f1)
  1052. f1 >>= 8
  1053. out[5] = byte(f1)
  1054. f1 >>= 8
  1055. out[6] = byte(f1)
  1056. f1 >>= 8
  1057. out[7] = byte(f1)
  1058. f1 >>= 8
  1059. f2 += f1
  1060. out[8] = byte(f2)
  1061. f2 >>= 8
  1062. out[9] = byte(f2)
  1063. f2 >>= 8
  1064. out[10] = byte(f2)
  1065. f2 >>= 8
  1066. out[11] = byte(f2)
  1067. f2 >>= 8
  1068. f3 += f2
  1069. out[12] = byte(f3)
  1070. f3 >>= 8
  1071. out[13] = byte(f3)
  1072. f3 >>= 8
  1073. out[14] = byte(f3)
  1074. f3 >>= 8
  1075. out[15] = byte(f3)
  1076. }