|
@@ -2,13 +2,13 @@
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
-// +build amd64,!appengine,!gccgo
|
|
|
+// +build amd64,!purego,gc
|
|
|
|
|
|
// This code was translated into a form compatible with 6a from the public
|
|
|
// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
|
|
|
|
|
|
// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
|
|
|
-// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size.
|
|
|
+// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
|
|
|
TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVQ out+0(FP),DI
|
|
|
MOVQ in+8(FP),SI
|
|
@@ -17,10 +17,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVQ key+32(FP),R8
|
|
|
|
|
|
MOVQ SP,R12
|
|
|
- MOVQ SP,R9
|
|
|
- ADDQ $31, R9
|
|
|
- ANDQ $~31, R9
|
|
|
- MOVQ R9, SP
|
|
|
+ ADDQ $31, R12
|
|
|
+ ANDQ $~31, R12
|
|
|
|
|
|
MOVQ DX,R9
|
|
|
MOVQ CX,DX
|
|
@@ -32,116 +30,116 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL 0(R10),R8
|
|
|
MOVL 0(DX),AX
|
|
|
MOVL 16(R10),R11
|
|
|
- MOVL CX,0(SP)
|
|
|
- MOVL R8, 4 (SP)
|
|
|
- MOVL AX, 8 (SP)
|
|
|
- MOVL R11, 12 (SP)
|
|
|
+ MOVL CX,0(R12)
|
|
|
+ MOVL R8, 4 (R12)
|
|
|
+ MOVL AX, 8 (R12)
|
|
|
+ MOVL R11, 12 (R12)
|
|
|
MOVL 8(DX),CX
|
|
|
MOVL 24(R10),R8
|
|
|
MOVL 4(R10),AX
|
|
|
MOVL 4(DX),R11
|
|
|
- MOVL CX,16(SP)
|
|
|
- MOVL R8, 20 (SP)
|
|
|
- MOVL AX, 24 (SP)
|
|
|
- MOVL R11, 28 (SP)
|
|
|
+ MOVL CX,16(R12)
|
|
|
+ MOVL R8, 20 (R12)
|
|
|
+ MOVL AX, 24 (R12)
|
|
|
+ MOVL R11, 28 (R12)
|
|
|
MOVL 12(DX),CX
|
|
|
MOVL 12(R10),DX
|
|
|
MOVL 28(R10),R8
|
|
|
MOVL 8(R10),AX
|
|
|
- MOVL DX,32(SP)
|
|
|
- MOVL CX, 36 (SP)
|
|
|
- MOVL R8, 40 (SP)
|
|
|
- MOVL AX, 44 (SP)
|
|
|
+ MOVL DX,32(R12)
|
|
|
+ MOVL CX, 36 (R12)
|
|
|
+ MOVL R8, 40 (R12)
|
|
|
+ MOVL AX, 44 (R12)
|
|
|
MOVQ $1634760805,DX
|
|
|
MOVQ $857760878,CX
|
|
|
MOVQ $2036477234,R8
|
|
|
MOVQ $1797285236,AX
|
|
|
- MOVL DX,48(SP)
|
|
|
- MOVL CX, 52 (SP)
|
|
|
- MOVL R8, 56 (SP)
|
|
|
- MOVL AX, 60 (SP)
|
|
|
+ MOVL DX,48(R12)
|
|
|
+ MOVL CX, 52 (R12)
|
|
|
+ MOVL R8, 56 (R12)
|
|
|
+ MOVL AX, 60 (R12)
|
|
|
CMPQ R9,$256
|
|
|
JB BYTESBETWEEN1AND255
|
|
|
- MOVOA 48(SP),X0
|
|
|
+ MOVOA 48(R12),X0
|
|
|
PSHUFL $0X55,X0,X1
|
|
|
PSHUFL $0XAA,X0,X2
|
|
|
PSHUFL $0XFF,X0,X3
|
|
|
PSHUFL $0X00,X0,X0
|
|
|
- MOVOA X1,64(SP)
|
|
|
- MOVOA X2,80(SP)
|
|
|
- MOVOA X3,96(SP)
|
|
|
- MOVOA X0,112(SP)
|
|
|
- MOVOA 0(SP),X0
|
|
|
+ MOVOA X1,64(R12)
|
|
|
+ MOVOA X2,80(R12)
|
|
|
+ MOVOA X3,96(R12)
|
|
|
+ MOVOA X0,112(R12)
|
|
|
+ MOVOA 0(R12),X0
|
|
|
PSHUFL $0XAA,X0,X1
|
|
|
PSHUFL $0XFF,X0,X2
|
|
|
PSHUFL $0X00,X0,X3
|
|
|
PSHUFL $0X55,X0,X0
|
|
|
- MOVOA X1,128(SP)
|
|
|
- MOVOA X2,144(SP)
|
|
|
- MOVOA X3,160(SP)
|
|
|
- MOVOA X0,176(SP)
|
|
|
- MOVOA 16(SP),X0
|
|
|
+ MOVOA X1,128(R12)
|
|
|
+ MOVOA X2,144(R12)
|
|
|
+ MOVOA X3,160(R12)
|
|
|
+ MOVOA X0,176(R12)
|
|
|
+ MOVOA 16(R12),X0
|
|
|
PSHUFL $0XFF,X0,X1
|
|
|
PSHUFL $0X55,X0,X2
|
|
|
PSHUFL $0XAA,X0,X0
|
|
|
- MOVOA X1,192(SP)
|
|
|
- MOVOA X2,208(SP)
|
|
|
- MOVOA X0,224(SP)
|
|
|
- MOVOA 32(SP),X0
|
|
|
+ MOVOA X1,192(R12)
|
|
|
+ MOVOA X2,208(R12)
|
|
|
+ MOVOA X0,224(R12)
|
|
|
+ MOVOA 32(R12),X0
|
|
|
PSHUFL $0X00,X0,X1
|
|
|
PSHUFL $0XAA,X0,X2
|
|
|
PSHUFL $0XFF,X0,X0
|
|
|
- MOVOA X1,240(SP)
|
|
|
- MOVOA X2,256(SP)
|
|
|
- MOVOA X0,272(SP)
|
|
|
+ MOVOA X1,240(R12)
|
|
|
+ MOVOA X2,256(R12)
|
|
|
+ MOVOA X0,272(R12)
|
|
|
BYTESATLEAST256:
|
|
|
- MOVL 16(SP),DX
|
|
|
- MOVL 36 (SP),CX
|
|
|
- MOVL DX,288(SP)
|
|
|
- MOVL CX,304(SP)
|
|
|
+ MOVL 16(R12),DX
|
|
|
+ MOVL 36 (R12),CX
|
|
|
+ MOVL DX,288(R12)
|
|
|
+ MOVL CX,304(R12)
|
|
|
SHLQ $32,CX
|
|
|
ADDQ CX,DX
|
|
|
ADDQ $1,DX
|
|
|
MOVQ DX,CX
|
|
|
SHRQ $32,CX
|
|
|
- MOVL DX, 292 (SP)
|
|
|
- MOVL CX, 308 (SP)
|
|
|
+ MOVL DX, 292 (R12)
|
|
|
+ MOVL CX, 308 (R12)
|
|
|
ADDQ $1,DX
|
|
|
MOVQ DX,CX
|
|
|
SHRQ $32,CX
|
|
|
- MOVL DX, 296 (SP)
|
|
|
- MOVL CX, 312 (SP)
|
|
|
+ MOVL DX, 296 (R12)
|
|
|
+ MOVL CX, 312 (R12)
|
|
|
ADDQ $1,DX
|
|
|
MOVQ DX,CX
|
|
|
SHRQ $32,CX
|
|
|
- MOVL DX, 300 (SP)
|
|
|
- MOVL CX, 316 (SP)
|
|
|
+ MOVL DX, 300 (R12)
|
|
|
+ MOVL CX, 316 (R12)
|
|
|
ADDQ $1,DX
|
|
|
MOVQ DX,CX
|
|
|
SHRQ $32,CX
|
|
|
- MOVL DX,16(SP)
|
|
|
- MOVL CX, 36 (SP)
|
|
|
- MOVQ R9,352(SP)
|
|
|
+ MOVL DX,16(R12)
|
|
|
+ MOVL CX, 36 (R12)
|
|
|
+ MOVQ R9,352(R12)
|
|
|
MOVQ $20,DX
|
|
|
- MOVOA 64(SP),X0
|
|
|
- MOVOA 80(SP),X1
|
|
|
- MOVOA 96(SP),X2
|
|
|
- MOVOA 256(SP),X3
|
|
|
- MOVOA 272(SP),X4
|
|
|
- MOVOA 128(SP),X5
|
|
|
- MOVOA 144(SP),X6
|
|
|
- MOVOA 176(SP),X7
|
|
|
- MOVOA 192(SP),X8
|
|
|
- MOVOA 208(SP),X9
|
|
|
- MOVOA 224(SP),X10
|
|
|
- MOVOA 304(SP),X11
|
|
|
- MOVOA 112(SP),X12
|
|
|
- MOVOA 160(SP),X13
|
|
|
- MOVOA 240(SP),X14
|
|
|
- MOVOA 288(SP),X15
|
|
|
+ MOVOA 64(R12),X0
|
|
|
+ MOVOA 80(R12),X1
|
|
|
+ MOVOA 96(R12),X2
|
|
|
+ MOVOA 256(R12),X3
|
|
|
+ MOVOA 272(R12),X4
|
|
|
+ MOVOA 128(R12),X5
|
|
|
+ MOVOA 144(R12),X6
|
|
|
+ MOVOA 176(R12),X7
|
|
|
+ MOVOA 192(R12),X8
|
|
|
+ MOVOA 208(R12),X9
|
|
|
+ MOVOA 224(R12),X10
|
|
|
+ MOVOA 304(R12),X11
|
|
|
+ MOVOA 112(R12),X12
|
|
|
+ MOVOA 160(R12),X13
|
|
|
+ MOVOA 240(R12),X14
|
|
|
+ MOVOA 288(R12),X15
|
|
|
MAINLOOP1:
|
|
|
- MOVOA X1,320(SP)
|
|
|
- MOVOA X2,336(SP)
|
|
|
+ MOVOA X1,320(R12)
|
|
|
+ MOVOA X2,336(R12)
|
|
|
MOVOA X13,X1
|
|
|
PADDL X12,X1
|
|
|
MOVOA X1,X2
|
|
@@ -191,8 +189,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X1,X12
|
|
|
PSRLL $14,X2
|
|
|
PXOR X2,X12
|
|
|
- MOVOA 320(SP),X1
|
|
|
- MOVOA X12,320(SP)
|
|
|
+ MOVOA 320(R12),X1
|
|
|
+ MOVOA X12,320(R12)
|
|
|
MOVOA X9,X2
|
|
|
PADDL X7,X2
|
|
|
MOVOA X2,X12
|
|
@@ -207,8 +205,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X2,X3
|
|
|
PSRLL $25,X12
|
|
|
PXOR X12,X3
|
|
|
- MOVOA 336(SP),X2
|
|
|
- MOVOA X0,336(SP)
|
|
|
+ MOVOA 336(R12),X2
|
|
|
+ MOVOA X0,336(R12)
|
|
|
MOVOA X6,X0
|
|
|
PADDL X2,X0
|
|
|
MOVOA X0,X12
|
|
@@ -251,8 +249,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X0,X1
|
|
|
PSRLL $14,X12
|
|
|
PXOR X12,X1
|
|
|
- MOVOA 320(SP),X0
|
|
|
- MOVOA X1,320(SP)
|
|
|
+ MOVOA 320(R12),X0
|
|
|
+ MOVOA X1,320(R12)
|
|
|
MOVOA X4,X1
|
|
|
PADDL X0,X1
|
|
|
MOVOA X1,X12
|
|
@@ -267,8 +265,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X1,X2
|
|
|
PSRLL $14,X12
|
|
|
PXOR X12,X2
|
|
|
- MOVOA 336(SP),X12
|
|
|
- MOVOA X2,336(SP)
|
|
|
+ MOVOA 336(R12),X12
|
|
|
+ MOVOA X2,336(R12)
|
|
|
MOVOA X14,X1
|
|
|
PADDL X12,X1
|
|
|
MOVOA X1,X2
|
|
@@ -311,8 +309,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X1,X0
|
|
|
PSRLL $14,X2
|
|
|
PXOR X2,X0
|
|
|
- MOVOA 320(SP),X1
|
|
|
- MOVOA X0,320(SP)
|
|
|
+ MOVOA 320(R12),X1
|
|
|
+ MOVOA X0,320(R12)
|
|
|
MOVOA X8,X0
|
|
|
PADDL X14,X0
|
|
|
MOVOA X0,X2
|
|
@@ -327,8 +325,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X0,X6
|
|
|
PSRLL $25,X2
|
|
|
PXOR X2,X6
|
|
|
- MOVOA 336(SP),X2
|
|
|
- MOVOA X12,336(SP)
|
|
|
+ MOVOA 336(R12),X2
|
|
|
+ MOVOA X12,336(R12)
|
|
|
MOVOA X3,X0
|
|
|
PADDL X2,X0
|
|
|
MOVOA X0,X12
|
|
@@ -378,14 +376,14 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PXOR X0,X2
|
|
|
PSRLL $14,X12
|
|
|
PXOR X12,X2
|
|
|
- MOVOA 320(SP),X12
|
|
|
- MOVOA 336(SP),X0
|
|
|
+ MOVOA 320(R12),X12
|
|
|
+ MOVOA 336(R12),X0
|
|
|
SUBQ $2,DX
|
|
|
JA MAINLOOP1
|
|
|
- PADDL 112(SP),X12
|
|
|
- PADDL 176(SP),X7
|
|
|
- PADDL 224(SP),X10
|
|
|
- PADDL 272(SP),X4
|
|
|
+ PADDL 112(R12),X12
|
|
|
+ PADDL 176(R12),X7
|
|
|
+ PADDL 224(R12),X10
|
|
|
+ PADDL 272(R12),X4
|
|
|
MOVD X12,DX
|
|
|
MOVD X7,CX
|
|
|
MOVD X10,R8
|
|
@@ -446,10 +444,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL CX,196(DI)
|
|
|
MOVL R8,200(DI)
|
|
|
MOVL R9,204(DI)
|
|
|
- PADDL 240(SP),X14
|
|
|
- PADDL 64(SP),X0
|
|
|
- PADDL 128(SP),X5
|
|
|
- PADDL 192(SP),X8
|
|
|
+ PADDL 240(R12),X14
|
|
|
+ PADDL 64(R12),X0
|
|
|
+ PADDL 128(R12),X5
|
|
|
+ PADDL 192(R12),X8
|
|
|
MOVD X14,DX
|
|
|
MOVD X0,CX
|
|
|
MOVD X5,R8
|
|
@@ -510,10 +508,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL CX,212(DI)
|
|
|
MOVL R8,216(DI)
|
|
|
MOVL R9,220(DI)
|
|
|
- PADDL 288(SP),X15
|
|
|
- PADDL 304(SP),X11
|
|
|
- PADDL 80(SP),X1
|
|
|
- PADDL 144(SP),X6
|
|
|
+ PADDL 288(R12),X15
|
|
|
+ PADDL 304(R12),X11
|
|
|
+ PADDL 80(R12),X1
|
|
|
+ PADDL 144(R12),X6
|
|
|
MOVD X15,DX
|
|
|
MOVD X11,CX
|
|
|
MOVD X1,R8
|
|
@@ -574,10 +572,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL CX,228(DI)
|
|
|
MOVL R8,232(DI)
|
|
|
MOVL R9,236(DI)
|
|
|
- PADDL 160(SP),X13
|
|
|
- PADDL 208(SP),X9
|
|
|
- PADDL 256(SP),X3
|
|
|
- PADDL 96(SP),X2
|
|
|
+ PADDL 160(R12),X13
|
|
|
+ PADDL 208(R12),X9
|
|
|
+ PADDL 256(R12),X3
|
|
|
+ PADDL 96(R12),X2
|
|
|
MOVD X13,DX
|
|
|
MOVD X9,CX
|
|
|
MOVD X3,R8
|
|
@@ -638,7 +636,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL CX,244(DI)
|
|
|
MOVL R8,248(DI)
|
|
|
MOVL R9,252(DI)
|
|
|
- MOVQ 352(SP),R9
|
|
|
+ MOVQ 352(R12),R9
|
|
|
SUBQ $256,R9
|
|
|
ADDQ $256,SI
|
|
|
ADDQ $256,DI
|
|
@@ -650,17 +648,17 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
CMPQ R9,$64
|
|
|
JAE NOCOPY
|
|
|
MOVQ DI,DX
|
|
|
- LEAQ 360(SP),DI
|
|
|
+ LEAQ 360(R12),DI
|
|
|
MOVQ R9,CX
|
|
|
REP; MOVSB
|
|
|
- LEAQ 360(SP),DI
|
|
|
- LEAQ 360(SP),SI
|
|
|
+ LEAQ 360(R12),DI
|
|
|
+ LEAQ 360(R12),SI
|
|
|
NOCOPY:
|
|
|
- MOVQ R9,352(SP)
|
|
|
- MOVOA 48(SP),X0
|
|
|
- MOVOA 0(SP),X1
|
|
|
- MOVOA 16(SP),X2
|
|
|
- MOVOA 32(SP),X3
|
|
|
+ MOVQ R9,352(R12)
|
|
|
+ MOVOA 48(R12),X0
|
|
|
+ MOVOA 0(R12),X1
|
|
|
+ MOVOA 16(R12),X2
|
|
|
+ MOVOA 32(R12),X3
|
|
|
MOVOA X1,X4
|
|
|
MOVQ $20,CX
|
|
|
MAINLOOP2:
|
|
@@ -791,10 +789,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
PSHUFL $0X39,X3,X3
|
|
|
PXOR X6,X0
|
|
|
JA MAINLOOP2
|
|
|
- PADDL 48(SP),X0
|
|
|
- PADDL 0(SP),X1
|
|
|
- PADDL 16(SP),X2
|
|
|
- PADDL 32(SP),X3
|
|
|
+ PADDL 48(R12),X0
|
|
|
+ PADDL 0(R12),X1
|
|
|
+ PADDL 16(R12),X2
|
|
|
+ PADDL 32(R12),X3
|
|
|
MOVD X0,CX
|
|
|
MOVD X1,R8
|
|
|
MOVD X2,R9
|
|
@@ -855,16 +853,16 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
MOVL R8,44(DI)
|
|
|
MOVL R9,28(DI)
|
|
|
MOVL AX,12(DI)
|
|
|
- MOVQ 352(SP),R9
|
|
|
- MOVL 16(SP),CX
|
|
|
- MOVL 36 (SP),R8
|
|
|
+ MOVQ 352(R12),R9
|
|
|
+ MOVL 16(R12),CX
|
|
|
+ MOVL 36 (R12),R8
|
|
|
ADDQ $1,CX
|
|
|
SHLQ $32,R8
|
|
|
ADDQ R8,CX
|
|
|
MOVQ CX,R8
|
|
|
SHRQ $32,R8
|
|
|
- MOVL CX,16(SP)
|
|
|
- MOVL R8, 36 (SP)
|
|
|
+ MOVL CX,16(R12)
|
|
|
+ MOVL R8, 36 (R12)
|
|
|
CMPQ R9,$64
|
|
|
JA BYTESATLEAST65
|
|
|
JAE BYTESATLEAST64
|
|
@@ -874,7 +872,6 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
|
|
|
REP; MOVSB
|
|
|
BYTESATLEAST64:
|
|
|
DONE:
|
|
|
- MOVQ R12,SP
|
|
|
RET
|
|
|
BYTESATLEAST65:
|
|
|
SUBQ $64,R9
|