sum_s390x.go 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build gc && !purego
  5. // +build gc,!purego
  6. package poly1305
  7. import (
  8. "golang.org/x/sys/cpu"
  9. )
  10. // updateVX is an assembly implementation of Poly1305 that uses vector
  11. // instructions. It must only be called if the vector facility (vx) is
  12. // available.
  13. //
  14. //go:noescape
  15. func updateVX(state *macState, msg []byte)
  16. // mac is a replacement for macGeneric that uses a larger buffer and redirects
  17. // calls that would have gone to updateGeneric to updateVX if the vector
  18. // facility is installed.
  19. //
  20. // A larger buffer is required for good performance because the vector
  21. // implementation has a higher fixed cost per call than the generic
  22. // implementation.
  23. type mac struct {
  24. macState
  25. buffer [16 * TagSize]byte // size must be a multiple of block size (16)
  26. offset int
  27. }
  28. func (h *mac) Write(p []byte) (int, error) {
  29. nn := len(p)
  30. if h.offset > 0 {
  31. n := copy(h.buffer[h.offset:], p)
  32. if h.offset+n < len(h.buffer) {
  33. h.offset += n
  34. return nn, nil
  35. }
  36. p = p[n:]
  37. h.offset = 0
  38. if cpu.S390X.HasVX {
  39. updateVX(&h.macState, h.buffer[:])
  40. } else {
  41. updateGeneric(&h.macState, h.buffer[:])
  42. }
  43. }
  44. tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
  45. body := len(p) - tail // number of bytes to process now
  46. if body > 0 {
  47. if cpu.S390X.HasVX {
  48. updateVX(&h.macState, p[:body])
  49. } else {
  50. updateGeneric(&h.macState, p[:body])
  51. }
  52. }
  53. h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
  54. return nn, nil
  55. }
  56. func (h *mac) Sum(out *[TagSize]byte) {
  57. state := h.macState
  58. remainder := h.buffer[:h.offset]
  59. // Use the generic implementation if we have 2 or fewer blocks left
  60. // to sum. The vector implementation has a higher startup time.
  61. if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
  62. updateVX(&state, remainder)
  63. } else if len(remainder) > 0 {
  64. updateGeneric(&state, remainder)
  65. }
  66. finalize(out, &state.h, &state.s)
  67. }