mirror of
https://github.com/golang/go.git
synced 2026-01-29 07:02:05 +03:00
crypto/subtle: add vector implementation of xorBytes for riscv64
On a Banana Pi F3:
│ subtle.1 │ subtle.2 │
│ sec/op │ sec/op vs base │
ConstantTimeSelect-8 4.391n ± 1% 4.390n ± 0% ~ (p=0.500 n=10)
ConstantTimeByteEq-8 3.763n ± 0% 3.764n ± 0% ~ (p=0.549 n=10)
ConstantTimeEq-8 3.767n ± 1% 3.764n ± 0% -0.08% (p=0.002 n=10)
ConstantTimeLessOrEq-8 3.136n ± 0% 3.138n ± 0% +0.06% (p=0.002 n=10)
XORBytes/8Bytes-8 53.42n ± 0% 52.28n ± 0% -2.13% (p=0.000 n=10)
XORBytes/128Bytes-8 64.79n ± 0% 64.12n ± 0% -1.03% (p=0.000 n=10)
XORBytes/2048Bytes-8 479.3n ± 0% 322.0n ± 0% -32.84% (p=0.000 n=10)
XORBytes/8192Bytes-8 8.897µ ± 26% 7.734µ ± 12% ~ (p=0.165 n=10)
XORBytes/32768Bytes-8 39.17µ ± 17% 35.40µ ± 24% -9.63% (p=0.029 n=10)
XORBytesAlignment/8Bytes0Offset-8 51.74n ± 0% 54.18n ± 0% +4.72% (p=0.000 n=10)
XORBytesAlignment/8Bytes1Offset-8 51.51n ± 1% 53.52n ± 0% +3.92% (p=0.000 n=10)
XORBytesAlignment/8Bytes2Offset-8 51.35n ± 1% 53.58n ± 0% +4.34% (p=0.000 n=10)
XORBytesAlignment/8Bytes3Offset-8 50.86n ± 0% 53.56n ± 0% +5.31% (p=0.000 n=10)
XORBytesAlignment/8Bytes4Offset-8 51.62n ± 0% 54.20n ± 0% +4.98% (p=0.000 n=10)
XORBytesAlignment/8Bytes5Offset-8 51.42n ± 1% 53.48n ± 0% +4.02% (p=0.000 n=10)
XORBytesAlignment/8Bytes6Offset-8 51.08n ± 1% 53.46n ± 0% +4.67% (p=0.000 n=10)
XORBytesAlignment/8Bytes7Offset-8 50.83n ± 0% 53.54n ± 0% +5.33% (p=0.000 n=10)
XORBytesAlignment/128Bytes0Offset-8 63.67n ± 0% 66.04n ± 0% +3.72% (p=0.000 n=10)
XORBytesAlignment/128Bytes1Offset-8 114.40n ± 0% 67.42n ± 0% -41.07% (p=0.000 n=10)
XORBytesAlignment/128Bytes2Offset-8 113.85n ± 0% 67.43n ± 0% -40.78% (p=0.000 n=10)
XORBytesAlignment/128Bytes3Offset-8 114.60n ± 0% 67.31n ± 0% -41.27% (p=0.000 n=10)
XORBytesAlignment/128Bytes4Offset-8 109.30n ± 0% 67.45n ± 0% -38.29% (p=0.000 n=10)
XORBytesAlignment/128Bytes5Offset-8 110.70n ± 0% 67.32n ± 1% -39.19% (p=0.000 n=10)
XORBytesAlignment/128Bytes6Offset-8 110.05n ± 0% 67.45n ± 1% -38.71% (p=0.000 n=10)
XORBytesAlignment/128Bytes7Offset-8 110.60n ± 0% 67.43n ± 0% -39.04% (p=0.000 n=10)
XORBytesAlignment/2048Bytes0Offset-8 478.4n ± 0% 335.6n ± 0% -29.85% (p=0.000 n=10)
XORBytesAlignment/2048Bytes1Offset-8 529.7n ± 0% 349.3n ± 0% -34.05% (p=0.000 n=10)
XORBytesAlignment/2048Bytes2Offset-8 529.3n ± 0% 349.8n ± 0% -33.91% (p=0.000 n=10)
XORBytesAlignment/2048Bytes3Offset-8 529.8n ± 0% 349.5n ± 0% -34.02% (p=0.000 n=10)
XORBytesAlignment/2048Bytes4Offset-8 524.7n ± 0% 349.6n ± 0% -33.38% (p=0.000 n=10)
XORBytesAlignment/2048Bytes5Offset-8 525.9n ± 0% 349.6n ± 0% -33.52% (p=0.000 n=10)
XORBytesAlignment/2048Bytes6Offset-8 525.1n ± 0% 349.8n ± 0% -33.39% (p=0.000 n=10)
XORBytesAlignment/2048Bytes7Offset-8 526.0n ± 0% 349.8n ± 0% -33.51% (p=0.000 n=10)
geomean 120.0n 96.92n -19.23%
│ subtle.1 │ subtle.2 │
│ B/s │ B/s vs base │
XORBytes/8Bytes-8 142.8Mi ± 0% 145.9Mi ± 0% +2.19% (p=0.000 n=10)
XORBytes/128Bytes-8 1.840Gi ± 0% 1.859Gi ± 0% +1.05% (p=0.000 n=10)
XORBytes/2048Bytes-8 3.979Gi ± 0% 5.925Gi ± 0% +48.89% (p=0.000 n=10)
XORBytes/8192Bytes-8 879.1Mi ± 35% 1010.2Mi ± 13% ~ (p=0.165 n=10)
XORBytes/32768Bytes-8 797.9Mi ± 21% 882.8Mi ± 31% +10.64% (p=0.029 n=10)
XORBytesAlignment/8Bytes0Offset-8 147.5Mi ± 0% 140.8Mi ± 0% -4.50% (p=0.000 n=10)
XORBytesAlignment/8Bytes1Offset-8 148.1Mi ± 1% 142.5Mi ± 0% -3.77% (p=0.000 n=10)
XORBytesAlignment/8Bytes2Offset-8 148.6Mi ± 1% 142.4Mi ± 0% -4.15% (p=0.000 n=10)
XORBytesAlignment/8Bytes3Offset-8 150.0Mi ± 0% 142.4Mi ± 0% -5.04% (p=0.000 n=10)
XORBytesAlignment/8Bytes4Offset-8 147.8Mi ± 0% 140.8Mi ± 0% -4.75% (p=0.000 n=10)
XORBytesAlignment/8Bytes5Offset-8 148.4Mi ± 1% 142.6Mi ± 0% -3.87% (p=0.000 n=10)
XORBytesAlignment/8Bytes6Offset-8 149.4Mi ± 1% 142.7Mi ± 0% -4.45% (p=0.000 n=10)
XORBytesAlignment/8Bytes7Offset-8 150.1Mi ± 0% 142.5Mi ± 0% -5.05% (p=0.000 n=10)
XORBytesAlignment/128Bytes0Offset-8 1.872Gi ± 0% 1.805Gi ± 0% -3.59% (p=0.000 n=10)
XORBytesAlignment/128Bytes1Offset-8 1.042Gi ± 0% 1.768Gi ± 0% +69.65% (p=0.000 n=10)
XORBytesAlignment/128Bytes2Offset-8 1.047Gi ± 0% 1.768Gi ± 0% +68.80% (p=0.000 n=10)
XORBytesAlignment/128Bytes3Offset-8 1.040Gi ± 0% 1.771Gi ± 0% +70.27% (p=0.000 n=10)
XORBytesAlignment/128Bytes4Offset-8 1.090Gi ± 0% 1.767Gi ± 0% +62.08% (p=0.000 n=10)
XORBytesAlignment/128Bytes5Offset-8 1.077Gi ± 0% 1.771Gi ± 1% +64.41% (p=0.000 n=10)
XORBytesAlignment/128Bytes6Offset-8 1.083Gi ± 0% 1.767Gi ± 1% +63.17% (p=0.000 n=10)
XORBytesAlignment/128Bytes7Offset-8 1.078Gi ± 0% 1.768Gi ± 0% +64.07% (p=0.000 n=10)
XORBytesAlignment/2048Bytes0Offset-8 3.987Gi ± 0% 5.684Gi ± 0% +42.55% (p=0.000 n=10)
XORBytesAlignment/2048Bytes1Offset-8 3.601Gi ± 0% 5.459Gi ± 0% +51.61% (p=0.000 n=10)
XORBytesAlignment/2048Bytes2Offset-8 3.604Gi ± 0% 5.453Gi ± 0% +51.31% (p=0.000 n=10)
XORBytesAlignment/2048Bytes3Offset-8 3.600Gi ± 0% 5.457Gi ± 0% +51.56% (p=0.000 n=10)
XORBytesAlignment/2048Bytes4Offset-8 3.635Gi ± 0% 5.456Gi ± 0% +50.10% (p=0.000 n=10)
XORBytesAlignment/2048Bytes5Offset-8 3.627Gi ± 0% 5.455Gi ± 0% +50.39% (p=0.000 n=10)
XORBytesAlignment/2048Bytes6Offset-8 3.632Gi ± 0% 5.454Gi ± 0% +50.14% (p=0.000 n=10)
XORBytesAlignment/2048Bytes7Offset-8 3.626Gi ± 0% 5.453Gi ± 0% +50.39% (p=0.000 n=10)
geomean 881.0Mi 1.097Gi +27.51%
Change-Id: Id7f9d87fe1ea39aa91ea7d3fd1ba20737f0dda3c
Reviewed-on: https://go-review.googlesource.com/c/go/+/649657
Reviewed-by: Julian Zhu <jz531210@gmail.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (amd64 || arm64 || ppc64 || ppc64le || riscv64) && !purego
|
||||
//go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
|
||||
|
||||
package subtle
|
||||
|
||||
|
||||
18
src/crypto/internal/fips140/subtle/xor_riscv64.go
Normal file
18
src/crypto/internal/fips140/subtle/xor_riscv64.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build riscv64 && !purego
|
||||
|
||||
package subtle
|
||||
|
||||
import (
|
||||
"crypto/internal/fips140deps/cpu"
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func xorBytesRISCV64(dst, a, b *byte, n int, hasV bool)
|
||||
|
||||
func xorBytes(dst, a, b *byte, n int) {
|
||||
xorBytesRISCV64(dst, a, b, n, cpu.RISCV64HasV)
|
||||
}
|
||||
@@ -4,10 +4,12 @@
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "asm_riscv64.h"
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytes(dst, a, b *byte, n int)
|
||||
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
|
||||
// func xorBytesRISCV64(dst, a, b *byte, n int, hasV bool)
|
||||
TEXT ·xorBytesRISCV64(SB), NOSPLIT|NOFRAME, $0
|
||||
MOV dst+0(FP), X10
|
||||
MOV a+8(FP), X11
|
||||
MOV b+16(FP), X12
|
||||
@@ -16,6 +18,35 @@ TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
|
||||
MOV $32, X15
|
||||
BLT X13, X15, loop4_check
|
||||
|
||||
#ifndef hasV
|
||||
MOVB hasV+32(FP), X5
|
||||
BEQZ X5, xorbytes_scalar
|
||||
#endif
|
||||
|
||||
// Use vector if not 8 byte aligned.
|
||||
OR X10, X11, X5
|
||||
AND $7, X5
|
||||
BNEZ X5, vector_loop
|
||||
|
||||
// Use scalar if 8 byte aligned and <= 64 bytes.
|
||||
SUB $64, X12, X6
|
||||
BLEZ X6, loop64_check
|
||||
|
||||
PCALIGN $16
|
||||
vector_loop:
|
||||
VSETVLI X13, E8, M8, TU, MU, X15
|
||||
VLE8V (X11), V8
|
||||
VLE8V (X12), V16
|
||||
VXORVV V8, V16, V24
|
||||
VSE8V V24, (X10)
|
||||
ADD X15, X10
|
||||
ADD X15, X11
|
||||
ADD X15, X12
|
||||
SUB X15, X13
|
||||
BNEZ X13, vector_loop
|
||||
RET
|
||||
|
||||
xorbytes_scalar:
|
||||
// Check alignment - if alignment differs we have to do one byte at a time.
|
||||
AND $7, X10, X5
|
||||
AND $7, X11, X6
|
||||
|
||||
@@ -27,6 +27,8 @@ var (
|
||||
LOONG64HasLSX = cpu.Loong64.HasLSX
|
||||
LOONG64HasLASX = cpu.Loong64.HasLASX
|
||||
|
||||
RISCV64HasV = cpu.RISCV64.HasV
|
||||
|
||||
S390XHasAES = cpu.S390X.HasAES
|
||||
S390XHasAESCBC = cpu.S390X.HasAESCBC
|
||||
S390XHasAESCTR = cpu.S390X.HasAESCTR
|
||||
|
||||
Reference in New Issue
Block a user