mirror of
https://github.com/golang/go.git
synced 2026-01-29 07:02:05 +03:00
math: optimize the floating-point pipeline on loong64
Use the FSEL instruction on loong64 to select between the two candidate
results, eliminating the branches and reducing pipeline interruptions.
On the Loongson 3A6000 CPU, this yields a 0.09% (geomean) performance improvement, as follows:
goos: linux
goarch: loong64
pkg: math/big
cpu: Loongson-3A6000-HV @ 2500.00MHz
         │  old.bench  │             new.bench              │
         │   sec/op    │   sec/op     vs base               │
Exp        7.748m ± 0%   7.740m ± 0%  -0.10% (p=0.001 n=10)
Exp2       7.747m ± 0%   7.741m ± 0%  -0.09% (p=0.002 n=10)
geomean    7.747m        7.740m       -0.09%
Change-Id: If62f2e81bf345c83a1fa9350ace131240cfa3b9b
Reviewed-on: https://go-review.googlesource.com/c/go/+/693458
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
This commit is contained in:
committed by
abner chenc
parent
985b0b3fe2
commit
7f0f671951
@@ -62,13 +62,9 @@ TEXT ·archExp(SB),$0-16
|
|||||||
MOVD 8(R10), F3
|
MOVD 8(R10), F3
|
||||||
MOVD 48(R10), F2
|
MOVD 48(R10), F2
|
||||||
CMPGTD F0, F5, FCC0
|
CMPGTD F0, F5, FCC0
|
||||||
BFPT add // x > 0
|
FMSUBD F3, F2, F0, F4 // Log2e*x - 0.5
|
||||||
sub:
|
|
||||||
FMSUBD F3, F2, F0, F3 // Log2e*x - 0.5
|
|
||||||
JMP 2(PC)
|
|
||||||
add:
|
|
||||||
FMADDD F3, F2, F0, F3 // Log2e*x + 0.5
|
FMADDD F3, F2, F0, F3 // Log2e*x + 0.5
|
||||||
|
FSEL FCC0, F3, F4, F3
|
||||||
FTINTRZVD F3, F4 // float64 -> int64
|
FTINTRZVD F3, F4 // float64 -> int64
|
||||||
MOVV F4, R5 // R5 = int(k)
|
MOVV F4, R5 // R5 = int(k)
|
||||||
FFINTDV F4, F3 // int64 -> float64
|
FFINTDV F4, F3 // int64 -> float64
|
||||||
@@ -162,13 +158,9 @@ TEXT ·archExp2(SB),$0-16
|
|||||||
MOVD 0(R10), F10
|
MOVD 0(R10), F10
|
||||||
MOVD 8(R10), F2
|
MOVD 8(R10), F2
|
||||||
CMPGTD F0, F10, FCC0
|
CMPGTD F0, F10, FCC0
|
||||||
BFPT add
|
SUBD F2, F0, F4 // x - 0.5
|
||||||
sub:
|
|
||||||
SUBD F2, F0, F3 // x - 0.5
|
|
||||||
JMP 2(PC)
|
|
||||||
add:
|
|
||||||
ADDD F2, F0, F3 // x + 0.5
|
ADDD F2, F0, F3 // x + 0.5
|
||||||
|
FSEL FCC0, F3, F4, F3
|
||||||
FTINTRZVD F3, F4
|
FTINTRZVD F3, F4
|
||||||
MOVV F4, R5
|
MOVV F4, R5
|
||||||
FFINTDV F4, F3
|
FFINTDV F4, F3
|
||||||
|
|||||||
Reference in New Issue
Block a user