summary | refs | log | tree | commit | diff
path: root/1/repair_avx.asm
diff options
context:
space:
mode:
author Bond_009 <bond.009@outlook.com> 2021-01-05 14:42:25 +0100
committer Bond_009 <bond.009@outlook.com> 2021-01-05 14:42:25 +0100
commit a50c55c1e4e9d1f981f0563ab72471de1ddf2501 (patch)
tree f301948922e78b9e46498f0008cfb55bf04e3063 /1/repair_avx.asm
parent e6efc41e26ffc1a93c29f1710bbb9dac18d162a4 (diff)
Optimize repair_avx_inner
Diffstat (limited to '1/repair_avx.asm')
-rw-r--r-- 1/repair_avx.asm 7
1 file changed, 3 insertions, 4 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
index 777e292..4a268a6 100644
--- a/1/repair_avx.asm
+++ b/1/repair_avx.asm
@@ -6,16 +6,15 @@ repair_avx_inner:
; vpbroadcastd ymm1, edi ; AVX512VL AVX512F
vmovd xmm1, edi
vpbroadcastd ymm1, xmm1
+ vpsubd ymm1, ymm0, ymm1
%rep 24
- vpaddd ymm2, ymm1, [rsi]
- vpcmpeqd ymm2, ymm2, ymm0
+ vpcmpeqd ymm2, ymm1, [rsi]
vpmovmskb edx, ymm2
test edx, edx
jne .found
add rsi, 32 ; set up to read the next 256 bits (32 bytes) (8 * dword)
%endrep
- vpaddd ymm2, ymm1, [rsi]
- vpcmpeqd ymm2, ymm2, ymm0
+ vpcmpeqd ymm2, ymm1, [rsi]
vpmovmskb edx, ymm2
test edx, edx
jne .found