summaryrefslogtreecommitdiff
path: root/1/repair_avx.asm
diff options
context:
space:
mode:
authorBond_009 <bond.009@outlook.com>2021-01-06 16:08:35 +0100
committerBond_009 <bond.009@outlook.com>2021-01-06 16:08:35 +0100
commit04e1607c944b5a133f20e5cd1f213e0d2da0702b (patch)
tree4972804475168743f178fbca3dd3b0cfb2e035c2 /1/repair_avx.asm
parentc6d4b175c275602cb5b1d0be6123b656dc0bbb27 (diff)
Clean up day 1 fast
Diffstat (limited to '1/repair_avx.asm')
-rw-r--r--1/repair_avx.asm11
1 files changed, 4 insertions, 7 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
index 7271138..4f128f6 100644
--- a/1/repair_avx.asm
+++ b/1/repair_avx.asm
@@ -3,14 +3,11 @@ global repair_avx_inner
section .text
repair_avx_inner:
-; vpbroadcastd ymm1, edi ; AVX512VL AVX512F
- vmovd xmm1, edi
- vpbroadcastd ymm1, xmm1
- vpsubd ymm1, ymm0, ymm1
%assign i 0
%rep 25
- vpcmpeqd ymm2, ymm1, [rsi + i]
- vpmovmskb eax, ymm2
+ vpcmpeqd ymm1, ymm0, [rdi + i]
+; vptest ymm1, ymm1 ; slower than vpmovmskb + test
+ vpmovmskb eax, ymm1
test eax, eax
jne .found
%assign i i+32
@@ -20,5 +17,5 @@ repair_avx_inner:
ret
.found:
vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
- movd eax, xmm1
+ movd eax, xmm0 ; smaller than putting a vmovd before the vzeroupper and no measurable performance difference
ret