summary | refs | log | tree | commit | diff
path: root/1/repair_avx.asm
diff options
context:
space:
mode:
Diffstat (limited to '1/repair_avx.asm')
-rw-r--r-- 1/repair_avx.asm 11
1 files changed, 4 insertions, 7 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
index 7271138..4f128f6 100644
--- a/1/repair_avx.asm
+++ b/1/repair_avx.asm
@@ -3,14 +3,11 @@ global repair_avx_inner
section .text
repair_avx_inner:
-; vpbroadcastd ymm1, edi ; AVX512VL AVX512F
- vmovd xmm1, edi
- vpbroadcastd ymm1, xmm1
- vpsubd ymm1, ymm0, ymm1
%assign i 0
%rep 25
- vpcmpeqd ymm2, ymm1, [rsi + i]
- vpmovmskb eax, ymm2
+ vpcmpeqd ymm1, ymm0, [rdi + i]
+; vptest ymm1, ymm1 ; slower than vpmovmskb + test
+ vpmovmskb eax, ymm1
test eax, eax
jne .found
%assign i i+32
@@ -20,5 +17,5 @@ repair_avx_inner:
ret
.found:
vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
- movd eax, xmm1
+ movd eax, xmm0 ; smaller than putting a vmovd before the vzeroupper, with no measurable performance difference
ret