summaryrefslogtreecommitdiff
path: root/2020/01/repair_avx.asm
diff options
context:
space:
mode:
Diffstat (limited to '2020/01/repair_avx.asm')
-rw-r--r--2020/01/repair_avx.asm21
1 files changed, 21 insertions, 0 deletions
diff --git a/2020/01/repair_avx.asm b/2020/01/repair_avx.asm
new file mode 100644
index 0000000..4f128f6
--- /dev/null
+++ b/2020/01/repair_avx.asm
@@ -0,0 +1,21 @@
+global repair_avx_inner
+
+section .text
+
+repair_avx_inner:
+%assign i 0
+%rep 25
+ vpcmpeqd ymm1, ymm0, [rdi + i]
+; vptest ymm1, ymm1 ; slower then vpmovmskb + test
+ vpmovmskb eax, ymm1
+ test eax, eax
+ jne .found
+%assign i i+32
+%endrep
+ xor eax, eax ; not found, return 0
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret
+.found:
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ movd eax, xmm0 ; smaller then putting a vmovd before the vzeroupper and no measurable performance difference
+ ret