summaryrefslogtreecommitdiff
path: root/1/repair_avx.asm
diff options
context:
space:
mode:
Diffstat (limited to '1/repair_avx.asm')
-rw-r--r--1/repair_avx.asm29
1 files changed, 29 insertions, 0 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
new file mode 100644
index 0000000..777e292
--- /dev/null
+++ b/1/repair_avx.asm
@@ -0,0 +1,29 @@
+global repair_avx_inner
+
+section .text
+
+repair_avx_inner:
+; vpbroadcastd ymm1, edi ; AVX512VL AVX512F
+ vmovd xmm1, edi
+ vpbroadcastd ymm1, xmm1
+%rep 24
+ vpaddd ymm2, ymm1, [rsi]
+ vpcmpeqd ymm2, ymm2, ymm0
+ vpmovmskb edx, ymm2
+ test edx, edx
+ jne .found
+ add rsi, 32 ; set up to read the next 256 bits (32 bytes) (8 * dword)
+%endrep
+ vpaddd ymm2, ymm1, [rsi]
+ vpcmpeqd ymm2, ymm2, ymm0
+ vpmovmskb edx, ymm2
+ test edx, edx
+ jne .found
+ xor eax, eax ; not found, return 0
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret
+.found:
+ bsf edx, edx
+ mov eax, dword [rsi + rdx]
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret