summaryrefslogtreecommitdiff
path: root/1/repair_avx.asm
diff options
context:
space:
mode:
authorBond_009 <bond.009@outlook.com>2021-01-05 00:43:57 +0100
committerBond_009 <bond.009@outlook.com>2021-01-05 00:43:57 +0100
commite6efc41e26ffc1a93c29f1710bbb9dac18d162a4 (patch)
tree1ae7f4d600dad322f6ed745442d8ee942dd3e080 /1/repair_avx.asm
parent8fe6e81226a895d4e5300becdea73538dbd58505 (diff)
Add assembly version of the inner loop of day 1
Diffstat (limited to '1/repair_avx.asm')
-rw-r--r--1/repair_avx.asm29
1 files changed, 29 insertions, 0 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
new file mode 100644
index 0000000..777e292
--- /dev/null
+++ b/1/repair_avx.asm
@@ -0,0 +1,29 @@
+global repair_avx_inner ; export the symbol so it can be linked from another object file
+; NASM syntax (op dst, src). NOTE(review): the ABI is not stated here; the register use (edi, rsi, ymm0) looks like System V AMD64 argument passing -- confirm against the caller.
+section .text
+; repair_avx_inner: add edi to each of 200 dwords starting at rsi (25 vectors of 8), compare each sum with the corresponding lane of ymm0, and return in eax the first array element whose sum matches, or 0 when none does.
+repair_avx_inner: ; in: edi = addend, rsi = dword array (25 * 32 bytes are read), ymm0 = compare value per lane (read, never written here; presumably a broadcast target sum -- TODO confirm); out: eax; clobbers edx, rsi, ymm1, ymm2, flags
+; vpbroadcastd ymm1, edi ; AVX512VL AVX512F -- broadcasting straight from a GPR needs these extensions, so the vmovd + vpbroadcastd pair below is used instead
+ vmovd xmm1, edi ; low dword of xmm1 = edi
+ vpbroadcastd ymm1, xmm1 ; ymm1 = edi replicated into all 8 dword lanes
+%rep 24
+ vpaddd ymm2, ymm1, [rsi] ; ymm2 lane i = edi + dword i of the current 32-byte vector
+ vpcmpeqd ymm2, ymm2, ymm0 ; lane becomes all-ones where the sum equals ymm0's lane, zero otherwise
+ vpmovmskb edx, ymm2 ; edx = top bit of each of the 32 bytes, so a matching dword sets 4 adjacent bits
+ test edx, edx ; did any lane match?
+ jne .found ; yes: rsi has not been advanced yet and still addresses the matching vector
+ add rsi, 32 ; set up to read the next 256 bits (32 bytes) (8 * dword)
+%endrep
+ vpaddd ymm2, ymm1, [rsi] ; 25th and last vector: same check as the unrolled copies, without the pointer advance
+ vpcmpeqd ymm2, ymm2, ymm0 ; all-ones in every lane whose sum equals ymm0's lane
+ vpmovmskb edx, ymm2 ; byte mask of the comparison result
+ test edx, edx ; did any lane match?
+ jne .found
+ xor eax, eax ; not found, return 0 (NOTE: a matching array element that is itself 0 returns the same value)
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret
+.found: ; here edx = nonzero byte mask and rsi = start of the vector containing the match
+ bsf edx, edx ; lowest set bit index = byte offset of the first matching dword within the vector; the 32-bit write also zero-extends into rdx
+ mov eax, dword [rsi + rdx] ; return the array element itself, not the sum
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret