diff options
| author | Bond_009 <bond.009@outlook.com> | 2021-01-05 00:43:57 +0100 |
|---|---|---|
| committer | Bond_009 <bond.009@outlook.com> | 2021-01-05 00:43:57 +0100 |
| commit | e6efc41e26ffc1a93c29f1710bbb9dac18d162a4 (patch) | |
| tree | 1ae7f4d600dad322f6ed745442d8ee942dd3e080 /1/repair_avx.asm | |
| parent | 8fe6e81226a895d4e5300becdea73538dbd58505 (diff) | |
Add assembly version of the inner loop of day 1
Diffstat (limited to '1/repair_avx.asm')
| -rw-r--r-- | 1/repair_avx.asm | 29 |
1 files changed, 29 insertions, 0 deletions
; Reconstructed from the single-line rendering of:
;   diff --git a/1/repair_avx.asm b/1/repair_avx.asm
;   (new file mode 100644, index 0000000..777e292)
;
;-----------------------------------------------------------------------
; repair_avx_inner
;
; Syntax: NASM (Intel operand order), x86-64.
; ABI:    register usage matches System V AMD64 (edi, rsi, ymm0 in; eax out).
; ISA:    AVX2 -- the 256-bit integer forms of vpbroadcastd, vpaddd,
;         vpcmpeqd and vpmovmskb used below are AVX2 instructions, even
;         though the symbol name only says "avx".
;
; Adds the 32-bit value in edi to every dword of a fixed-size array and
; returns the lowest-addressed array element whose sum equals the
; corresponding dword lane of ymm0.
;
; In:    edi  = value added to every array element
;        rsi  = pointer to BLOCK_COUNT * BLOCK_BYTES (25 * 32 = 800) readable
;               bytes, i.e. 200 dwords; all of them may be read
;        ymm0 = eight dword lanes holding the sum to look for (read only;
;               never written here before the final vzeroupper)
; Out:   eax  = the matching array element, or 0 when no lane matched
; Clobb: rdx, rsi, ymm1, ymm2, flags; vzeroupper clears bits 128 and up of
;        every ymm register before returning
;
; NOTE(review): presumably called from C as
;     int repair_avx_inner(int value, const int *entries, __m256i target);
; with `target` holding the same wanted sum in all eight lanes -- confirm
; against the caller, which is not part of this file.
;-----------------------------------------------------------------------

%define BLOCK_BYTES 32                  ; one ymm register = 8 dwords
%define BLOCK_COUNT 25                  ; 25 blocks * 8 dwords = 200 elements

; Compare the 8 dwords at [rsi] (each increased by the broadcast value in
; ymm1) against ymm0 and jump to .found if any lane is equal.
; Clobbers ymm2, edx and flags. Must be expanded inside repair_avx_inner,
; because .found is a label local to that function.
%macro CHECK_BLOCK 0
        vpaddd          ymm2, ymm1, [rsi]       ; ymm2[i] = value + entries[i]
        vpcmpeqd        ymm2, ymm2, ymm0        ; lane = all-ones where sum == ymm0 lane
        vpmovmskb       edx, ymm2               ; one mask bit per byte of ymm2
        test            edx, edx
        jnz             .found
%endmacro

global repair_avx_inner

section .text

repair_avx_inner:
        ; Broadcast edi into all eight dword lanes of ymm1. This goes through
        ; xmm1 because broadcasting directly from a general-purpose register
        ; (vpbroadcastd ymm1, edi) requires AVX512F + AVX512VL.
        vmovd           xmm1, edi
        vpbroadcastd    ymm1, xmm1

        ; Fully unrolled scan. rsi always points at the block being tested,
        ; so .found can address the hit relative to rsi.
%rep BLOCK_COUNT - 1
        CHECK_BLOCK
        add             rsi, BLOCK_BYTES        ; advance to the next 8 dwords
%endrep
        ; Last block: nothing follows it, so rsi is not advanced.
        CHECK_BLOCK

        xor             eax, eax                ; not found, return 0
        vzeroupper                              ; avoid AVX/SSE transition penalties in the caller
        ret

.found:
        ; A matching dword sets four consecutive mask bits, so the index of
        ; the lowest set bit is the byte offset of the first matching dword
        ; within the current block. Writing edx zero-extends into rdx.
        bsf             edx, edx
        mov             eax, dword [rsi + rdx]  ; return the matching element itself
        vzeroupper                              ; avoid AVX/SSE transition penalties in the caller
        ret

; Mark the stack as non-executable for ELF links. Without this note GNU ld
; assumes the object needs an executable stack (and recent versions warn).
%ifidn __OUTPUT_FORMAT__, elf64
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
