diff options
| author | Bond_009 <bond.009@outlook.com> | 2021-01-06 16:08:35 +0100 |
|---|---|---|
| committer | Bond_009 <bond.009@outlook.com> | 2021-01-06 16:08:35 +0100 |
| commit | 04e1607c944b5a133f20e5cd1f213e0d2da0702b (patch) | |
| tree | 4972804475168743f178fbca3dd3b0cfb2e035c2 /1/repair_avx.asm | |
| parent | c6d4b175c275602cb5b1d0be6123b656dc0bbb27 (diff) | |
Clean up day 1 fast
Diffstat (limited to '1/repair_avx.asm')
| -rw-r--r-- | 1/repair_avx.asm | 11 |
1 files changed, 4 insertions, 7 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm index 7271138..4f128f6 100644 --- a/1/repair_avx.asm +++ b/1/repair_avx.asm @@ -3,14 +3,11 @@ global repair_avx_inner section .text repair_avx_inner: -; vpbroadcastd ymm1, edi ; AVX512VL AVX512F - vmovd xmm1, edi - vpbroadcastd ymm1, xmm1 - vpsubd ymm1, ymm0, ymm1 %assign i 0 %rep 25 - vpcmpeqd ymm2, ymm1, [rsi + i] - vpmovmskb eax, ymm2 + vpcmpeqd ymm1, ymm0, [rdi + i] +; vptest ymm1, ymm1 ; slower than vpmovmskb + test + vpmovmskb eax, ymm1 test eax, eax jne .found %assign i i+32 @@ -20,5 +17,5 @@ repair_avx_inner: ret .found: vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions - movd eax, xmm1 + movd eax, xmm0 ; smaller than putting a vmovd before the vzeroupper and no measurable performance difference ret |
