diff options
Diffstat (limited to '1/repair_avx.asm')
| -rw-r--r-- | 1/repair_avx.asm | 11 |
1 files changed, 4 insertions, 7 deletions
diff --git a/1/repair_avx.asm b/1/repair_avx.asm index 7271138..4f128f6 100644 --- a/1/repair_avx.asm +++ b/1/repair_avx.asm @@ -3,14 +3,11 @@ global repair_avx_inner section .text repair_avx_inner: -; vpbroadcastd ymm1, edi ; AVX512VL AVX512F - vmovd xmm1, edi - vpbroadcastd ymm1, xmm1 - vpsubd ymm1, ymm0, ymm1 %assign i 0 %rep 25 - vpcmpeqd ymm2, ymm1, [rsi + i] - vpmovmskb eax, ymm2 + vpcmpeqd ymm1, ymm0, [rdi + i] +; vptest ymm1, ymm1 ; slower than vpmovmskb + test + vpmovmskb eax, ymm1 test eax, eax jne .found %assign i i+32 @@ -20,5 +17,5 @@ repair_avx_inner: ret .found: vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions - movd eax, xmm1 + movd eax, xmm0 ; smaller than putting a vmovd before the vzeroupper and no measurable performance difference ret |
