summaryrefslogtreecommitdiff
path: root/1/repair_avx.asm
blob: 4f128f63ae42cc1b7658bd788a1723c11b4a7222 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
global repair_avx_inner

section .text

; Bytes covered by one ymm-wide compare.
%define VECTOR_BYTES    32
; Number of unrolled compares; VECTOR_COUNT * VECTOR_BYTES = 800 bytes are read from [rdi].
%define VECTOR_COUNT    25

;-----------------------------------------------------------------------
; repair_avx_inner
;
; Compares the eight dword lanes of ymm0 against 800 consecutive bytes
; starting at rdi, 32 bytes at a time, and stops at the first 32-byte
; block in which any dword lane is equal.
;
; ABI:   presumably SysV AMD64 (pointer in rdi, vector argument in ymm0)
;        -- NOTE(review): not stated in this file, confirm against the caller.
; In:    rdi  = address of the data to scan (800 bytes are read)
;        ymm0 = eight dwords to compare against (the caller is presumably
;               expected to broadcast one value to all lanes -- confirm)
; Out:   eax  = low dword of ymm0 if any lane matched, otherwise 0
;               (a match on a low dword of 0 is therefore indistinguishable
;               from "not found")
; Clobb: rax, ymm1, flags; vzeroupper clears the upper halves of the ymm
;        registers on both exit paths
;-----------------------------------------------------------------------
repair_avx_inner:
%assign vec_idx 0
%rep    VECTOR_COUNT
    vpcmpeqd        ymm1, ymm0, [rdi + vec_idx * VECTOR_BYTES] ; equal dword lanes become all-ones
    vpmovmskb       eax, ymm1                   ; collapse lane mask to bits; vpmovmskb + test was noted as faster than vptest ymm1, ymm1
    test            eax, eax
    jnz             .match                      ; any bit set => at least one lane was equal
%assign vec_idx vec_idx + 1
%endrep
    ; Fell through every block: nothing matched.
    vzeroupper                                  ; avoid AVX <-> legacy SSE transition penalties in the caller
    xor             eax, eax                    ; return 0
    ret
.match:
    vzeroupper                                  ; avoid AVX <-> legacy SSE transition penalties; low 128 bits of ymm0 survive
    movd            eax, xmm0                   ; return low dword of ymm0; legacy movd here is smaller than a vmovd placed before the vzeroupper, with no measurable performance difference
    ret