; path: root/1/repair_avx.asm
; blob: 777e292a7139e480842d23093780977254a79e39 (plain)
global repair_avx_inner

section .text

;-----------------------------------------------------------------------
; repair_avx_inner
;
; Scans REPAIR_AVX_BLOCKS consecutive 32-byte blocks (8 dwords each)
; starting at [rsi]. For every block, the dword in edi is added to each
; of the 8 dwords and the sums are compared lane-by-lane for equality
; against ymm0. On the first block with at least one equal lane, the
; ORIGINAL (un-added) dword of the lowest matching lane is returned.
;
; In:    edi  = dword addend, broadcast to all 8 lanes
;        rsi  = address of the first block; 25 * 32 = 800 bytes are read
;               (loads use a VEX memory operand, so no alignment needed)
;        ymm0 = 8 dword comparison values (read only, never written here)
; Out:   eax  = matching dword loaded from memory, or 0 when no lane in
;               any block matched (a stored value of 0 that matches is
;               therefore indistinguishable from "not found")
; Clobb: rdx, rsi (left pointing at the matching / last block), ymm1,
;        ymm2, flags; vzeroupper clears the upper halves of the ymm
;        registers before returning
; NOTE(review): the register roles look like a SysV AMD64 call such as
;        f(uint32_t, const uint32_t *, __m256i) -- confirm against the
;        C-side prototype.
;-----------------------------------------------------------------------
%define REPAIR_AVX_BLOCK_BYTES  32      ; one ymm load = 8 dwords
%define REPAIR_AVX_BLOCKS       25      ; blocks examined per call

; Test the block at [rsi]; jump to .found (edx = non-zero byte mask)
; when any lane of (block + ymm1) equals the same lane of ymm0.
%macro REPAIR_AVX_PROBE 0
    vpaddd          ymm2, ymm1, [rsi]
    vpcmpeqd        ymm2, ymm2, ymm0
    vpmovmskb       edx, ymm2           ; 4 mask bits per dword lane
    test            edx, edx
    jnz             .found
%endmacro

repair_avx_inner:
    ; Broadcast edi to all 8 lanes of ymm1 via xmm1. (With AVX512VL +
    ; AVX512F a single `vpbroadcastd ymm1, edi` would do the same.)
    vmovd           xmm1, edi
    vpbroadcastd    ymm1, xmm1

    ; Fully unrolled scan: every block except the last advances rsi.
%rep REPAIR_AVX_BLOCKS - 1
    REPAIR_AVX_PROBE
    add             rsi, REPAIR_AVX_BLOCK_BYTES
%endrep
    REPAIR_AVX_PROBE                    ; final block, no advance needed

    ; Fall-through: nothing matched in any block.
    xor             eax, eax
    vzeroupper                          ; avoid AVX -> legacy SSE transition penalties
    ret

.found:
    ; Lowest set mask bit = byte offset of the first matching dword
    ; inside the current block (always a multiple of 4).
    bsf             edx, edx
    mov             eax, [rsi + rdx]    ; return the stored value, not the sum
    vzeroupper                          ; avoid AVX -> legacy SSE transition penalties
    ret