From a50c55c1e4e9d1f981f0563ab72471de1ddf2501 Mon Sep 17 00:00:00 2001 From: Bond_009 Date: Tue, 5 Jan 2021 14:42:25 +0100 Subject: Optimize repair_avx_inner --- 1/part2_fast.c | 6 ++---- 1/repair_avx.asm | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/1/part2_fast.c b/1/part2_fast.c index ffb4504..67cd6c5 100644 --- a/1/part2_fast.c +++ b/1/part2_fast.c @@ -12,11 +12,10 @@ int repair_avx_inner(int i, const int *arr, __m256i search); #else int repair_avx_inner(int i, const int *arr, __m256i search) { - __m256i start = _mm256_set1_epi32(i); + __m256i cmp = _mm256_sub_epi32(search, _mm256_set1_epi32(i)); for (int k = 0; k < INPUT_LEN; k += 8) { __m256i new = _mm256_loadu_si256((__m256i *)(&arr[k])); - new = _mm256_add_epi32(start, new); - int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(new, search)); + int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(new, cmp)); if (mask == 0) { continue; } @@ -125,7 +124,6 @@ int repair_avx(const int *arr) return 0; } - int main(int argc, char *argv[]) { FILE *file = fopen(argv[argc - 1], "r"); diff --git a/1/repair_avx.asm b/1/repair_avx.asm index 777e292..4a268a6 100644 --- a/1/repair_avx.asm +++ b/1/repair_avx.asm @@ -6,16 +6,15 @@ repair_avx_inner: ; vpbroadcastd ymm1, edi ; AVX512VL AVX512F vmovd xmm1, edi vpbroadcastd ymm1, xmm1 + vpsubd ymm1, ymm0, ymm1 %rep 24 - vpaddd ymm2, ymm1, [rsi] - vpcmpeqd ymm2, ymm2, ymm0 + vpcmpeqd ymm2, ymm1, [rsi] vpmovmskb edx, ymm2 test edx, edx jne .found add rsi, 32 ; set up to read the next 256 bits (32 bytes) (8 * dword) %endrep - vpaddd ymm2, ymm1, [rsi] - vpcmpeqd ymm2, ymm2, ymm0 + vpcmpeqd ymm2, ymm1, [rsi] vpmovmskb edx, ymm2 test edx, edx jne .found -- cgit v1.2.3