summary | refs | log | tree | commit | diff
path: root/1
diff options
context:
space:
mode:
Diffstat (limited to '1')
-rw-r--r-- 1/part2_fast.c 6
-rw-r--r-- 1/repair_avx.asm 7
2 files changed, 5 insertions, 8 deletions
diff --git a/1/part2_fast.c b/1/part2_fast.c
index ffb4504..67cd6c5 100644
--- a/1/part2_fast.c
+++ b/1/part2_fast.c
@@ -12,11 +12,10 @@ int repair_avx_inner(int i, const int *arr, __m256i search);
#else
int repair_avx_inner(int i, const int *arr, __m256i search)
{
- __m256i start = _mm256_set1_epi32(i);
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_set1_epi32(i));
for (int k = 0; k < INPUT_LEN; k += 8) {
__m256i new = _mm256_loadu_si256((__m256i *)(&arr[k]));
- new = _mm256_add_epi32(start, new);
- int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(new, search));
+ int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(new, cmp));
if (mask == 0) {
continue;
}
@@ -125,7 +124,6 @@ int repair_avx(const int *arr)
return 0;
}
-
int main(int argc, char *argv[])
{
FILE *file = fopen(argv[argc - 1], "r");
diff --git a/1/repair_avx.asm b/1/repair_avx.asm
index 777e292..4a268a6 100644
--- a/1/repair_avx.asm
+++ b/1/repair_avx.asm
@@ -6,16 +6,15 @@ repair_avx_inner:
; vpbroadcastd ymm1, edi ; AVX512VL AVX512F
vmovd xmm1, edi
vpbroadcastd ymm1, xmm1
+ vpsubd ymm1, ymm0, ymm1
%rep 24
- vpaddd ymm2, ymm1, [rsi]
- vpcmpeqd ymm2, ymm2, ymm0
+ vpcmpeqd ymm2, ymm1, [rsi]
vpmovmskb edx, ymm2
test edx, edx
jne .found
add rsi, 32 ; set up to read the next 256 bits (32 bytes) (8 * dword)
%endrep
- vpaddd ymm2, ymm1, [rsi]
- vpcmpeqd ymm2, ymm2, ymm0
+ vpcmpeqd ymm2, ymm1, [rsi]
vpmovmskb edx, ymm2
test edx, edx
jne .found