From 74a9bee9e67e9800d8efe164d66f5c479f7fabca Mon Sep 17 00:00:00 2001 From: Bond_009 Date: Thu, 3 Dec 2020 21:37:22 +0100 Subject: Add fast AVX version of day 1 part 2 --- 1/part2_fast.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 1/part2_fast.c diff --git a/1/part2_fast.c b/1/part2_fast.c new file mode 100644 index 0000000..1d9afb1 --- /dev/null +++ b/1/part2_fast.c @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + +#define INPUT_LEN 200 +#define SEARCH 2020 + +int inner_repair_avx(int i, const int *arr, __m256i search) +{ + __m256i start = _mm256_set1_epi32(i); + for (int k = 0; k < INPUT_LEN; k += 8) { + __m256i new = _mm256_loadu_si256((__m256i *)(&arr[k])); + new = _mm256_add_epi32(start, new); + int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(new, search)); + if (mask == 0) { + continue; + } + + for (int l = 0; l < 8; l++) { + if (mask & (0xf << (l * 4))) { + return arr[k + l]; + } + } + } + + return 0; +} + +int repair_avx(const int *arr) +{ + __m256i search = _mm256_set1_epi32(SEARCH); + for (int i = 0; i < INPUT_LEN; i++) { + __m256i start = _mm256_set1_epi32(arr[i]); + for (int j = 0; j < INPUT_LEN; j += 8) { + __m256i new = _mm256_loadu_si256((__m256i *)(&arr[j])); + new = _mm256_add_epi32(start, new); + int mask = _mm256_movemask_epi8(_mm256_cmpgt_epi32(new, search)); + if (mask == 0xffffffff) { + continue; + } + + switch (__lzcnt32(mask) / 4) { + case 0: goto label0; + case 1: goto label1; + case 2: goto label2; + case 3: goto label3; + case 4: goto label4; + case 5: goto label5; + case 6: goto label6; + case 7: goto label7; + } + + label0: + if ((mask & 0x0000000f) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 0), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j]; + } + } + + label1: + if ((mask & 0x000000f0) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 1), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 1]; + } + } + + label2: + if ((mask & 0x00000f00) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 2), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 2]; + } + } + + label3: + if ((mask & 0x0000f000) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 3), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 3]; + } + } + + label4: + if ((mask & 0x000f0000) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 4), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 4]; + } + } + + label5: + if ((mask & 0x00f00000) == 0 ){ + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 5), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 5]; + } + } + + label6: + if ((mask & 0x0f000000) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 6), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 6]; + } + } + + label7: + if ((mask & 0xf0000000) == 0) { + int tmp = inner_repair_avx(_mm256_extract_epi32(new, 7), arr, search); + if (tmp) { + return tmp * arr[i] * arr[j + 7]; + } + } + } + } + + return 0; +} + +int main() +{ + FILE *file = fopen("input", "r"); + + char buffer[8] = { 0 }; + int input[INPUT_LEN] = { 0 }; + for (int i = 0; i < 200; i++) { + fgets(buffer, 8, file); + input[i] = atoi(buffer); + } + + fclose(file); + + printf("%i\n", repair_avx(input)); +} -- cgit v1.2.3