From baf4910870a6e8999802b9a4a22eabd4142a34e3 Mon Sep 17 00:00:00 2001 From: Bond_009 Date: Thu, 1 Dec 2022 22:30:22 +0100 Subject: Move all Advent of Codes into one repo --- 2020/01/part1.c | 38 +++++++++++++++ 2020/01/part1.f90 | 25 ++++++++++ 2020/01/part2.c | 40 ++++++++++++++++ 2020/01/part2.f90 | 27 +++++++++++ 2020/01/part2_fast.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 2020/01/repair_avx.asm | 21 ++++++++ 6 files changed, 278 insertions(+) create mode 100644 2020/01/part1.c create mode 100644 2020/01/part1.f90 create mode 100644 2020/01/part2.c create mode 100644 2020/01/part2.f90 create mode 100644 2020/01/part2_fast.c create mode 100644 2020/01/repair_avx.asm (limited to '2020/01') diff --git a/2020/01/part1.c b/2020/01/part1.c new file mode 100644 index 0000000..1b825df --- /dev/null +++ b/2020/01/part1.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +#define INPUT_LEN 200 + +int repair(const int * arr) +{ + for (int i = 0; i < INPUT_LEN; i++) { + for (int j = 0; j < INPUT_LEN; j++) { + if (arr[i] + arr[j] == 2020) { + return arr[i] * arr[j]; + } + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + FILE *file = fopen(argv[argc - 1], "r"); + if (!file) { + return 1; + } + + char buffer[8] = { 0 }; + int input[INPUT_LEN] = { 0 }; + for (int i = 0; i < 200; i++) { + fgets(buffer, 8, file); + input[i] = atoi(buffer); + } + + fclose(file); + + printf("%i", repair(input)); +} diff --git a/2020/01/part1.f90 b/2020/01/part1.f90 new file mode 100644 index 0000000..a7c09c8 --- /dev/null +++ b/2020/01/part1.f90 @@ -0,0 +1,25 @@ +program day1 + implicit none + + integer, parameter :: input_len = 200 + integer, parameter :: search = 2020 + + integer :: i, j + integer, dimension(input_len) :: input + + open(10, file='input', status='old') + do i = 1, input_len + read(10, *) input(i) + end do + close(10) + + do i = 1, input_len + do j = 1, input_len + if (input(i) + input(j) == search) then + print *, input(i) * input(j) + stop + end if + end do + end do + +end program day1 diff --git a/2020/01/part2.c b/2020/01/part2.c new file mode 100644 index 0000000..9c10b61 --- /dev/null +++ b/2020/01/part2.c @@ -0,0 +1,40 @@ +#include +#include +#include +#include + +#define INPUT_LEN 200 + +int repair(const int *arr) +{ + for (int i = 0; i < INPUT_LEN; i++) { + for (int j = 0; j < INPUT_LEN; j++) { + for (int k = 0; k < INPUT_LEN; k++) { + if (arr[i] + arr[j] + arr[k] == 2020) { + return arr[i] * arr[j] * arr[k]; + } + } + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + FILE *file = fopen(argv[argc - 1], "r"); + if (!file) { + return 1; + } + + char buffer[8] = { 0 }; + int input[INPUT_LEN] = { 0 }; + for (int i = 0; i < 200; i++) { + fgets(buffer, 8, file); + input[i] = atoi(buffer); + } + + fclose(file); + + printf("%i", repair(input)); +} diff --git a/2020/01/part2.f90 b/2020/01/part2.f90 new file mode 100644 index 0000000..b3d6bd2 --- /dev/null +++ b/2020/01/part2.f90 @@ -0,0 +1,27 @@ +program day1 + implicit none + + integer, parameter :: input_len = 200 + integer, parameter :: search = 2020 + + integer :: i, j, k + integer, dimension(input_len) :: input + + open(10, file='input', status='old') + do i = 1, input_len + read(10, *) input(i) + end do + close(10) + + do i = 1, input_len + do j = 1, input_len + do k = 1, input_len + if (input(i) + input(j) + input(k) == search) then + print *, input(i) * input(j) * input(k) + stop + end if + end do + end do + end do + +end program day1 diff --git a/2020/01/part2_fast.c b/2020/01/part2_fast.c new file mode 100644 index 0000000..c32f881 --- /dev/null +++ b/2020/01/part2_fast.c @@ -0,0 +1,127 @@ +#include +#include +#include +#include + +#define INPUT_LEN 200 +#define SEARCH 2020 + +#ifdef USE_ASM +int repair_avx_inner(const int *arr, __m256i search); +#else +int repair_avx_inner(const int *arr, __m256i search) +{ + for (int k = 0; k < INPUT_LEN; k += 8) { + __m256i new = _mm256_loadu_si256((__m256i *)(&arr[k])); + if (_mm256_movemask_epi8(_mm256_cmpeq_epi32(new, search))) { + return _mm256_extract_epi32(search, 0); + } + } + + return 0; +} +#endif + +int repair_avx(const int *arr) +{ + __m256i search = _mm256_set1_epi32(SEARCH); + for (int i = 0; i < INPUT_LEN; i++) { + __m256i start = _mm256_set1_epi32(arr[i]); + for (int j = 0; j < INPUT_LEN; j += 8) { + __m256i new = _mm256_loadu_si256((__m256i *)(&arr[j])); + new = _mm256_add_epi32(start, new); + unsigned int mask = (unsigned int)_mm256_movemask_epi8(_mm256_cmpgt_epi32(new, search)); + if (mask == 0xffffffff) { + continue; + } + + switch (__lzcnt32(mask) / 4) { + case 0: + { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(7))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 7]; + } + } + case 1: + if ((mask & 0x0f000000) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(6))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 6]; + } + } + case 2: + if ((mask & 0x00f00000) == 0 ){ + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(5))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 5]; + } + } + case 3: + if ((mask & 0x000f0000) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(4))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 4]; + } + } + case 4: + if ((mask & 0x0000f000) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(3))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 3]; + } + } + case 5: + if ((mask & 0x00000f00) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(2))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 2]; + } + } + case 6: + if ((mask & 0x000000f0) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(1))); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j + 1]; + } + } + case 7: + if ((mask & 0x0000000f) == 0) { + __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_setzero_si256())); + int tmp = repair_avx_inner(arr, cmp); + if (tmp) { + return tmp * arr[i] * arr[j]; + } + } + } + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + FILE *file = fopen(argv[argc - 1], "r"); + if (!file) { + return 1; + } + + char buffer[8] = { 0 }; + int input[INPUT_LEN] = { 0 }; + for (int i = 0; i < 200; i++) { + fgets(buffer, 8, file); + input[i] = atoi(buffer); + } + + fclose(file); + + printf("%i\n", repair_avx(input)); +} diff --git a/2020/01/repair_avx.asm b/2020/01/repair_avx.asm new file mode 100644 index 0000000..4f128f6 --- /dev/null +++ b/2020/01/repair_avx.asm @@ -0,0 +1,21 @@ +global repair_avx_inner + +section .text + +repair_avx_inner: +%assign i 0 +%rep 25 + vpcmpeqd ymm1, ymm0, [rdi + i] +; vptest ymm1, ymm1 ; slower then vpmovmskb + test + vpmovmskb eax, ymm1 + test eax, eax + jne .found +%assign i i+32 +%endrep + xor eax, eax ; not found, return 0 + vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions + ret +.found: + vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions + movd eax, xmm0 ; smaller then putting a vmovd before the vzeroupper and no measurable performance difference + ret -- cgit v1.2.3