summaryrefslogtreecommitdiff
path: root/2020/01
diff options
context:
space:
mode:
Diffstat (limited to '2020/01')
-rw-r--r--2020/01/part1.c38
-rw-r--r--2020/01/part1.f9025
-rw-r--r--2020/01/part2.c40
-rw-r--r--2020/01/part2.f9027
-rw-r--r--2020/01/part2_fast.c127
-rw-r--r--2020/01/repair_avx.asm21
6 files changed, 278 insertions, 0 deletions
diff --git a/2020/01/part1.c b/2020/01/part1.c
new file mode 100644
index 0000000..1b825df
--- /dev/null
+++ b/2020/01/part1.c
@@ -0,0 +1,38 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define INPUT_LEN 200
+
+int repair(const int * arr)
+{
+ for (int i = 0; i < INPUT_LEN; i++) {
+ for (int j = 0; j < INPUT_LEN; j++) {
+ if (arr[i] + arr[j] == 2020) {
+ return arr[i] * arr[j];
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *file = fopen(argv[argc - 1], "r");
+ if (!file) {
+ return 1;
+ }
+
+ char buffer[8] = { 0 };
+ int input[INPUT_LEN] = { 0 };
+ for (int i = 0; i < 200; i++) {
+ fgets(buffer, 8, file);
+ input[i] = atoi(buffer);
+ }
+
+ fclose(file);
+
+ printf("%i", repair(input));
+}
diff --git a/2020/01/part1.f90 b/2020/01/part1.f90
new file mode 100644
index 0000000..a7c09c8
--- /dev/null
+++ b/2020/01/part1.f90
@@ -0,0 +1,25 @@
+program day1
+ implicit none
+
+ integer, parameter :: input_len = 200
+ integer, parameter :: search = 2020
+
+ integer :: i, j
+ integer, dimension(input_len) :: input
+
+ open(10, file='input', status='old')
+ do i = 1, input_len
+ read(10, *) input(i)
+ end do
+ close(10)
+
+ do i = 1, input_len
+ do j = 1, input_len
+ if (input(i) + input(j) == search) then
+ print *, input(i) * input(j)
+ stop
+ end if
+ end do
+ end do
+
+end program day1
diff --git a/2020/01/part2.c b/2020/01/part2.c
new file mode 100644
index 0000000..9c10b61
--- /dev/null
+++ b/2020/01/part2.c
@@ -0,0 +1,40 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define INPUT_LEN 200
+
+int repair(const int *arr)
+{
+ for (int i = 0; i < INPUT_LEN; i++) {
+ for (int j = 0; j < INPUT_LEN; j++) {
+ for (int k = 0; k < INPUT_LEN; k++) {
+ if (arr[i] + arr[j] + arr[k] == 2020) {
+ return arr[i] * arr[j] * arr[k];
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *file = fopen(argv[argc - 1], "r");
+ if (!file) {
+ return 1;
+ }
+
+ char buffer[8] = { 0 };
+ int input[INPUT_LEN] = { 0 };
+ for (int i = 0; i < 200; i++) {
+ fgets(buffer, 8, file);
+ input[i] = atoi(buffer);
+ }
+
+ fclose(file);
+
+ printf("%i", repair(input));
+}
diff --git a/2020/01/part2.f90 b/2020/01/part2.f90
new file mode 100644
index 0000000..b3d6bd2
--- /dev/null
+++ b/2020/01/part2.f90
@@ -0,0 +1,27 @@
+program day1
+ implicit none
+
+ integer, parameter :: input_len = 200
+ integer, parameter :: search = 2020
+
+ integer :: i, j, k
+ integer, dimension(input_len) :: input
+
+ open(10, file='input', status='old')
+ do i = 1, input_len
+ read(10, *) input(i)
+ end do
+ close(10)
+
+ do i = 1, input_len
+ do j = 1, input_len
+ do k = 1, input_len
+ if (input(i) + input(j) + input(k) == search) then
+ print *, input(i) * input(j) * input(k)
+ stop
+ end if
+ end do
+ end do
+ end do
+
+end program day1
diff --git a/2020/01/part2_fast.c b/2020/01/part2_fast.c
new file mode 100644
index 0000000..c32f881
--- /dev/null
+++ b/2020/01/part2_fast.c
@@ -0,0 +1,127 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+#define INPUT_LEN 200
+#define SEARCH 2020
+
+#ifdef USE_ASM
+int repair_avx_inner(const int *arr, __m256i search);
+#else
+int repair_avx_inner(const int *arr, __m256i search)
+{
+ for (int k = 0; k < INPUT_LEN; k += 8) {
+ __m256i new = _mm256_loadu_si256((__m256i *)(&arr[k]));
+ if (_mm256_movemask_epi8(_mm256_cmpeq_epi32(new, search))) {
+ return _mm256_extract_epi32(search, 0);
+ }
+ }
+
+ return 0;
+}
+#endif
+
+int repair_avx(const int *arr)
+{
+ __m256i search = _mm256_set1_epi32(SEARCH);
+ for (int i = 0; i < INPUT_LEN; i++) {
+ __m256i start = _mm256_set1_epi32(arr[i]);
+ for (int j = 0; j < INPUT_LEN; j += 8) {
+ __m256i new = _mm256_loadu_si256((__m256i *)(&arr[j]));
+ new = _mm256_add_epi32(start, new);
+ unsigned int mask = (unsigned int)_mm256_movemask_epi8(_mm256_cmpgt_epi32(new, search));
+ if (mask == 0xffffffff) {
+ continue;
+ }
+
+ switch (__lzcnt32(mask) / 4) {
+ case 0:
+ {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(7)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 7];
+ }
+ }
+ case 1:
+ if ((mask & 0x0f000000) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(6)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 6];
+ }
+ }
+ case 2:
+ if ((mask & 0x00f00000) == 0 ){
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(5)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 5];
+ }
+ }
+ case 3:
+ if ((mask & 0x000f0000) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(4)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 4];
+ }
+ }
+ case 4:
+ if ((mask & 0x0000f000) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(3)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 3];
+ }
+ }
+ case 5:
+ if ((mask & 0x00000f00) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(2)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 2];
+ }
+ }
+ case 6:
+ if ((mask & 0x000000f0) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_set1_epi32(1)));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j + 1];
+ }
+ }
+ case 7:
+ if ((mask & 0x0000000f) == 0) {
+ __m256i cmp = _mm256_sub_epi32(search, _mm256_permutevar8x32_epi32(new, _mm256_setzero_si256()));
+ int tmp = repair_avx_inner(arr, cmp);
+ if (tmp) {
+ return tmp * arr[i] * arr[j];
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *file = fopen(argv[argc - 1], "r");
+ if (!file) {
+ return 1;
+ }
+
+ char buffer[8] = { 0 };
+ int input[INPUT_LEN] = { 0 };
+ for (int i = 0; i < 200; i++) {
+ fgets(buffer, 8, file);
+ input[i] = atoi(buffer);
+ }
+
+ fclose(file);
+
+ printf("%i\n", repair_avx(input));
+}
diff --git a/2020/01/repair_avx.asm b/2020/01/repair_avx.asm
new file mode 100644
index 0000000..4f128f6
--- /dev/null
+++ b/2020/01/repair_avx.asm
@@ -0,0 +1,21 @@
+global repair_avx_inner
+
+section .text
+
+repair_avx_inner:
+%assign i 0
+%rep 25
+ vpcmpeqd ymm1, ymm0, [rdi + i]
+; vptest ymm1, ymm1 ; slower then vpmovmskb + test
+ vpmovmskb eax, ymm1
+ test eax, eax
+ jne .found
+%assign i i+32
+%endrep
+ xor eax, eax ; not found, return 0
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ ret
+.found:
+ vzeroupper ; eliminate performance penalties caused by false dependencies when transitioning between AVX and legacy SSE instructions
+ movd eax, xmm0 ; smaller then putting a vmovd before the vzeroupper and no measurable performance difference
+ ret