From 8b2eedf589b205cfe0d8c63de84da173c533f4e0 Mon Sep 17 00:00:00 2001 From: William Bell Date: Sat, 5 Jul 2025 04:38:37 +0100 Subject: [PATCH] load cache if it exists --- .gitignore | 3 +- Makefile | 2 +- src/hash_data/siphash/LICENSE_CC0 | 2 +- src/hashmap/hashmap.c | 11 +- src/main.c | 328 ++++++++++++++++++++++++------ src/runtime/objects/object.c | 1 - src/translator/translator.h | 2 + 7 files changed, 281 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index fc7a50b..24cad0e 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,5 @@ build *.yy.h out.arbin -rand_test.ar \ No newline at end of file +rand_test.ar +__arcache__ \ No newline at end of file diff --git a/Makefile b/Makefile index 3fb1e58..2b03e20 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ LEXER_SRC = src/lexer/lex.l LEXER_C = src/lexer/lex.yy.c LEXER_H = src/lexer/lex.yy.h -CFILES = $(shell find src -name '*.c') +CFILES = external/xxhash/xxhash.c $(shell find src -name '*.c') CFLAGS = $(ARCHFLAGS) -lm -lgc -lgmp -Wall -Wextra -Wno-unused-function BINARY = bin/argon diff --git a/src/hash_data/siphash/LICENSE_CC0 b/src/hash_data/siphash/LICENSE_CC0 index 64978f2..55cc000 100644 --- a/src/hash_data/siphash/LICENSE_CC0 +++ b/src/hash_data/siphash/LICENSE_CC0 @@ -1,6 +1,6 @@ This license applies **only** to the files that explicitly state at the top of the file that they are under CC0 1.0 Universal. -All other files in this project are licensed under the GNU General Public License version 3 (GPLv3). +All other files in this project are licensed under the GNU General Public License version 3 (GPLv3) unless otherwise stated. Please refer to the LICENSE file in the root directory for the main project license. 
diff --git a/src/hashmap/hashmap.c b/src/hashmap/hashmap.c index 6582ef1..2c23adc 100644 --- a/src/hashmap/hashmap.c +++ b/src/hashmap/hashmap.c @@ -1,7 +1,9 @@ #include "hashmap.h" +#include "../memory.h" #include #include +#include #include #include #include @@ -9,10 +11,10 @@ struct hashmap *createHashmap() { size_t size = 8; - struct hashmap *t = (struct hashmap *)malloc(sizeof(struct hashmap)); + struct hashmap *t = (struct hashmap *)checked_malloc(sizeof(struct hashmap)); t->size = size; t->order = 1; - t->list = (struct node **)malloc(sizeof(struct node *) * size); + t->list = (struct node **)checked_malloc(sizeof(struct node *) * size); memset(t->list, 0, sizeof(struct node *) * size); return t; } @@ -42,7 +44,7 @@ void resize_hashmap(struct hashmap *t) { struct node **old_list = t->list; // Create new list - t->list = (struct node **)malloc(sizeof(struct node *) * new_size); + t->list = (struct node **)checked_malloc(sizeof(struct node *) * new_size); memset(t->list, 0, sizeof(struct node *) * new_size); t->size = new_size; @@ -57,6 +59,7 @@ void resize_hashmap(struct hashmap *t) { temp = temp->next; } } + free(old_list); } int hashCode(struct hashmap *t, uint64_t hash) { return hash % t->size; } @@ -106,7 +109,7 @@ void hashmap_insert(struct hashmap *t, uint64_t hash, void *key, } // Insert new node - struct node *newNode = (struct node *)malloc(sizeof(struct node)); + struct node *newNode = (struct node *)checked_malloc(sizeof(struct node)); newNode->hash = hash; newNode->key = key; newNode->val = val; diff --git a/src/main.c b/src/main.c index 33fda3d..88a062f 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,7 @@ #include "runtime/runtime.h" #include "translator/translator.h" -#include "external/xxhash/xxhash.h" +#include "../external/xxhash/xxhash.h" #include "hash_data/hash_data.h" #include #include @@ -15,27 +15,217 @@ #include #include #include +#include #include #include #include - -const char FILE_IDENTIFIER[] = "ARBI"; -const uint32_t 
version_number = 0; - -uint64_t htonll(uint64_t x) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return ((uint64_t)htonl(x & 0xFFFFFFFF) << 32) | htonl(x >> 32); +#ifdef _WIN32 +#include +#define mkdir(path, mode) _mkdir(path) #else - return x; +#include +#include #endif +#include + +int ensure_dir_exists(const char *path) { + struct stat st = {0}; + + if (stat(path, &st) == -1) { + // Directory does not exist, create it + if (mkdir(path, 0755) != 0) { + perror("mkdir failed"); + return -1; + } + } + return 0; } -uint64_t ntohll(uint64_t x) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return ((uint64_t)ntohl(x & 0xFFFFFFFF) << 32) | ntohl(x >> 32); -#else - return x; +char *normalize_path(char *path) { +#ifdef _WIN32 + for (char *p = path; *p; p++) { + if (*p == '/') { + *p = '\\'; + } + } #endif + return path; +} + +// Join two paths using '/' as separator, no platform checks here. +int path_join(char *dest, size_t dest_size, const char *path1, + const char *path2) { + size_t len1 = strlen(path1); + size_t len2 = strlen(path2); + + // Check if buffer is large enough (extra 2 for '/' and '\0') + if (len1 + len2 + 2 > dest_size) + return -1; + + strcpy(dest, path1); + + // Add '/' if needed + if (len1 > 0 && dest[len1 - 1] != '/') { + dest[len1] = '/'; + dest[len1 + 1] = '\0'; + len1++; + } + + // Skip leading '/' in path2 to avoid double separator + if (len2 > 0 && path2[0] == '/') { + path2++; + len2--; + } + + strcat(dest, path2); + + return 0; +} + +const char CACHE_FOLDER[] = "__arcache__"; +const char FILE_IDENTIFIER[5] = "ARBI"; +const char BYTECODE_EXTENTION[] = "arbin"; +const uint32_t version_number = 0; + +#ifdef _WIN32 +#define PATH_SEP '\\' +#else +#define PATH_SEP '/' +#endif + +char *replace_extension(const char *path, const char *new_ext) { + // Defensive: if new_ext doesn't start with '.', add it + int need_dot = (new_ext[0] != '.'); + + // Find last path separator to avoid changing dots in folder names + const char *last_sep = 
strrchr(path, PATH_SEP); +#ifdef _WIN32 + // Windows can have '/' too as separator in practice, check it + const char *last_alt_sep = strrchr(path, '/'); + if (last_alt_sep && (!last_sep || last_alt_sep > last_sep)) { + last_sep = last_alt_sep; + } +#endif + + // Find last '.' after last_sep (if any) + const char *last_dot = strrchr(path, '.'); + if (last_dot && (!last_sep || last_dot > last_sep)) { + // Extension found: copy path up to last_dot, then append new_ext + size_t base_len = last_dot - path; + size_t ext_len = strlen(new_ext) + (need_dot ? 1 : 0); + size_t new_len = base_len + ext_len + 1; + + char *result = malloc(new_len); + if (!result) + return NULL; + + memcpy(result, path, base_len); + + if (need_dot) + result[base_len] = '.'; + + strcpy(result + base_len + (need_dot ? 1 : 0), new_ext); + + return result; + } else { + // No extension found: append '.' + new_ext (if needed) + size_t path_len = strlen(path); + size_t ext_len = strlen(new_ext) + (need_dot ? 1 : 0); + size_t new_len = path_len + ext_len + 1; + + char *result = malloc(new_len); + if (!result) + return NULL; + + strcpy(result, path); + + if (need_dot) + strcat(result, "."); + + strcat(result, new_ext); + + return result; + } +} + +int load_cache(Translated *translated_dest, char *joined_paths, uint64_t hash) { + FILE *bytecode_file = fopen(joined_paths, "rb"); + if (!bytecode_file) + return 1; + char file_identifier_from_cache[sizeof(FILE_IDENTIFIER)]; + file_identifier_from_cache[strlen(FILE_IDENTIFIER)] = '\0'; + if (fread(&file_identifier_from_cache, 1, + sizeof(file_identifier_from_cache) - 1, + bytecode_file) != sizeof(file_identifier_from_cache) - 1 || + memcmp(file_identifier_from_cache, FILE_IDENTIFIER, + sizeof(file_identifier_from_cache)) != 0) { + fclose(bytecode_file); + return 1; + } + + uint32_t read_version; + if (fread(&read_version, 1, sizeof(read_version), bytecode_file) != + sizeof(read_version)) { + goto FAILED; + } + read_version = le32toh(read_version); + + if 
(read_version != version_number) { + goto FAILED; + } + + uint64_t read_hash; + if (fread(&read_hash, 1, sizeof(read_hash), bytecode_file) != + sizeof(read_hash)) { + goto FAILED; + } + read_hash = le64toh(read_hash); + + if (read_hash != hash) { + goto FAILED; + } + + uint8_t register_count; + if (fread(&register_count, 1, sizeof(register_count), bytecode_file) != + sizeof(register_count)) { + goto FAILED; + } + + uint64_t constantsSize; + if (fread(&constantsSize, 1, sizeof(constantsSize), bytecode_file) != + sizeof(constantsSize)) { + goto FAILED; + } + constantsSize = le64toh(constantsSize); + + uint64_t bytecodeSize; + if (fread(&bytecodeSize, 1, sizeof(bytecodeSize), bytecode_file) != + sizeof(bytecodeSize)) { + goto FAILED; + } + bytecodeSize = le64toh(bytecodeSize); + + arena_resize(&translated_dest->constants, constantsSize); + + if (fread(translated_dest->constants.data, 1, constantsSize, bytecode_file) != + constantsSize) { + goto FAILED; + } + + darray_resize(&translated_dest->bytecode, bytecodeSize); + + if (fread(translated_dest->bytecode.data, 1, bytecodeSize, bytecode_file) != + bytecodeSize) { + goto FAILED; + } + + translated_dest->registerCount = register_count; + + fclose(bytecode_file); + return 0; +FAILED: + fclose(bytecode_file); + return 1; } int main(int argc, char *argv[]) { @@ -47,9 +237,6 @@ int main(int argc, char *argv[]) { return -1; ar_memory_init(); char *path = argv[1]; - DArray tokens; - - darray_init(&tokens, sizeof(Token)); FILE *file = fopen(path, "r"); if (!file) { @@ -67,61 +254,81 @@ int main(int argc, char *argv[]) { rewind(file); uint64_t hash = XXH3_64bits_digest(hash_state); XXH3_freeState(hash_state); - printf("Hash: %016llx\n", (unsigned long long)hash); - LexerState state = {path, file, 0, 0, &tokens}; - start = clock(); - lexer(state); - end = clock(); - time_spent = (double)(end - start) / CLOCKS_PER_SEC; - total_time_spent += time_spent; - printf("Lexer time taken: %f seconds\n", time_spent); - fclose(state.file); + 
char *filename_without_extention = + replace_extension(path, BYTECODE_EXTENTION); - DArray ast; + size_t joined_paths_length = + strlen(CACHE_FOLDER) + strlen(filename_without_extention) + 2; + char *joined_paths = checked_malloc(joined_paths_length); - darray_init(&ast, sizeof(ParsedValue)); - - start = clock(); - parser(path, &ast, &tokens, false); - end = clock(); - time_spent = (double)(end - start) / CLOCKS_PER_SEC; - total_time_spent += time_spent; - printf("Parser time taken: %f seconds\n", time_spent); - darray_free(&tokens, free_token); + path_join(joined_paths, joined_paths_length, CACHE_FOLDER, + filename_without_extention); + free(filename_without_extention); + filename_without_extention = NULL; Translated translated = init_translator(); - start = clock(); - translate(&translated, &ast); - end = clock(); - time_spent = (double)(end - start) / CLOCKS_PER_SEC; - total_time_spent += time_spent; - printf("Translation time taken: %f seconds\n", time_spent); + if (load_cache(&translated, joined_paths, hash) != 0) { + free_translator(&translated); + translated = init_translator(); - darray_free(&ast, free_parsed); + DArray tokens; + darray_init(&tokens, sizeof(Token)); - file = fopen("out.arbin", "wb"); + LexerState state = {path, file, 0, 0, &tokens}; + start = clock(); + lexer(state); + end = clock(); + time_spent = (double)(end - start) / CLOCKS_PER_SEC; + total_time_spent += time_spent; + printf("Lexer time taken: %f seconds\n", time_spent); + fclose(state.file); - uint64_t constantsSize = (uint64_t)translated.constants.size; - uint64_t bytecodeSize = (uint64_t)translated.bytecode.size; + DArray ast; - uint32_t version_number_htole32ed = htole32(version_number); - uint64_t net_hash = htonll(hash); - constantsSize = htole64(constantsSize); - bytecodeSize = htole64(bytecodeSize); + darray_init(&ast, sizeof(ParsedValue)); - fwrite(&FILE_IDENTIFIER, sizeof(char), strlen(FILE_IDENTIFIER), file); - fwrite(&net_hash, sizeof(net_hash), 1, file); - 
fwrite(&version_number_htole32ed, sizeof(uint32_t), 1, file); - fwrite(&translated.registerCount, sizeof(uint8_t), 1, file); - fwrite(&constantsSize, sizeof(uint64_t), 1, file); - fwrite(&bytecodeSize, sizeof(uint64_t), 1, file); - fwrite(translated.constants.data, 1, translated.constants.size, file); - fwrite(translated.bytecode.data, translated.bytecode.element_size, - translated.bytecode.size, file); + start = clock(); + parser(path, &ast, &tokens, false); + end = clock(); + time_spent = (double)(end - start) / CLOCKS_PER_SEC; + total_time_spent += time_spent; + printf("Parser time taken: %f seconds\n", time_spent); + darray_free(&tokens, free_token); - fclose(file); + start = clock(); + translate(&translated, &ast); + end = clock(); + time_spent = (double)(end - start) / CLOCKS_PER_SEC; + total_time_spent += time_spent; + printf("Translation time taken: %f seconds\n", time_spent); + + darray_free(&ast, free_parsed); + ensure_dir_exists(CACHE_FOLDER); + + file = fopen(joined_paths, "wb"); + + uint64_t constantsSize = (uint64_t)translated.constants.size; + uint64_t bytecodeSize = (uint64_t)translated.bytecode.size; + + uint32_t version_number_htole32ed = htole32(version_number); + uint64_t net_hash = htole64(hash); + constantsSize = htole64(constantsSize); + bytecodeSize = htole64(bytecodeSize); + + fwrite(&FILE_IDENTIFIER, sizeof(char), strlen(FILE_IDENTIFIER), file); + fwrite(&version_number_htole32ed, sizeof(uint32_t), 1, file); + fwrite(&net_hash, sizeof(net_hash), 1, file); + fwrite(&translated.registerCount, sizeof(uint8_t), 1, file); + fwrite(&constantsSize, sizeof(uint64_t), 1, file); + fwrite(&bytecodeSize, sizeof(uint64_t), 1, file); + fwrite(translated.constants.data, 1, translated.constants.size, file); + fwrite(translated.bytecode.data, translated.bytecode.element_size, + translated.bytecode.size, file); + + fclose(file); + } init_types(); @@ -135,5 +342,6 @@ int main(int argc, char *argv[]) { printf("total time taken: %f seconds\n", 
total_time_spent); free_translator(&translated); + free(joined_paths); return 0; } diff --git a/src/runtime/objects/object.c b/src/runtime/objects/object.c index 55715c3..d324535 100644 --- a/src/runtime/objects/object.c +++ b/src/runtime/objects/object.c @@ -3,7 +3,6 @@ #include "../../memory.h" #include "type/type.h" #include -#include #include ArgonObject *BASE_CLASS = NULL; diff --git a/src/translator/translator.h b/src/translator/translator.h index 61c1951..425d618 100644 --- a/src/translator/translator.h +++ b/src/translator/translator.h @@ -24,6 +24,8 @@ typedef struct { ConstantArena constants; } Translated; +void arena_resize(ConstantArena *arena, size_t new_size); + void *arena_get(ConstantArena *arena, size_t offset); size_t arena_push(ConstantArena *arena, const void *data, size_t length);