Use a hashmap in the constants buffer to speed up translation of large files

This commit is contained in:
2025-06-30 17:56:32 +01:00
parent 31f38d8ba4
commit 3adecb4eba
18 changed files with 1120882 additions and 83 deletions

View File

@@ -1,6 +1,6 @@
let myfile = file.write("rand_test.ar") let myfile = file.write("rand_test.ar")
for (i from 0 to 1e6) do for (i from 0 to 100000) do
myfile.text("\"") myfile.text("\"")
myfile.text(string(random())) myfile.text(string(random()))
myfile.text("\"\n") myfile.text("\"\n")

1020600
hello_world_test.ar Normal file

File diff suppressed because it is too large Load Diff

100000
rand_test.ar Normal file

File diff suppressed because it is too large Load Diff

28
src/hash_data/hash_data.c Normal file
View File

@@ -0,0 +1,28 @@
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include "siphash/siphash.h"
#include "hash_data.h"
// Shared 16-byte SipHash key. As a file-scope object without an initializer it
// starts out all zero; callers fill it by passing it to generate_siphash_key().
uint8_t siphash_key[16];
// Second 16-byte key buffer. Nothing in this file writes to it, so within this
// translation unit it keeps its zero-initialized contents.
// NOTE(review): presumably meant as a fixed all-zero key — confirm with callers.
uint8_t empty_siphash_key[16];
// Fill hash_key with 16 bytes read from /dev/urandom.
//
// hash_key must point to at least 16 writable bytes.
//
// Short reads are retried until the whole key has been filled. If the device
// cannot be opened, or a read fails or hits end-of-file before 16 bytes were
// obtained, the key is set to all zero bytes so that it is always fully
// defined (hashing then still works, just without a random key).
// The descriptor is only closed when it was actually opened.
void generate_siphash_key(uint8_t hash_key[16]) {
  enum { KEY_LEN = 16 };
  size_t filled = 0;

  int fd = open("/dev/urandom", O_RDONLY);
  if (fd >= 0) {
    while (filled < KEY_LEN) {
      ssize_t got = read(fd, hash_key + filled, KEY_LEN - filled);
      if (got <= 0) {
        // Error or unexpected end-of-file: give up and use the fallback.
        break;
      }
      filled += (size_t)got;
    }
    close(fd);
  }

  if (filled != KEY_LEN) {
    // Fallback: never leave a partially written or untouched buffer behind.
    for (size_t i = 0; i < KEY_LEN; i++) {
      hash_key[i] = 0;
    }
  }
}
// Hash len bytes starting at data with SipHash under hash_key and return the
// 8-byte digest as a 64-bit integer (digest byte 0 is the least significant
// byte of the result).
//
// Returns 0 when siphash() reports a non-zero status.
uint64_t siphash64_bytes(const void *data, size_t len,uint8_t hash_key[16]) {
  uint8_t digest[8];
  if (siphash(data, len, hash_key, digest, sizeof(digest)) != 0) {
    return 0;
  }

  // Fold the digest in from the most significant byte down, which yields the
  // same little-endian interpretation as OR-ing shifted bytes together.
  uint64_t value = 0;
  for (int idx = 7; idx >= 0; --idx) {
    value = (value << 8) | (uint64_t)digest[idx];
  }
  return value;
}

11
src/hash_data/hash_data.h Normal file
View File

@@ -0,0 +1,11 @@
#ifndef HASH_DATA_H
#define HASH_DATA_H
#include <stdlib.h>
#include <stdint.h>
// Shared 16-byte SipHash key, defined in hash_data.c.
extern uint8_t siphash_key[16];
// Second 16-byte key buffer, defined in hash_data.c.
// NOTE(review): presumably a fixed all-zero key — confirm intended use.
extern uint8_t empty_siphash_key[16];
// Fill the given 16-byte buffer with bytes read from /dev/urandom.
// Note: the parameter name shadows the global siphash_key inside the prototype.
void generate_siphash_key(uint8_t siphash_key[16]);
// Return the 64-bit SipHash of len bytes at data under the given 16-byte key;
// returns 0 if the underlying siphash() call reports failure.
uint64_t siphash64_bytes(const void *data, size_t len,uint8_t siphash_key[16]);
#endif //HASH_DATA_H

130
src/hashmap/hashmap.c Normal file
View File

@@ -0,0 +1,130 @@
#include "hashmap.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdlib.h>
// Allocate a new, empty hashmap with 8 buckets.
//
// All fields are initialised: size is the bucket count, count starts at 0
// (hashmap_insert reads it for its load-factor check, so it must not be left
// indeterminate) and order starts at 1 because 0 is reserved by
// hashmap_insert to mean "assign the next order number".
//
// Returns NULL if either allocation fails. Release with hashmap_free().
struct hashmap *createHashmap(void) {
  enum { INITIAL_BUCKETS = 8 };

  struct hashmap *t = malloc(sizeof *t);
  if (!t) {
    return NULL;
  }

  // calloc zero-fills the bucket array, i.e. every chain starts out empty.
  t->list = calloc(INITIAL_BUCKETS, sizeof *t->list);
  if (!t->list) {
    free(t);
    return NULL;
  }

  t->size = INITIAL_BUCKETS;
  t->count = 0;
  t->order = 1;
  return t;
}
// Release a hashmap: every node in every bucket, the bucket array, and the
// map structure itself.
//
// t        - map to destroy; NULL is accepted and ignored.
// free_val - optional callback; when non-NULL it is invoked on each stored
//            value that is itself non-NULL. Keys are never freed here.
void hashmap_free(struct hashmap *t, free_val_func free_val) {
  if (t == NULL) {
    return;
  }

  for (size_t bucket = 0; bucket < t->size; ++bucket) {
    struct node *entry = t->list[bucket];
    while (entry != NULL) {
      // Step forward first so the node can be released safely.
      struct node *doomed = entry;
      entry = entry->next;

      if (free_val != NULL && doomed->val != NULL) {
        free_val(doomed->val);
      }
      free(doomed);
    }
  }

  free(t->list);
  free(t);
}
// Double the number of buckets and redistribute all entries.
//
// The existing nodes are relinked into the new bucket array instead of being
// re-inserted through hashmap_insert(): re-inserting allocates a fresh node
// per entry and would leave every old node, plus the old bucket array,
// unreachable and unfreed. Nodes are pushed onto the head of their new chain
// in traversal order, which produces the same chain layout as re-insertion.
//
// t->count is recomputed from the nodes actually moved. If the bucket count
// cannot be doubled (size overflow or allocation failure) the map is left
// unchanged and remains fully usable, just with longer chains.
void resize_hashmap(struct hashmap *t) {
  size_t old_size = t->size;
  if (old_size > SIZE_MAX / 2) {
    return;
  }
  size_t new_size = old_size * 2;

  // calloc zero-fills the array and checks new_size * element size itself.
  struct node **new_list = calloc(new_size, sizeof *new_list);
  if (!new_list) {
    return;
  }

  struct node **old_list = t->list;
  size_t moved = 0;
  for (size_t i = 0; i < old_size; i++) {
    struct node *cur = old_list[i];
    while (cur) {
      struct node *next = cur->next;
      size_t pos = (size_t)(cur->hash % new_size);
      cur->next = new_list[pos];
      new_list[pos] = cur;
      moved++;
      cur = next;
    }
  }

  free(old_list);
  t->list = new_list;
  t->size = new_size;
  t->count = moved;
}
// Map a 64-bit hash to a bucket index: the hash reduced modulo the map's
// current bucket count, converted to int on return.
int hashCode(struct hashmap *t, uint64_t hash) {
  uint64_t bucket = hash % t->size;
  return bucket;
}
// Remove the entry whose stored hash equals `hash`.
//
// Entries are matched on the 64-bit hash only. The node is unlinked from its
// bucket chain and freed, and the entry count is decremented so the
// load-factor check in hashmap_insert() keeps reflecting the live entries.
// The key and value pointers held by the node are NOT freed; they remain the
// caller's responsibility.
//
// Returns 1 if an entry was removed, 0 if no entry with that hash exists.
int hashmap_remove(struct hashmap *t, uint64_t hash) {
  int pos = hashCode(t, hash);

  // `link` always points at the pointer that refers to the current node, so
  // unlinking works the same for the chain head and for interior nodes.
  struct node **link = &t->list[pos];
  while (*link) {
    struct node *cur = *link;
    if (cur->hash == hash) {
      *link = cur->next;
      free(cur);
      if (t->count > 0) {
        t->count--;
      }
      return 1;
    }
    link = &cur->next;
  }
  return 0;
}
void hashmap_insert(struct hashmap *t, uint64_t hash, void *key,
void *val, size_t order) {
if (!order) {
order = t->order++;
}
if ((t->count + 1) > t->size * 0.75) {
resize_hashmap(t);
}
int pos = hashCode(t, hash);
struct node *list = t->list[pos];
struct node *temp = list;
// Check if key exists → overwrite
while (temp) {
if (temp->hash == hash) {
temp->val = val;
return;
}
temp = temp->next;
}
// Insert new node
struct node *newNode = (struct node *)malloc(sizeof(struct node));
newNode->hash = hash;
newNode->key = key;
newNode->val = val;
newNode->order = order;
newNode->next = list;
t->list[pos] = newNode;
t->count++;
}
void *hashmap_lookup(struct hashmap *t, uint64_t hash) {
int pos = hashCode(t, hash);
struct node *list = t->list[pos];
struct node *temp = list;
while (temp) {
if (temp->hash == hash) {
return temp->val;
}
temp = temp->next;
}
return NULL;
}

37
src/hashmap/hashmap.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef HASHMAP_H
#define HASHMAP_H
#include <stdint.h>
#include <stdlib.h>
typedef struct ArgonObject ArgonObject;
typedef void (*free_val_func)(void *val);
// One entry of a hashmap, linked into the chain of its bucket.
struct node {
// Full 64-bit hash of the entry; used both to pick the bucket and as the
// sole identity when entries are compared.
uint64_t hash;
// Caller-supplied key pointer; stored on insertion but not read by the
// hashmap functions themselves.
void *key;
// Caller-supplied value pointer; returned by hashmap_lookup().
void *val;
// Sequence number given at insertion (from the map's counter when the caller
// passes 0).
size_t order;
// Next entry in the same bucket, or NULL at the end of the chain.
struct node *next;
};
// Separate-chaining hash table keyed by precomputed 64-bit hashes.
struct hashmap {
// Number of buckets in `list`.
size_t size;
// Entry counter maintained by hashmap_insert(); compared against 0.75 * size
// to decide when the table is doubled.
size_t count;
// Next sequence number handed out to entries inserted with order 0; starts
// at 1.
size_t order;
// Array of `size` bucket heads; an empty bucket is NULL.
struct node **list;
};
// Allocate and return a new, empty map with 8 buckets.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list is not a prototype before C23.
struct hashmap *createHashmap(void);
// Free every node, the bucket array and the map itself. When free_val is
// non-NULL it is called on each non-NULL stored value. Keys are not freed.
// Passing a NULL map is a no-op.
void hashmap_free(struct hashmap *t, free_val_func free_val);
// Bucket index for `hash`: the hash modulo the map's current bucket count.
int hashCode(struct hashmap *t, uint64_t hash);
// Unlink the entry whose stored hash equals `hash`.
// Returns 1 if such an entry was found, 0 otherwise.
int hashmap_remove(struct hashmap *t, uint64_t hash);
// Insert an entry, or replace the value of the existing entry with the same
// hash (entries are matched on the hash only; key and order of an existing
// entry are left unchanged). Passing order 0 assigns the next sequence number
// from the map's counter.
void hashmap_insert(struct hashmap *t, uint64_t hash, void *key,
                    void *val, size_t order);
// Return the value stored for `hash`, or NULL if there is no such entry.
void *hashmap_lookup(struct hashmap *t, uint64_t hash);
#endif // HASHMAP_H

View File

@@ -15,13 +15,15 @@
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#include "hash_data/hash_data.h"
const char FILE_IDENTIFIER[] = "ARBI"; const char FILE_IDENTIFIER[] = "ARBI";
const uint32_t version_number = 0; const uint32_t version_number = 0;
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
clock_t start,end; generate_siphash_key(siphash_key);
double time_spent; clock_t start, end;
double time_spent, total_time_spent=0;
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
if (argc <= 1) if (argc <= 1)
return -1; return -1;
@@ -42,6 +44,7 @@ int main(int argc, char *argv[]) {
lexer(state); lexer(state);
end = clock(); end = clock();
time_spent = (double)(end - start) / CLOCKS_PER_SEC; time_spent = (double)(end - start) / CLOCKS_PER_SEC;
total_time_spent+=time_spent;
printf("Lexer time taken: %f seconds\n", time_spent); printf("Lexer time taken: %f seconds\n", time_spent);
fclose(state.file); fclose(state.file);
@@ -49,21 +52,21 @@ int main(int argc, char *argv[]) {
darray_init(&ast, sizeof(ParsedValue)); darray_init(&ast, sizeof(ParsedValue));
start = clock(); start = clock();
parser(path, &ast, &tokens, false); parser(path, &ast, &tokens, false);
end = clock(); end = clock();
time_spent = (double)(end - start) / CLOCKS_PER_SEC; time_spent = (double)(end - start) / CLOCKS_PER_SEC;
total_time_spent+=time_spent;
printf("Parser time taken: %f seconds\n", time_spent); printf("Parser time taken: %f seconds\n", time_spent);
darray_free(&tokens, free_token); darray_free(&tokens, free_token);
Translated translated = init_translator(); Translated translated = init_translator();
start = clock(); start = clock();
translate(&translated, &ast); translate(&translated, &ast);
end = clock(); end = clock();
time_spent = (double)(end - start) / CLOCKS_PER_SEC; time_spent = (double)(end - start) / CLOCKS_PER_SEC;
total_time_spent+=time_spent;
printf("Translation time taken: %f seconds\n", time_spent); printf("Translation time taken: %f seconds\n", time_spent);
darray_free(&ast, free_parsed); darray_free(&ast, free_parsed);
@@ -88,8 +91,6 @@ int main(int argc, char *argv[]) {
fclose(file); fclose(file);
generate_siphash_key();
init_types(); init_types();
start = clock(); start = clock();
@@ -97,8 +98,9 @@ int main(int argc, char *argv[]) {
end = clock(); end = clock();
time_spent = (double)(end - start) / CLOCKS_PER_SEC; time_spent = (double)(end - start) / CLOCKS_PER_SEC;
total_time_spent+=time_spent;
printf("Execution time taken: %f seconds\n", time_spent); printf("Execution time taken: %f seconds\n", time_spent);
printf("total time taken: %f seconds\n", total_time_spent);
free_translator(&translated); free_translator(&translated);
return 0; return 0;

View File

@@ -7,47 +7,47 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
struct hashmap *createHashmap() { struct hashmap_GC *createHashmap_GC() {
size_t size = 8; size_t size = 8;
struct hashmap *t = (struct hashmap *)ar_alloc(sizeof(struct hashmap)); struct hashmap_GC *t = (struct hashmap_GC *)ar_alloc(sizeof(struct hashmap_GC));
t->size = size; t->size = size;
t->order = 1; t->order = 1;
t->list = (struct node **)ar_alloc(sizeof(struct node *) * size); t->list = (struct node_GC **)ar_alloc(sizeof(struct node_GC *) * size);
memset(t->list, 0, sizeof(struct node *) * size); memset(t->list, 0, sizeof(struct node_GC *) * size);
return t; return t;
} }
void resize_hashmap(struct hashmap *t) { void resize_hashmap_GC(struct hashmap_GC *t) {
int old_size = t->size; int old_size = t->size;
int new_size = old_size * 2; int new_size = old_size * 2;
struct node **old_list = t->list; struct node_GC **old_list = t->list;
// Create new list // Create new list
t->list = (struct node **)ar_alloc(sizeof(struct node *) * new_size); t->list = (struct node_GC **)ar_alloc(sizeof(struct node_GC *) * new_size);
memset(t->list, 0, sizeof(struct node *) * new_size); memset(t->list, 0, sizeof(struct node_GC *) * new_size);
t->size = new_size; t->size = new_size;
t->count = 0; t->count = 0;
// Rehash old entries into new list // Rehash old entries into new list
for (int i = 0; i < old_size; i++) { for (int i = 0; i < old_size; i++) {
struct node *temp = old_list[i]; struct node_GC *temp = old_list[i];
while (temp) { while (temp) {
hashmap_insert(t, temp->hash, temp->key, temp->val, hashmap_insert_GC(t, temp->hash, temp->key, temp->val,
temp->order); // Will increment count temp->order); // Will increment count
temp = temp->next; temp = temp->next;
} }
} }
} }
int hashCode(struct hashmap *t, uint64_t hash) { return hash % t->size; } int hashCode_GC(struct hashmap_GC *t, uint64_t hash) { return hash % t->size; }
int hashmap_remove(struct hashmap *t, uint64_t hash) { int hashmap_remove_GC(struct hashmap_GC *t, uint64_t hash) {
int pos = hashCode(t, hash); int pos = hashCode_GC(t, hash);
struct node *list = t->list[pos]; struct node_GC *list = t->list[pos];
struct node *temp = list; struct node_GC *temp = list;
struct node *prev = NULL; struct node_GC *prev = NULL;
while (temp) { while (temp) {
if (temp->hash == hash) { if (temp->hash == hash) {
if (prev) if (prev)
@@ -65,18 +65,18 @@ int hashmap_remove(struct hashmap *t, uint64_t hash) {
return 0; return 0;
} }
void hashmap_insert(struct hashmap *t, uint64_t hash, void *key, void hashmap_insert_GC(struct hashmap_GC *t, uint64_t hash, void *key,
void *val, size_t order) { void *val, size_t order) {
if (!order) { if (!order) {
order = t->order++; order = t->order++;
} }
if ((t->count + 1) > t->size * 0.75) { if ((t->count + 1) > t->size * 0.75) {
resize_hashmap(t); resize_hashmap_GC(t);
} }
int pos = hashCode(t, hash); int pos = hashCode_GC(t, hash);
struct node *list = t->list[pos]; struct node_GC *list = t->list[pos];
struct node *temp = list; struct node_GC *temp = list;
// Check if key exists → overwrite // Check if key exists → overwrite
while (temp) { while (temp) {
@@ -88,7 +88,7 @@ void hashmap_insert(struct hashmap *t, uint64_t hash, void *key,
} }
// Insert new node // Insert new node
struct node *newNode = (struct node *)ar_alloc(sizeof(struct node)); struct node_GC *newNode = (struct node_GC *)ar_alloc(sizeof(struct node_GC));
newNode->hash = hash; newNode->hash = hash;
newNode->key = key; newNode->key = key;
newNode->val = val; newNode->val = val;
@@ -98,10 +98,10 @@ void hashmap_insert(struct hashmap *t, uint64_t hash, void *key,
t->count++; t->count++;
} }
void *hashmap_lookup(struct hashmap *t, uint64_t hash) { void *hashmap_lookup_GC(struct hashmap_GC *t, uint64_t hash) {
int pos = hashCode(t, hash); int pos = hashCode_GC(t, hash);
struct node *list = t->list[pos]; struct node_GC *list = t->list[pos];
struct node *temp = list; struct node_GC *temp = list;
while (temp) { while (temp) {
if (temp->hash == hash) { if (temp->hash == hash) {
return temp->val; return temp->val;

View File

@@ -1,33 +1,33 @@
#ifndef HASHMAP_H #ifndef HASHMAP_GC_H
#define HASHMAP_H #define HASHMAP_GC_H
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
typedef struct ArgonObject ArgonObject; typedef struct ArgonObject ArgonObject;
struct node { struct node_GC {
uint64_t hash; uint64_t hash;
void *key; void *key;
void *val; void *val;
size_t order; size_t order;
struct node *next; struct node_GC *next;
}; };
struct hashmap { struct hashmap_GC {
size_t size; size_t size;
size_t count; size_t count;
size_t order; size_t order;
struct node **list; struct node_GC **list;
}; };
struct hashmap *createHashmap(); struct hashmap_GC *createHashmap_GC();
int hashCode(struct hashmap *t, uint64_t hash); int hashCode_GC(struct hashmap_GC *t, uint64_t hash);
int hashmap_remove(struct hashmap *t, uint64_t hash); int hashmap_remove_GC(struct hashmap_GC *t, uint64_t hash);
void hashmap_insert(struct hashmap *t, uint64_t hash, void *key, void hashmap_insert_GC(struct hashmap_GC *t, uint64_t hash, void *key,
void *val, size_t order); void *val, size_t order);
void *hashmap_lookup(struct hashmap *t, uint64_t hash); void *hashmap_lookup_GC(struct hashmap_GC *t, uint64_t hash);
#endif // HASHMAP_H #endif // HASHMAP_GC_H

View File

@@ -1,6 +1,6 @@
#include "object.h" #include "object.h"
#include "../../memory.h" #include "../../memory.h"
#include "../runtime.h" #include "../../hash_data/hash_data.h"
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include "type/type.h" #include "type/type.h"
@@ -26,11 +26,11 @@ ArgonObject* init_argon_class(char*name) {
object->type = TYPE_OBJECT; object->type = TYPE_OBJECT;
object->self = NULL; object->self = NULL;
object->baseObject = ARGON_TYPE; object->baseObject = ARGON_TYPE;
object->fields = createHashmap(); object->fields = createHashmap_GC();
memset(&object->value, 0, sizeof(object->value)); memset(&object->value, 0, sizeof(object->value));
return object; return object;
} }
void add_field(ArgonObject*target, char* name, ArgonObject *object) { void add_field(ArgonObject*target, char* name, ArgonObject *object) {
hashmap_insert(target->fields, siphash64_bytes(name, strlen(name)),name, object, 0); hashmap_insert_GC(target->fields, siphash64_bytes(name, strlen(name), siphash_key),name, object, 0);
} }

View File

@@ -34,7 +34,7 @@ struct ArgonObject {
char* name; char* name;
ArgonObject *self; ArgonObject *self;
ArgonObject *baseObject; ArgonObject *baseObject;
struct hashmap *fields; // dynamic fields/methods struct hashmap_GC *fields; // dynamic fields/methods
union { union {
mpq_t as_number; mpq_t as_number;
bool as_bool; bool as_bool;

View File

@@ -1,6 +1,5 @@
#include "runtime.h" #include "runtime.h"
#include "../translator/translator.h" #include "../translator/translator.h"
#include "internals/siphash/siphash.h"
#include "objects/functions/functions.h" #include "objects/functions/functions.h"
#include "objects/null/null.h" #include "objects/null/null.h"
#include "objects/object.h" #include "objects/object.h"
@@ -12,6 +11,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <unistd.h> #include <unistd.h>
uint64_t bytes_to_uint64(const uint8_t bytes[8]) { uint64_t bytes_to_uint64(const uint8_t bytes[8]) {
@@ -89,25 +89,3 @@ void runtime(Translated translated) {
} }
free(state.registers); free(state.registers);
} }
static uint8_t siphash_key[16];
void generate_siphash_key() {
int fd = open("/dev/urandom", O_RDONLY);
if (fd < 0 || read(fd, siphash_key, 16) != 16) {
// Fallback or abort
}
close(fd);
}
uint64_t siphash64_bytes(const void *data, size_t len) {
uint8_t out[8];
if (siphash(data, len, siphash_key, out, sizeof(out)) != 0)
return 0;
uint64_t hash = 0;
for (int i = 0; i < 8; ++i)
hash |= ((uint64_t)out[i]) << (8 * i);
return hash;
}

View File

@@ -21,8 +21,4 @@ void run_instruction(Translated *translated, RuntimeState *state, struct Stack s
void runtime(Translated translated); void runtime(Translated translated);
uint64_t siphash64_bytes(const void *data, size_t len);
void generate_siphash_key();
#endif // RUNTIME_H #endif // RUNTIME_H

View File

@@ -1,4 +1,6 @@
#include "translator.h" #include "translator.h"
#include "../hash_data/hash_data.h"
#include "../hashmap/hashmap.h"
#include "declaration/declaration.h" #include "declaration/declaration.h"
#include "function/function.h" #include "function/function.h"
#include "number/number.h" #include "number/number.h"
@@ -19,6 +21,7 @@ void arena_init(ConstantArena *arena) {
arena->data = checked_malloc(CHUNK_SIZE); arena->data = checked_malloc(CHUNK_SIZE);
arena->capacity = CHUNK_SIZE; arena->capacity = CHUNK_SIZE;
arena->size = 0; arena->size = 0;
arena->hashmap = createHashmap();
} }
void arena_resize(ConstantArena *arena, size_t new_size) { void arena_resize(ConstantArena *arena, size_t new_size) {
@@ -38,6 +41,7 @@ void arena_free(ConstantArena *arena) {
free(arena->data); free(arena->data);
arena->capacity = 0; arena->capacity = 0;
arena->size = 0; arena->size = 0;
hashmap_free(arena->hashmap, NULL);
} }
void *arena_get(ConstantArena *arena, size_t offset) { void *arena_get(ConstantArena *arena, size_t offset) {
@@ -45,17 +49,29 @@ void *arena_get(ConstantArena *arena, size_t offset) {
} }
size_t arena_push(ConstantArena *arena, const void *data, size_t length) { size_t arena_push(ConstantArena *arena, const void *data, size_t length) {
if (arena->size >= length && arena->size<100000) { uint64_t hash = siphash64_bytes(data, length, siphash_key);
for (size_t i = 0; i <= (arena->size - length); i++) {
if (memcmp(data, arena->data + i, length) == 0) { // Look up offset in hashmap
return i; void *val = hashmap_lookup(arena->hashmap, hash);
} if (val != NULL) {
size_t offset =
(size_t)(uintptr_t)val - 1; // stored as pointer but really offset
// Verify to avoid collision false positive
if (memcmp(arena->data + offset, data, length) == 0) {
return offset;
} }
} }
// Not found: append data
arena_resize(arena, arena->size + length); arena_resize(arena, arena->size + length);
size_t offset = arena->size; size_t offset = arena->size;
memcpy(arena->data + arena->size, data, length); memcpy(arena->data + arena->size, data, length);
arena->size += length; arena->size += length;
// Insert into hashmap: store offset as pointer-sized integer
hashmap_insert(arena->hashmap, hash, (void *)data,
(void *)(uintptr_t)offset + 1, 0);
return offset; return offset;
} }
@@ -67,7 +83,6 @@ Translated init_translator() {
return translated; return translated;
} }
size_t push_instruction_byte(Translated *translator, uint8_t byte) { size_t push_instruction_byte(Translated *translator, uint8_t byte) {
size_t offset = translator->bytecode.size; size_t offset = translator->bytecode.size;
darray_push(&translator->bytecode, &byte); darray_push(&translator->bytecode, &byte);

View File

@@ -4,6 +4,7 @@
#include "../dynamic_array/darray.h" #include "../dynamic_array/darray.h"
#include "../memory.h" #include "../memory.h"
#include "../parser/parser.h" #include "../parser/parser.h"
#include "../hashmap/hashmap.h"
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
@@ -14,6 +15,7 @@ typedef struct {
void *data; void *data;
size_t capacity; size_t capacity;
size_t size; size_t size;
struct hashmap * hashmap;
} ConstantArena; } ConstantArena;
typedef struct { typedef struct {