change to dynamic array for lexer and parser to speed up lexical analysis

This commit is contained in:
2025-05-30 16:46:27 +01:00
parent ddf18ceb2c
commit ec894d4357
10 changed files with 165 additions and 2007408 deletions

View File

@@ -0,0 +1,78 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "darray.h"
// Prepares `arr` as an empty array of `element_size`-byte elements.
//
// One chunk (CHUNK_SIZE elements) of storage is allocated up front, so the
// array starts with size 0 and capacity CHUNK_SIZE. If the allocation fails
// an error is printed to stderr and the process exits with EXIT_FAILURE.
void darray_init(DArray *arr, size_t element_size) {
    *arr = (DArray){
        .data = malloc(CHUNK_SIZE * element_size),
        .element_size = element_size,
        .size = 0,
        .capacity = CHUNK_SIZE,
    };

    if (arr->data == NULL) {
        fprintf(stderr, "darray_init: allocation failed\n");
        exit(EXIT_FAILURE);
    }
}
// Sets the logical length of `arr` to `new_size`, adjusting its storage.
//
// Capacity is `new_size` rounded up to a whole number of CHUNK_SIZE elements,
// but never less than one chunk. The buffer is reallocated only when that
// capacity differs from the current one. Elements dropped by shrinking are
// not cleaned up here; elements exposed by growing are uninitialized.
//
// On an unrepresentable size or a failed reallocation an error is printed to
// stderr and the process exits with EXIT_FAILURE.
void darray_resize(DArray *arr, size_t new_size) {
    const size_t max_size = (size_t)-1;

    // The round-up below must not wrap around.
    if (new_size > max_size - ((size_t)CHUNK_SIZE - 1)) {
        fprintf(stderr, "darray_resize: requested size too large\n");
        exit(EXIT_FAILURE);
    }

    size_t new_capacity =
        ((new_size + CHUNK_SIZE - 1) / CHUNK_SIZE) * CHUNK_SIZE;

    // Keep at least one chunk. A zero-byte realloc is implementation-defined:
    // it may free the buffer and return NULL, which would be misreported as
    // an allocation failure (e.g. when the last element is popped).
    if (new_capacity < (size_t)CHUNK_SIZE) {
        new_capacity = CHUNK_SIZE;
    }

    if (new_capacity != arr->capacity) {
        // The byte count must not wrap around either.
        if (arr->element_size != 0 &&
            new_capacity > max_size / arr->element_size) {
            fprintf(stderr, "darray_resize: requested size too large\n");
            exit(EXIT_FAILURE);
        }

        // Keep the old pointer until realloc is known to have succeeded.
        void *new_data = realloc(arr->data, new_capacity * arr->element_size);
        if (!new_data) {
            fprintf(stderr, "darray_resize: reallocation failed\n");
            exit(EXIT_FAILURE);
        }
        arr->data = new_data;
        arr->capacity = new_capacity;
    }

    arr->size = new_size;
}
// Appends a copy of one element to the end of `arr`.
//
// `element` must point to `arr->element_size` readable bytes; they are copied
// into the array's own storage. When the array is full, darray_resize is used
// to make room for one more element.
void darray_push(DArray *arr, void *element) {
    const size_t slot = arr->size; // index the new element will occupy

    if (slot < arr->capacity) {
        arr->size = slot + 1;
    } else {
        // Full: darray_resize enlarges the buffer and updates arr->size.
        darray_resize(arr, slot + 1);
    }

    char *base = arr->data;
    memcpy(base + slot * arr->element_size, element, arr->element_size);
}
// Removes the last element of `arr`; does nothing when the array is empty.
//
// If `free_data` is non-NULL it is called with a pointer to the removed
// element while that element is still in the buffer. Afterwards the storage
// is re-fitted to the new size through darray_resize.
void darray_pop(DArray *arr, void (*free_data)(void *)) {
    if (arr->size != 0) {
        const size_t last = arr->size - 1;
        arr->size = last;

        if (free_data != NULL) {
            char *base = arr->data;
            free_data(base + last * arr->element_size);
        }

        darray_resize(arr, arr->size);
    }
}
// Returns a pointer to the element stored at `index`.
//
// The pointer refers to the array's own buffer, so it stops being valid once
// the buffer is reallocated or freed. An index at or beyond the current size
// prints an error to stderr and exits with EXIT_FAILURE.
void *darray_get(DArray *arr, size_t index) {
    if (index < arr->size) {
        char *base = arr->data;
        return base + index * arr->element_size;
    }

    fprintf(stderr, "darray_get: index out of bounds\n");
    exit(EXIT_FAILURE);
}
// Releases the storage owned by `arr` and resets it to an empty state.
//
// If `free_data` is non-NULL it is called once per stored element, in index
// order, with a pointer to that element inside the buffer, before the buffer
// itself is freed. All fields of `arr` are zeroed afterwards.
void darray_free(DArray *arr, void (*free_data)(void *)) {
    if (free_data != NULL) {
        size_t idx = 0;
        while (idx < arr->size) {
            free_data((char *)arr->data + idx * arr->element_size);
            ++idx;
        }
    }

    free(arr->data);

    arr->element_size = 0;
    arr->capacity = 0;
    arr->size = 0;
    arr->data = NULL;
}

View File

@@ -0,0 +1,33 @@
#ifndef DARRAY_H
#define DARRAY_H
#include <stddef.h> // for size_t
// Allocation granularity, in elements: the initial capacity of a new array
// and the unit that capacities are rounded up to when resizing.
#define CHUNK_SIZE 16
// Growable array that stores fixed-size elements by value in one contiguous
// heap buffer.
typedef struct {
// Heap buffer holding the elements back to back; NULL after darray_free.
void *data;
// Size of one element in bytes, as passed to darray_init.
size_t element_size;
// Number of elements currently stored.
size_t size;
// Number of elements the current buffer can hold.
size_t capacity;
} DArray;
// Initializes `arr` as an empty array of `element_size`-byte elements with
// CHUNK_SIZE elements of capacity. Exits the process if allocation fails.
void darray_init(DArray *arr, size_t element_size);
// Pushes an element onto the end of the array by copying `element_size`
// bytes from `element`; grows the storage when the array is full.
void darray_push(DArray *arr, void *element);
// Pops the last element, calling `free_data` on it if provided.
// Does nothing when the array is empty.
void darray_pop(DArray *arr, void (*free_data)(void *));
// Gets a pointer to the element at `index` inside the array's own buffer.
// Exits the process if `index` is out of bounds. The pointer is invalidated
// when the buffer is reallocated or freed.
void *darray_get(DArray *arr, size_t index);
// Frees the entire array, calling `free_data` on each element first if
// provided, and resets every field of `arr` to zero/NULL.
void darray_free(DArray *arr, void (*free_data)(void *));
// Resizes the array to a new size (internal use, but exposed). Storage is
// reallocated in CHUNK_SIZE-element steps as needed; exits the process if
// reallocation fails. Does not call any per-element cleanup.
void darray_resize(DArray *arr, size_t new_size);
#endif // DARRAY_H

View File

@@ -18,7 +18,7 @@ void lexer(LexerState state) {
state.current_column, state.current_column,
yyget_text(scanner) yyget_text(scanner)
); );
append(state.tokens, token_struct); darray_push(state.tokens, token_struct);
if (token == TOKEN_NEW_LINE) { if (token == TOKEN_NEW_LINE) {
state.current_column = 0; state.current_column = 0;
} else { } else {

View File

@@ -1,12 +1,12 @@
#include "token.h" #include "token.h"
#include "../list/list.h" #include "../dynamic_array/darray.h"
#include <stdio.h> #include <stdio.h>
typedef struct { typedef struct {
const char *path; const char *path;
FILE *file; FILE *file;
int current_column; int current_column;
LinkedList* tokens; DArray* tokens;
// add more fields as needed // add more fields as needed
} LexerState; } LexerState;

View File

@@ -2,32 +2,36 @@
#include "lexer/token.h" #include "lexer/token.h"
#include "parser/parser.h" #include "parser/parser.h"
#include "memory.h" #include "memory.h"
#include "dynamic_array/darray.h"
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <stdio.h> #include <stdio.h>
int main() { int main() {
const char * path = "test.ar"; const char * path = "test.ar";
LinkedList* tokens = create_list(sizeof(Token)); DArray tokens;
darray_init(&tokens, sizeof(Token));
LexerState state = { LexerState state = {
path, path,
fopen(path, "r"), fopen(path, "r"),
0, 0,
tokens &tokens
}; };
lexer(state); lexer(state);
LinkedList * parsed = create_list(sizeof(TaggedValue)); DArray parsed;
parser(parsed, tokens, false); darray_init(&parsed, sizeof(ParsedValue));
free_list(tokens, free_token);
free_list(parsed,free_tagged_value); parser(&parsed, &tokens, false);
darray_free(&tokens, free_token);
darray_free(&parsed,free_parsed_value);
ar_memory_init(); ar_memory_init();

View File

@@ -1,14 +1,14 @@
#include "parser.h" #include "parser.h"
#include "../dynamic_array/darray.h"
#include "../lexer/token.h"
#include "string/string.h"
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "../lexer/token.h"
#include "../list/list.h"
#include "string/string.h"
TaggedValue * parse_token(LinkedList * tokens, size_t *index) { ParsedValue *parse_token(DArray *tokens, size_t *index) {
Token * token = get_element_at(tokens, *index); Token *token = darray_get(tokens, *index);
switch (token->type) { switch (token->type) {
case TOKEN_STRING: case TOKEN_STRING:
(*index)++; (*index)++;
@@ -17,28 +17,29 @@ TaggedValue * parse_token(LinkedList * tokens, size_t *index) {
(*index)++; (*index)++;
return NULL; return NULL;
default: default:
fprintf(stderr, "Panic: %s\n", "unreachable"); \ fprintf(stderr, "Panic: %s\n", "unreachable");
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE);
} }
} }
void parser(LinkedList * parsed, LinkedList * tokens, bool inline_flag) { void parser(DArray *parsed, DArray *tokens, bool inline_flag) {
size_t index = 0; size_t index = 0;
size_t length = list_length(tokens); size_t length = tokens->size;
while (index < length) { while (index < length) {
TaggedValue * parsed_code = parse_token(tokens, &index); ParsedValue *parsed_code = parse_token(tokens, &index);
if (parsed_code) if (parsed_code) {
append(parsed,parsed_code); darray_push(parsed, parsed_code);
free(parsed_code);
}
} }
} }
void free_tagged_value(void *ptr) { void free_parsed_value(void *ptr) {
TaggedValue *tagged = ptr; ParsedValue *tagged = ptr;
switch (tagged->type) { switch (tagged->type) {
case AST_STRING: case AST_STRING:
free(tagged->data); free(tagged->data);
break; break;
// Add cases if needed // Add cases if needed
} }
free(tagged); // Always free the TaggedValue itself
} }

View File

@@ -3,6 +3,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include "../dynamic_array/darray.h"
typedef struct LinkedList LinkedList; typedef struct LinkedList LinkedList;
@@ -15,13 +16,13 @@ typedef struct {
ValueType type; ValueType type;
void *data; void *data;
} TaggedValue; } ParsedValue;
void parser(LinkedList *parsed, LinkedList *tokens, bool inline_flag); void parser(DArray * parsed, DArray * tokens, bool inline_flag);
TaggedValue *parse_token(LinkedList *tokens, size_t *index); ParsedValue * parse_token(DArray * tokens, size_t *index);
void free_tagged_value(void *ptr); void free_parsed_value(void *ptr);
#endif // PARSER_H #endif // PARSER_H

View File

@@ -71,10 +71,10 @@ char *unquote(char *str) {
return unescaped; return unescaped;
} }
TaggedValue * parse_string(Token token) { ParsedValue * parse_string(Token token) {
TaggedValue * taggedValue = malloc(sizeof(TaggedValue)); ParsedValue * parsedValue = malloc(sizeof(ParsedValue));
taggedValue->type = AST_STRING; parsedValue->type = AST_STRING;
taggedValue->data = unquote(token.value); parsedValue->data = unquote(token.value);
return taggedValue; return parsedValue;
} }

View File

@@ -10,6 +10,6 @@ char *swap_quotes(char *input, char quote);
char *unquote(char *str); char *unquote(char *str);
TaggedValue *parse_string(Token token); ParsedValue *parse_string(Token token);
#endif // STRING_UTILS_H #endif // STRING_UTILS_H

2007360
test.ar

File diff suppressed because it is too large Load Diff