Compare commits

...

4 Commits

19 changed files with 367 additions and 81 deletions

View File

@@ -26,7 +26,7 @@ full-debug: $(CFILES) $(LEXER_C) $(LEXER_H)
optimised: $(CFILES) $(LEXER_C) $(LEXER_H)
mkdir -p bin
gcc -O3 -fprofile-generate -o $(BINARY) $(CFILES) $(CFLAGS)
${BINARY}
${BINARY} test.ar
gcc -O3 -fprofile-use -o $(BINARY) $(CFILES) $(CFLAGS)

View File

@@ -6,6 +6,7 @@
#include "translator/translator.h"
#include <endian.h>
#include <string.h>
#include <locale.h>
#include <stdbool.h>
#include <stddef.h>
@@ -13,6 +14,9 @@
#include <stdio.h>
#include <unistd.h>
const char FILE_IDENTIFIER[] = "ARBI";
const uint64_t version_number = 0;
int main(int argc, char *argv[]) {
setlocale(LC_ALL, "");
if (argc <= 1)
@@ -52,10 +56,14 @@ int main(int argc, char *argv[]) {
uint64_t constantsSize = (uint64_t)translated.constants.size;
uint64_t bytecodeSize = (uint64_t)translated.bytecode.size;
uint64_t version_number_htole64ed = htole64(version_number);
regCount = htole64(regCount);
regCount = htole64(regCount);
constantsSize = htole64(constantsSize);
bytecodeSize = htole64(bytecodeSize);
fwrite(&FILE_IDENTIFIER, sizeof(char), strlen(FILE_IDENTIFIER), file);
fwrite(&version_number_htole64ed, sizeof(uint64_t), 1, file);
fwrite(&regCount, sizeof(uint64_t), 1, file);
fwrite(&constantsSize, sizeof(uint64_t), 1, file);
fwrite(&bytecodeSize, sizeof(uint64_t), 1, file);

View File

@@ -1,6 +1,7 @@
#include "access.h"
#include "../../../lexer/token.h"
#include "../../../memory.h"
#include "../../string/string.h"
#include "../../parser.h"
#include <stdio.h>
#include <stdlib.h>
@@ -18,11 +19,9 @@ ParsedValue *parse_access(char *file, DArray *tokens, size_t *index,
if (first_token->type == TOKEN_DOT) {
error_if_finished(file, tokens, index);
Token *token = darray_get(tokens, *index);
ParsedValue parsedString;
parsedString.type = AST_STRING;
parsedString.data =
strcpy(checked_malloc(strlen(token->value) + 1), token->value);
darray_push(&parsedAccess->access, &parsedString);
ParsedValue *parsedString = parse_string(token, false);
darray_push(&parsedAccess->access, parsedString);
free(parsedString);
} else {
while (true) {
skip_newlines_and_indents(tokens, index);

View File

@@ -2,6 +2,7 @@
#include "../../lexer/token.h"
#include "../../memory.h"
#include "../parser.h"
#include "../string/string.h"
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@@ -22,9 +23,12 @@ ParsedValue *parse_dictionary(char *file, DArray *tokens, size_t *index) {
error_if_finished(file, tokens, index);
size_t keyIndex = *index;
Token *keyToken = darray_get(tokens, *index);
ParsedValue *key = parse_token(file, tokens, index, true);
ParsedValue *key;
if (keyToken->type == TOKEN_IDENTIFIER) {
key->type = AST_STRING;
(*index)++;
key = parse_string(keyToken, false);
} else {
key = parse_token(file, tokens, index, true);
}
skip_newlines_and_indents(tokens, index);
error_if_finished(file, tokens, index);

View File

@@ -3,12 +3,156 @@
#include "../parser.h"
#include "../../memory.h"
#include <gmp.h>
#include <string.h>
// #include <stdio.h>
// #include <stdlib.h>
// #include <string.h>
// #include <ctype.h>
// int parse_exponent(const char *exp_str, long *exp_val) {
// char *endptr;
// long val = strtol(exp_str, &endptr, 10);
// if (*endptr != '\0') {
// // exponent contains invalid chars or decimal point → reject
// return -1;
// }
// *exp_val = val;
// return 0;
// }
// int mpq_set_decimal_str_exp(mpq_t r, const char *str) {
// // Skip leading whitespace
// while (isspace(*str)) str++;
// // Handle sign
// int negative = 0;
// if (*str == '-') {
// negative = 1;
// str++;
// } else if (*str == '+') {
// str++;
// }
// // Copy input to a buffer for manipulation
// size_t len = strlen(str);
// char *buf = malloc(len + 1);
// if (!buf) return -1;
// strcpy(buf, str);
// // Find 'e' or 'E'
// char *e_ptr = strchr(buf, 'e');
// if (!e_ptr) e_ptr = strchr(buf, 'E');
// char *exp_str = NULL;
// if (e_ptr) {
// *e_ptr = '\0';
// exp_str = e_ptr + 1;
// }
// // Validate decimal part (digits and one dot)
// int dot_count = 0;
// for (char *p = buf; *p; p++) {
// if (*p == '.') {
// if (++dot_count > 1) { free(buf); return -1; }
// continue;
// }
// if (!isdigit((unsigned char)*p)) { free(buf); return -1; }
// }
// // Extract integer and fractional parts
// char *dot = strchr(buf, '.');
// size_t int_len = dot ? (size_t)(dot - buf) : strlen(buf);
// size_t frac_len = dot ? strlen(dot + 1) : 0;
// // Validate exponent if present
// int exp_negative = 0;
// long exp_val = 0;
// if (exp_str) {
// // Skip leading spaces in exponent (not in regex but safe)
// while (isspace(*exp_str)) exp_str++;
// if (*exp_str == '-') {
// exp_negative = 1;
// exp_str++;
// } else if (*exp_str == '+') {
// exp_str++;
// }
// if (!isdigit((unsigned char)*exp_str)) {
// free(buf);
// return -1;
// }
// char *endptr;
// exp_val = strtol(exp_str, &endptr, 10);
// if (*endptr != '\0') {
// free(buf);
// return -1;
// }
// if (exp_negative) exp_val = -exp_val;
// }
// // Build numerator string (integer part + fractional part)
// size_t num_len = int_len + frac_len;
// if (num_len == 0) { free(buf); return -1; }
// char *num_str = malloc(num_len + 1);
// if (!num_str) { free(buf); return -1; }
// if (int_len > 0) memcpy(num_str, buf, int_len);
// if (frac_len > 0) memcpy(num_str + int_len, dot + 1, frac_len);
// num_str[num_len] = '\0';
// // Calculate denominator exponent considering exponent part
// long denom_exp = frac_len - exp_val;
// mpz_t numerator, denominator;
// mpz_init(numerator);
// mpz_init(denominator);
// if (mpz_set_str(numerator, num_str, 10) != 0) {
// free(num_str);
// free(buf);
// mpz_clear(numerator);
// mpz_clear(denominator);
// return -1;
// }
// free(num_str);
// free(buf);
// if (denom_exp >= 0) {
// mpz_ui_pow_ui(denominator, 10, (unsigned long)denom_exp);
// } else {
// // denom_exp < 0 means multiply numerator by 10^(-denom_exp)
// mpz_ui_pow_ui(denominator, 10, 0);
// mpz_ui_pow_ui(numerator, 10, (unsigned long)(-denom_exp));
// }
// if (denom_exp < 0) {
// mpz_t temp;
// mpz_init(temp);
// mpz_ui_pow_ui(temp, 10, (unsigned long)(-denom_exp));
// mpz_mul(numerator, numerator, temp);
// mpz_clear(temp);
// mpz_set_ui(denominator, 1);
// }
// mpq_set_num(r, numerator);
// mpq_set_den(r, denominator);
// mpq_canonicalize(r);
// if (negative) mpq_neg(r, r);
// mpz_clear(numerator);
// mpz_clear(denominator);
// return 0;
// }
// Build an AST_NUMBER node for a numeric token.
//
// The node's `data` is a heap-allocated, NUL-terminated copy of the token
// text (token->value); no numeric conversion is performed here.
//
// Fix: the previous body also allocated and initialised an mpz_t and stored
// it in `data` before overwriting `data` with the string copy, which leaked
// both the mpz_t allocation and its GMP limbs. Only the string is kept now.
// The copy goes through checked_malloc instead of strdup so that an
// allocation failure is not silently stored as a NULL payload
// (presumably checked_malloc aborts on failure -- confirm in memory.h).
ParsedValue *parse_number(Token *token) {
  ParsedValue *parsedValue = checked_malloc(sizeof(ParsedValue));
  size_t size = strlen(token->value) + 1; // include the terminating NUL
  parsedValue->type = AST_NUMBER;
  parsedValue->data = memcpy(checked_malloc(size), token->value, size);
  return parsedValue;
}

View File

@@ -1,21 +1,21 @@
#include "operations.h"
#include "../parser.h"
#include "../../memory.h"
#include "../parser.h"
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
ParsedValue *convert_to_operation(DArray * to_operate_on, DArray * operations) {
ParsedValue *convert_to_operation(DArray *to_operate_on, DArray *operations) {
if (to_operate_on->size == 1) {
return darray_get(to_operate_on, 0);
}
TokenType operation = 0;
DArray positions;
for (size_t i = 0; i<operations->size;i++) {
TokenType * current_operation = darray_get(operations, i);
for (size_t i = 0; i < operations->size; i++) {
TokenType *current_operation = darray_get(operations, i);
if (operation < *current_operation) {
if (operation!=0) {
if (operation != 0) {
darray_free(&positions, NULL);
}
operation = *current_operation;
@@ -23,22 +23,30 @@ ParsedValue *convert_to_operation(DArray * to_operate_on, DArray * operations) {
}
darray_push(&positions, &i);
}
size_t last_position = operations->size-1;
darray_push(&positions, &last_position);
ParsedValue * parsedValue = checked_malloc(sizeof(ParsedValue));
ParsedValue *parsedValue = checked_malloc(sizeof(ParsedValue));
parsedValue->type = AST_OPERATION;
ParsedOperation * operationStruct = checked_malloc(sizeof(ParsedOperation));
ParsedOperation *operationStruct = checked_malloc(sizeof(ParsedOperation));
parsedValue->data = operationStruct;
operationStruct->operation = operation;
darray_init(&operationStruct->to_operate_on, sizeof(ParsedValue));
last_position = 0;
for (size_t i = 0; i<positions.size;i++) {
size_t last_position = 0;
size_t to_operate_on_last_position = 0;
for (size_t i = 0; i < positions.size; i++) {
size_t *position = darray_get(&positions, i);
DArray to_operate_on_slice = darray_slice(to_operate_on, last_position, *position+1);
DArray operations_slice = darray_slice(operations, last_position, *position);
darray_push(&operationStruct->to_operate_on, convert_to_operation(&to_operate_on_slice, &operations_slice));
last_position = *position;
DArray to_operate_on_slice = darray_slice(
to_operate_on, to_operate_on_last_position, (*position) + 1);
DArray operations_slice =
darray_slice(operations, last_position, *position);
darray_push(&operationStruct->to_operate_on,
convert_to_operation(&to_operate_on_slice, &operations_slice));
last_position = (*position);
to_operate_on_last_position = (*position) + 1;
}
DArray to_operate_on_slice =
darray_slice(to_operate_on, to_operate_on_last_position, to_operate_on->size);
DArray operations_slice = darray_slice(operations, last_position, operations->size);
darray_push(&operationStruct->to_operate_on,
convert_to_operation(&to_operate_on_slice, &operations_slice));
darray_free(&positions, NULL);
return parsedValue;
}
@@ -48,6 +56,7 @@ ParsedValue *parse_operations(char *file, DArray *tokens, size_t *index,
DArray to_operate_on;
darray_init(&to_operate_on, sizeof(ParsedValue));
darray_push(&to_operate_on, first_parsed_value);
free(first_parsed_value);
DArray operations;
darray_init(&operations, sizeof(TokenType));
@@ -66,7 +75,10 @@ ParsedValue *parse_operations(char *file, DArray *tokens, size_t *index,
darray_push(&operations, &token->type);
(*index)++;
error_if_finished(file, tokens, index);
darray_push(&to_operate_on, parse_token_full(file, tokens, index, true, false));
ParsedValue *parsedValue =
parse_token_full(file, tokens, index, true, false);
darray_push(&to_operate_on, parsedValue);
free(parsedValue);
}
ParsedValue *output = convert_to_operation(&to_operate_on, &operations);
darray_free(&to_operate_on, NULL);

View File

@@ -82,7 +82,7 @@ ParsedValue *parse_token_full(char *file, DArray *tokens, size_t *index,
break;
case TOKEN_STRING:
(*index)++;
output = parse_string(file,token);
output = parse_string(token, true);
break;
case TOKEN_NEW_LINE:
(*index)++;
@@ -196,9 +196,12 @@ void free_parsed(void *ptr) {
ParsedValue *parsed = ptr;
switch (parsed->type) {
case AST_IDENTIFIER:
case AST_STRING:
case AST_NUMBER:
free(parsed->data);
break;
case AST_STRING:
free_parsed_string(parsed);
break;
case AST_ASSIGN:
free_parse_assign(parsed);
break;
@@ -211,9 +214,6 @@ void free_parsed(void *ptr) {
case AST_ACCESS:
free_parse_access(parsed);
break;
case AST_NUMBER:
mpz_clear(parsed->data);
break;
case AST_NULL:
case AST_BOOLEAN:
break;

View File

@@ -100,7 +100,7 @@ char *unquote_json_string(const char *input, size_t *out_len) {
size_t input_len = end - (input + 1); // length inside quotes
const char *src = input + 1;
// Allocate max output size = input_len, decoded string cannot be longer than input_len
char *outbuf = (char *)malloc(input_len + 1);
char *outbuf = (char *)checked_malloc(input_len + 1);
if (!outbuf) return NULL;
char *dst = outbuf;
@@ -245,12 +245,24 @@ char *unquote(char *str, size_t *decoded_len) {
return unescaped;
}
ParsedValue *parse_string(char*file,Token* token) {
// Build an AST_STRING node from a token.
//
// token      - token whose `value` supplies the text.
// to_unquote - true:  token->value is passed through unquote() and the
//                     decoded length it reports is stored;
//              false: token->value is copied verbatim and token->length is
//                     recorded as the length.
//
// Returns a newly allocated ParsedValue whose `data` is a ParsedString.
// free_parsed_string() releases the ParsedString and its buffer.
//
// Fix: the verbatim branch used strdup() without checking the result; it
// now allocates through checked_malloc like the rest of this function.
// NOTE(review): the result of unquote() is stored unchecked -- confirm
// whether it can return NULL for malformed input.
ParsedValue *parse_string(Token *token, bool to_unquote) {
  ParsedValue *parsedValue = checked_malloc(sizeof(ParsedValue));
  ParsedString *parsedString = checked_malloc(sizeof(ParsedString));
  parsedValue->type = AST_STRING;
  parsedValue->data = parsedString;
  if (to_unquote) {
    parsedString->length = 0;
    parsedString->string = unquote(token->value, &parsedString->length);
  } else {
    size_t size = strlen(token->value) + 1; // include the terminating NUL
    parsedString->string = memcpy(checked_malloc(size), token->value, size);
    parsedString->length = token->length;
  }
  return parsedValue;
}
void free_parsed_string(void *ptr) {
ParsedValue *parsedValue = ptr;
ParsedString *parsedString = parsedValue->data;
free(parsedString->string);
free(parsedString);
}

View File

@@ -15,6 +15,8 @@ char *swap_quotes(char *input, char quote);
char *unquote(char *str, size_t *decoded_len);
ParsedValue *parse_string(char*file,Token* token);
ParsedValue *parse_string(Token* token, bool to_unquote);
void free_parsed_string(void *ptr);
#endif // STRING_UTILS_H

View File

@@ -0,0 +1,35 @@
#include "../translator.h"
#include "declaration.h"
#include "../../parser/declaration/declaration.h"
#include <stddef.h>
#include <stdio.h>
#include <string.h>
// Emit bytecode for a declaration node.
//
// parsedValue->data is a DArray of ParsedSingleDeclaration. For every entry
// the initialiser (`from`) is translated, then the variable name is stored
// in the constant arena and an OP_LOAD_CONST / OP_DECLARE pair is emitted.
// The register count is raised to at least 2 first.
//
// When the node does not hold exactly one declaration, a trailing
// OP_LOAD_NULL with operand 0 is emitted.
//
// Returns whatever translate_parsed() returned for the first declaration's
// initialiser, or 0 when the array is empty.
size_t translate_parsed_declaration(Translated *translated,
                                    ParsedValue *parsedValue) {
  DArray *declarations = (DArray *)parsedValue->data;
  size_t first_offset = 0;
  set_registers(translated, 2);

  for (size_t idx = 0; idx < declarations->size; ++idx) {
    // TODO: add function declaration
    ParsedSingleDeclaration *decl = darray_get(declarations, idx);
    size_t value_offset = translate_parsed(translated, decl->from);
    if (idx == 0) {
      first_offset = value_offset;
    }

    size_t name_length = strlen(decl->name);
    size_t name_offset =
        arena_push(&translated->constants, decl->name, name_length);

    // Operands 1 / 0 are presumably register indices -- confirm against the
    // bytecode interpreter.
    const uint64_t codes[] = {
        OP_LOAD_CONST, 1, TYPE_OP_STRING, name_length, name_offset,
        OP_DECLARE,    0, 1,
    };
    for (size_t k = 0; k < sizeof codes / sizeof codes[0]; ++k) {
      push_instruction_code(translated, codes[k]);
    }
  }

  if (declarations->size != 1) {
    push_instruction_code(translated, OP_LOAD_NULL);
    push_instruction_code(translated, 0);
  }
  return first_offset;
}

View File

@@ -0,0 +1,10 @@
// Bytecode translation entry point for declaration AST nodes.
#ifndef BYTECODE_DECLARATION_H
#define BYTECODE_DECLARATION_H
#include "../translator.h"
// NOTE(review): translate_parsed_string is also declared in the translator's
// string/string.h with the same signature; this copy looks like a leftover
// from copying that header -- confirm whether it is needed here.
size_t translate_parsed_string(Translated *translated, ParsedValue *parsedValue);
// Emits bytecode for every declaration stored in parsedValue->data and
// returns the value translate_parsed() produced for the first initialiser.
size_t translate_parsed_declaration(Translated *translated,
ParsedValue *parsedValue);
#endif

View File

@@ -0,0 +1,21 @@
#include "../translator.h"
#include "number.h"
#include <gmp.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
// Emit bytecode that loads a numeric literal.
//
// parsedValue->data is read as a NUL-terminated string holding the literal's
// text. The text (without the NUL) is placed in the constant arena and an
// OP_LOAD_CONST instruction with operands
//   0, TYPE_OP_NUMBER, <length>, <arena offset>
// is appended. The register count is raised to at least 1.
//
// Returns the value push_instruction_code() reported for the OP_LOAD_CONST
// opcode, i.e. the bytecode size just before it was appended.
size_t translate_parsed_number(Translated *translated, ParsedValue *parsedValue) {
  const char *literal = parsedValue->data;
  const size_t literal_length = strlen(literal);
  const size_t constant_offset =
      arena_push(&translated->constants, literal, literal_length);

  set_registers(translated, 1);

  const size_t start = push_instruction_code(translated, OP_LOAD_CONST);
  const uint64_t operands[] = {0, TYPE_OP_NUMBER, literal_length,
                               constant_offset};
  for (size_t k = 0; k < sizeof operands / sizeof operands[0]; ++k) {
    push_instruction_code(translated, operands[k]);
  }
  return start;
}

View File

@@ -0,0 +1,7 @@
// Bytecode translation entry point for number AST nodes.
#ifndef BYTECODE_NUMBER_H
#define BYTECODE_NUMBER_H
#include "../translator.h"
// Appends an OP_LOAD_CONST instruction for the number literal held in
// parsedValue->data and returns the position of that instruction.
size_t translate_parsed_number(Translated *translated, ParsedValue *parsedValue);
#endif

View File

@@ -1,18 +1,18 @@
#include "../translator.h"
#include "../../parser/string/string.h"
#include "string.h"
#include <stddef.h>
#include <stdio.h>
#include <string.h>
void translate_parsed_string(Translated *translated, ParsedValue *parsedValue) {
// Emit bytecode that loads a string constant.
//
// The string bytes (parsedString->length of them) are placed in the constant
// arena and one OP_LOAD_CONST instruction with operands
//   0, TYPE_OP_STRING, <length>, <arena offset>
// is appended. The register count is raised to at least 1.
//
// Returns the value push_instruction_code() reported for the OP_LOAD_CONST
// opcode, i.e. the bytecode size just before it was appended.
//
// Fix: the body previously pushed OP_LOAD_CONST and the type tag twice (once
// with the retired OP_TYPE_STRING name, which the current `types` enum no
// longer defines) and wrote the string to stdout as a debugging aid. Exactly
// one instruction is emitted now and nothing is printed.
size_t translate_parsed_string(Translated *translated, ParsedValue *parsedValue) {
  ParsedString *parsedString = (ParsedString *)parsedValue->data;
  size_t string_pos = arena_push(&translated->constants, parsedString->string,
                                 parsedString->length);
  set_registers(translated, 1);
  size_t start = push_instruction_code(translated, OP_LOAD_CONST);
  push_instruction_code(translated, 0);
  push_instruction_code(translated, TYPE_OP_STRING);
  push_instruction_code(translated, parsedString->length);
  push_instruction_code(translated, string_pos);
  return start;
}

View File

@@ -1,7 +1,7 @@
#ifndef STRING_H
#define STRING_H
#ifndef BYTECODE_STRING_H
#define BYTECODE_STRING_H
#include "../translator.h"
void translate_parsed_string(Translated * translator, ParsedValue * parsedValue);
size_t translate_parsed_string(Translated *translated, ParsedValue *parsedValue);
#endif

View File

@@ -1,4 +1,6 @@
#include "translator.h"
#include "declaration/declaration.h"
#include "number/number.h"
#include "string/string.h"
#include <stddef.h>
#include <stdint.h>
@@ -13,30 +15,37 @@ void arena_init(ConstantArena *arena) {
}
// Resize the arena's backing buffer so it can hold `new_size` bytes.
//
// The capacity is rounded to the next multiple of CHUNK_SIZE strictly above
// new_size / CHUNK_SIZE chunks, so the buffer may grow or shrink. Nothing
// happens when the rounded capacity equals the current one. On allocation
// failure an error is printed to stderr and the process exits.
//
// Fixes:
//  - `new_capacity` was declared twice in the same scope;
//  - the error message printed the capacities in the wrong order for its
//    "from ... to ..." wording;
//  - realloc's result is held in a temporary so the old buffer pointer is
//    not overwritten before the failure check.
void arena_resize(ConstantArena *arena, size_t new_size) {
  size_t new_capacity = ((new_size / CHUNK_SIZE) + 1) * CHUNK_SIZE;
  if (new_capacity == arena->capacity)
    return;
  void *resized = realloc(arena->data, new_capacity);
  if (!resized) {
    fprintf(stderr, "error: failed to resize arena from %zu to %zu\n",
            arena->capacity, new_capacity);
    exit(EXIT_FAILURE);
  }
  arena->data = resized;
  arena->capacity = new_capacity;
}
// Release the arena's buffer and reset its bookkeeping.
//
// Fix: `data` is cleared after free() so the arena no longer carries a
// dangling pointer; a second arena_free() or a later realloc() through
// arena_resize() then operates on NULL instead of freed memory.
void arena_free(ConstantArena *arena) {
  free(arena->data);
  arena->data = NULL;
  arena->capacity = 0;
  arena->size = 0;
}
void * arena_get(ConstantArena *arena, size_t offset) {
// Return a pointer to the byte at `offset` inside the arena's buffer.
// No bounds check is performed.
//
// Fix: the offset is applied through a char pointer; adding to a `void *`
// directly is a GNU extension rather than standard C.
void *arena_get(ConstantArena *arena, size_t offset) {
  return (char *)arena->data + offset;
}
size_t arena_push(ConstantArena *arena, const void *data, size_t length) {
arena_resize(arena, arena->size+length);
if (arena->size >= length) {
for (size_t i = 0; i <= (arena->size - length); i++) {
if (memcmp(data, arena->data + i, length) == 0) {
return i;
}
}
}
arena_resize(arena, arena->size + length);
size_t offset = arena->size;
memcpy(arena->data + arena->size, data, length);
arena->size += length;
@@ -51,32 +60,50 @@ Translated init_translator() {
return translated;
}
size_t push_instruction_code(Translated * translator, uint64_t code) {
// Overwrite an already-emitted instruction word.
//
// translator - translation state whose bytecode buffer is patched.
// offset     - position inside bytecode.data, applied as a byte offset.
// code       - value to store; converted to little-endian first.
//
// Fix: the value was stored through a `size_t *`, which writes only
// sizeof(size_t) bytes (truncating the 64-bit word on 32-bit targets) and
// assumes the address is suitably aligned. memcpy always writes the full
// 8 bytes and has no alignment requirement.
//
// NOTE(review): push_instruction_code() returns bytecode.size as the
// position of an instruction -- confirm that value is expressed in bytes,
// otherwise the offset here needs scaling by sizeof(uint64_t).
void set_instruction_code(Translated *translator, size_t offset,
                          uint64_t code) {
  code = htole64(code);
  memcpy((char *)translator->bytecode.data + offset, &code, sizeof code);
}
// Append one instruction word to the bytecode.
//
// The word is converted to little-endian before being pushed. Returns
// bytecode.size as it was immediately before the push.
size_t push_instruction_code(Translated *translator, uint64_t code) {
  const size_t position = translator->bytecode.size;
  uint64_t encoded = htole64(code);
  darray_push(&translator->bytecode, &encoded);
  return position;
}
void set_registers(Translated * translator, size_t count) {
if (count>translator->registerCount) translator->registerCount = count;
// Record that at least `count` registers are needed.
// registerCount only ever grows; smaller or equal requests are ignored.
void set_registers(Translated *translator, size_t count) {
  if (count <= translator->registerCount) {
    return;
  }
  translator->registerCount = count;
}
void translate_parsed(Translated * translator, ParsedValue * parsedValue) {
// Translate a single AST node into bytecode.
//
// Dispatches on parsedValue->type:
//   AST_STRING      -> translate_parsed_string
//   AST_DECLARATION -> translate_parsed_declaration
//   AST_NUMBER      -> translate_parsed_number
//   AST_NULL        -> emits OP_LOAD_NULL with operand 0 after raising the
//                      register count to at least 1
// Any other node type emits nothing.
//
// Returns the value reported by the handler (for AST_NULL, the position of
// the OP_LOAD_NULL opcode), or 0 for node types without a handler.
//
// Fix: the AST_STRING case contained a leftover call that used the old
// parameter name `translator` (not declared in this function) and would have
// translated the string a second time. It is removed, and the AST_NULL local
// is confined to its own block instead of the whole switch.
size_t translate_parsed(Translated *translated, ParsedValue *parsedValue) {
  switch (parsedValue->type) {
  case AST_STRING:
    return translate_parsed_string(translated, parsedValue);
  case AST_DECLARATION:
    return translate_parsed_declaration(translated, parsedValue);
  case AST_NUMBER:
    return translate_parsed_number(translated, parsedValue);
  case AST_NULL: {
    set_registers(translated, 1);
    size_t output = push_instruction_code(translated, OP_LOAD_NULL);
    push_instruction_code(translated, 0);
    return output;
  }
  default:
    // No translation for this node type; same result as before.
    return 0;
  }
}
// Translate every top-level node of `ast`, in order, into `translated`.
// The per-node return value of translate_parsed() is not used.
void translate(Translated *translated, DArray *ast) {
  size_t node_index = 0;
  while (node_index < ast->size) {
    ParsedValue *node = darray_get(ast, node_index);
    translate_parsed(translated, node);
    node_index++;
  }
}
void translate(Translated * translator, DArray *ast) {
for (size_t i = 0; i<ast->size; i++) {
ParsedValue * parsedValue = darray_get(ast, i);
translate_parsed(translator,parsedValue);
}
}
void free_translator(Translated * translated) {
// Release the storage owned by a Translated: its bytecode array and its
// constant arena. The Translated object itself is not freed.
void free_translator(Translated *translated) {
// NULL is passed as darray_free's second argument -- presumably the
// per-element destructor, unneeded for plain instruction words; confirm in
// darray.h.
darray_free(&translated->bytecode, NULL);
arena_free(&translated->constants);
}

View File

@@ -2,14 +2,13 @@
#define TRANSLATOR_H
#include "../dynamic_array/darray.h"
#include "../memory.h"
#include "../parser/parser.h"
#include <stddef.h>
#include <stdint.h>
#include "../dynamic_array/darray.h"
#include "../parser/parser.h"
#include "../memory.h"
typedef enum { OP_LOAD_CONST=255 } OperationType;
typedef enum { OP_TYPE_STRING=255 } types;
typedef enum { OP_LOAD_CONST = 255, OP_DECLARE, OP_LOAD_NULL, OP_JUMP } OperationType;
typedef enum { TYPE_OP_STRING = 255, TYPE_OP_NUMBER } types;
typedef struct {
void *data;
@@ -23,18 +22,22 @@ typedef struct {
ConstantArena constants;
} Translated;
void * arena_get(ConstantArena *arena, size_t offset);
void *arena_get(ConstantArena *arena, size_t offset);
size_t arena_push(ConstantArena *arena, const void *data, size_t length);
size_t push_instruction_code(Translated * translator, uint64_t code);
void set_instruction_code(Translated * translator, size_t offset, uint64_t code);
void set_registers(Translated * translator, size_t count);
size_t push_instruction_code(Translated *translator, uint64_t code);
void set_registers(Translated *translator, size_t count);
Translated init_translator();
void translate(Translated * translator, DArray *ast);
size_t translate_parsed(Translated * translator, ParsedValue * parsedValue);
void free_translator(Translated * translated);
void translate(Translated *translator, DArray *ast);
void free_translator(Translated *translated);
#endif

12
test.ar
View File

@@ -17,8 +17,10 @@
"🇬🇧"
"hello\u0000world"
1.24323234e2312324
let a,
b = 1,
b = "hello",
c,
d = 42,
temp_result,
@@ -57,9 +59,9 @@ else term.log("bruh")
mm=1/2/4/2/4354/534/534//534//3422*404203420234+3432423324&&430234230||4320423040230423^384239423043024923%4432042304920.3432423423
mm=x/2/4/2/4354/534/534//534//3422*404203420234+3432423324&&430234230||4320423040230423^384239423043024923%4432042304920.3432423423
x = [
let x = [
'hello world',
'wow',
10
@@ -67,8 +69,8 @@ x = [
term.log(x[0:1:1])
y = {
'hello':10,
let y = {
'hello':test,
world:'nice'
}