add state to lexer for parallel support

2025-05-27 04:07:53 +01:00
parent 1540645759
commit 296600ee11
5 changed files with 55 additions and 28 deletions

@@ -14,6 +14,10 @@ $(BINARY): $(CFILES) $(LEXER_C) $(LEXER_H)
 	mkdir -p bin
 	gcc -O3 -o $(BINARY) $(CFILES) -lm -Wall -Wextra -Wno-unused-function
+
+debug: $(CFILES) $(LEXER_C) $(LEXER_H)
+	mkdir -p bin
+	gcc -g -O0 -o $(BINARY) $(CFILES) -lm -Wall -Wextra -Wno-unused-function
 clean:
 	rm -rf bin
 	rm -f $(LEXER_C) $(LEXER_H)

@@ -1,9 +1,14 @@
+%option reentrant
 %{
 #include "token.h"
-int current_line = 1;
-int current_column = 1;
+#include "lexer.h"
+
+#define GET_STATE LexerState *state = (LexerState *)yyget_extra(yyscanner);
+#define GET_ADD_COLUMN COLUMN_NO += yyleng;
+#define LINE_NO yylineno+1
+#define COLUMN_NO state->current_column
 
-int yywrap() {
+int yywrap(void *) {
     return 1;
 }
 %}
@@ -11,37 +16,43 @@ int yywrap() {
 %%
 \"(\\[a-z\"'`]|[^\\"])*\" {
-    add_token(TOKEN_STRING, yytext, current_line, current_column);
-    current_column += yyleng;
+    GET_STATE
+    add_token(TOKEN_STRING, yytext, LINE_NO, COLUMN_NO);
+    GET_ADD_COLUMN
 }
 
 [0-9]+ {
-    add_token(TOKEN_NUMBER, yytext, current_line, current_column);
-    current_column += yyleng;
+    GET_STATE
+    add_token(TOKEN_NUMBER, yytext, LINE_NO, COLUMN_NO);
+    GET_ADD_COLUMN
 }
 
 [a-zA-Z_][a-zA-Z0-9_]* {
-    add_token(TOKEN_IDENTIFIER, yytext, current_line, current_column);
-    current_column += yyleng;
+    GET_STATE
+    add_token(TOKEN_IDENTIFIER, yytext, LINE_NO, COLUMN_NO);
+    GET_ADD_COLUMN
 }
 
 "." {
-    add_token(TOKEN_DOT, yytext, current_line, current_column);
-    current_column += yyleng;
+    GET_STATE
+    add_token(TOKEN_DOT, yytext, LINE_NO, COLUMN_NO);
+    GET_ADD_COLUMN
 }
 
 \n {
-    add_token(TOKEN_NEW_LINE, yytext, current_line, current_column);
-    current_line++;
-    current_column = 1;
+    GET_STATE
+    add_token(TOKEN_NEW_LINE, yytext, LINE_NO, COLUMN_NO);
+    COLUMN_NO = 1;
 }
 
 [ \t]+ {
-    current_column += yyleng; // Advance column for whitespace
+    GET_STATE
+    GET_ADD_COLUMN // Advance column for whitespace
 }
 
 . {
-    fprintf(stderr, "Error: Unexpected character '%c' at line %d\n", *yytext, yylineno);
+    GET_STATE
+    fprintf(stderr, "Error in file %s on line %d column %d: unexpected character '%s'\n", state->filename, LINE_NO, COLUMN_NO, yytext);
     exit(1);
 }
 %%
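The scanner actions above rely on add_token() and the TOKEN_* constants from token.h, which this commit does not touch. For orientation only, here is a minimal sketch of a token.h that would be consistent with those calls; the enum ordering, the array bound, the value field width, and the static inline helper are assumptions, not the project's actual header.

#ifndef TOKEN_H
#define TOKEN_H

#include <string.h>

typedef enum {
    TOKEN_STRING,
    TOKEN_NUMBER,
    TOKEN_IDENTIFIER,
    TOKEN_DOT,
    TOKEN_NEW_LINE
} TokenType;

typedef struct {
    TokenType type;
    char value[128];   /* token text, truncated if longer (assumed bound) */
    int line;
    int column;
} Token;

/* Shared token storage used by the scanner actions and the lexer() driver.
 * Being global, it is still contended when several scanners run at once. */
extern Token tokens[4096];
extern int token_count;

static inline void add_token(TokenType type, const char *text, int line, int column) {
    Token *t = &tokens[token_count++];
    t->type = type;
    strncpy(t->value, text, sizeof t->value - 1);
    t->value[sizeof t->value - 1] = '\0';
    t->line = line;
    t->column = column;
}

#endif /* TOKEN_H */

Note that in this sketch tokens and token_count stay process-wide, which matters for the threaded sketch further down.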

@@ -1,16 +1,27 @@
 #include "lex.yy.h"
 #include "token.h"
+#include "lexer.h"
 
 int lexer() {
-    const char *input = "term.log\n";
+    yyscan_t scanner;
+    LexerState state = { "file1.src", 1 };
+    const char *input = "let x = 10";
+
+    yylex_init(&scanner);
+    // Set the extra data *before* scanning
+    yyset_extra(&state, scanner);
 
-    void* buffer = yy_scan_string(input);
-    yy_switch_to_buffer(buffer);
-    yylex(); // This fills the token array
-    yy_delete_buffer(buffer);
+    void* buffer = yy_scan_string(input, scanner);
+    yy_switch_to_buffer(buffer, scanner);
+    yylex(scanner); // This fills the token array
+    yy_delete_buffer(buffer, scanner);
+    yylex_destroy(scanner);
+
+    // print tokens etc.
     for (int i = 0; i < token_count; i++) {
         printf("Token(type=%d, value='%s')\n", tokens[i].type, tokens[i].value);
     }
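With %option reentrant and the per-scanner LexerState attached through yyset_extra(), each scanner instance keeps its own filename and column, which is what makes lexing several inputs at once feasible. Below is a rough sketch of such a threaded driver, assuming the lex.yy.h and lexer.h interfaces shown in this diff; the pthread wrapper, the LexJob type, and the file names and inputs are illustrative only, and since add_token() appends to a shared token array, a real parallel build would still need per-thread token storage or a lock around it.

#include <pthread.h>
#include "lex.yy.h"
#include "lexer.h"

typedef struct {
    const char *filename;  /* reported in scanner error messages */
    const char *source;    /* text to tokenize */
} LexJob;

static void *lex_one_input(void *arg) {
    LexJob *job = (LexJob *)arg;
    LexerState state = { job->filename, 1 };  /* per-scanner state, column starts at 1 */
    yyscan_t scanner;

    yylex_init(&scanner);
    yyset_extra(&state, scanner);             /* attach state before scanning */

    void *buffer = yy_scan_string(job->source, scanner);
    yylex(scanner);                           /* fills the (shared) token array */
    yy_delete_buffer(buffer, scanner);
    yylex_destroy(scanner);
    return NULL;
}

int main(void) {
    LexJob jobs[2] = {                        /* hypothetical inputs */
        { "file1.src", "let x = 10\n" },
        { "file2.src", "let y = 20\n" },
    };
    pthread_t threads[2];

    for (int i = 0; i < 2; i++)
        pthread_create(&threads[i], NULL, lex_one_input, &jobs[i]);
    for (int i = 0; i < 2; i++)
        pthread_join(threads[i], NULL);
    return 0;
}

Each thread owns its yyscan_t and its LexerState, so no scanner state is shared; only the token sink is.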

@@ -1 +1,7 @@
-int lexer();
+typedef struct {
+    const char *filename;
+    int current_column;
+    // add more fields as needed
+} LexerState;
+
+int lexer();

@@ -1,9 +1,4 @@
-#include "string/string.h"
 #include "number/number.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 #include "lexer/lexer.h"
 
 void initialize() {