add tokens to state struct

This commit is contained in:
2025-05-27 13:24:56 +01:00
parent 6249c3519c
commit 3dedd7f348
15 changed files with 361 additions and 206 deletions

View File

@@ -4,8 +4,9 @@
#include "token.h"
#include "lexer.h"
/* Declares a local `state` pointing at the LexerState registered on this
 * scanner via yyset_extra(). The trailing semicolon is part of the macro
 * because the rules invoke it both as `GET_STATE` and as `GET_STATE;`. */
#define GET_STATE LexerState *state = (LexerState *)yyget_extra(yyscanner);
/* Advance the tracked column past the text just matched (yyleng characters).
 * GET_ADD_COLUMN and ADD_TO_COLUMN expand to the same statement. */
#define GET_ADD_COLUMN COLUMN_NO += yyleng;
#define ADD_TO_COLUMN COLUMN_NO += yyleng;
/* Line number stored on tokens and printed in diagnostics: yylineno + 1
 * (presumably yylineno is zero-based here -- TODO confirm). Parenthesised so
 * the macro stays a single operand inside larger expressions. */
#define LINE_NO (yylineno + 1)
/* Shorthands for fields of the `state` local declared by GET_STATE. */
#define TOKENS state->tokens
#define COLUMN_NO state->current_column
int yywrap(void *) {
@@ -15,44 +16,104 @@ int yywrap(void *) {
%%
(\"(\\[a-z\"'`]|[^\\"])*\") {
    /* Double-quoted string. A backslash is only accepted when followed by
     * a lowercase letter or one of the three quote characters. yytext is
     * stored as matched, i.e. still including the surrounding quotes.
     * NOTE(review): the negated class also matches newlines, so a literal
     * may span lines while the column is simply advanced by yyleng. */
    GET_STATE
    /* Record the token before advancing so it carries its start column. */
    add_token(TOKENS, TOKEN_STRING, yytext, LINE_NO, COLUMN_NO);
    ADD_TO_COLUMN
}
('((\\([a-z'\"`]))|[^'])*') {
    /* Single-quoted string; emitted with the same token kind as the
     * double-quoted form. Unlike that form, the fallback class here only
     * excludes the quote itself, so any backslash sequence is accepted. */
    GET_STATE
    add_token(TOKENS, TOKEN_STRING, yytext, LINE_NO, COLUMN_NO);
    ADD_TO_COLUMN
}
((([0-9]+(\.[0-9]+)?)|(\.[0-9]+))(e((\-|\+)?([0-9]+(\.[0-9]+)?)))?) {
    /* Numeric literal: digits with an optional fractional part, or a bare
     * leading-dot fraction, optionally followed by a lowercase e exponent.
     * The exponent takes an optional sign and may itself contain a decimal
     * point. The raw text is stored; no conversion happens here. */
GET_STATE
add_token(TOKENS,TOKEN_NUMBER, yytext, LINE_NO, COLUMN_NO);
ADD_TO_COLUMN
}
([0-9]+\/[0-9]+) {
    /* Fraction literal: two digit runs joined by a slash with no spaces.
     * Being the longer match, it wins over number, slash, number. */
GET_STATE
add_token(TOKENS,TOKEN_FRACTION, yytext, LINE_NO, COLUMN_NO);
ADD_TO_COLUMN
}
"not"[ \t]+"in" { /* two-word operator emitted as one token; the words may be separated by any run of spaces or tabs */ GET_STATE; add_token(TOKENS,TOKEN_NOT_IN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"&&" { GET_STATE; add_token(TOKENS,TOKEN_AND, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"||" { GET_STATE; add_token(TOKENS,TOKEN_OR, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"<=" { GET_STATE; add_token(TOKENS,TOKEN_LE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
">=" { GET_STATE; add_token(TOKENS,TOKEN_GE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"!=" { GET_STATE; add_token(TOKENS,TOKEN_NE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"==" { GET_STATE; add_token(TOKENS,TOKEN_EQ, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"=" { GET_STATE; add_token(TOKENS,TOKEN_ASSIGN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"//" { /* longest match: two slashes lex as one FLOORDIV token, never as two SLASH tokens */ GET_STATE; add_token(TOKENS,TOKEN_FLOORDIV, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"<" { GET_STATE; add_token(TOKENS,TOKEN_LT, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
">" { GET_STATE; add_token(TOKENS,TOKEN_GT, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"+" { GET_STATE; add_token(TOKENS,TOKEN_PLUS, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"-" { GET_STATE; add_token(TOKENS,TOKEN_MINUS, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"%" { GET_STATE; add_token(TOKENS,TOKEN_MODULO, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"*" { GET_STATE; add_token(TOKENS,TOKEN_STAR, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"/" { GET_STATE; add_token(TOKENS,TOKEN_SLASH, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"^" { GET_STATE; add_token(TOKENS,TOKEN_CARET, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"if" { /* keyword rules sit above the identifier rule: on an equal-length match flex picks the earlier rule, while a longer identifier still wins by length */ GET_STATE; add_token(TOKENS,TOKEN_IF, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"else" { GET_STATE; add_token(TOKENS,TOKEN_ELSE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"while" { GET_STATE; add_token(TOKENS,TOKEN_WHILE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"forever" { GET_STATE; add_token(TOKENS,TOKEN_FOREVER, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"for" { GET_STATE; add_token(TOKENS,TOKEN_FOR, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"break" { GET_STATE; add_token(TOKENS,TOKEN_BREAK, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"continue" { GET_STATE; add_token(TOKENS,TOKEN_CONTINUE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"return" { GET_STATE; add_token(TOKENS,TOKEN_RETURN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"let" { GET_STATE; add_token(TOKENS,TOKEN_LET, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"import" { GET_STATE; add_token(TOKENS,TOKEN_IMPORT, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"from" { GET_STATE; add_token(TOKENS,TOKEN_FROM, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"do" { GET_STATE; add_token(TOKENS,TOKEN_DO, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"true" { GET_STATE; add_token(TOKENS,TOKEN_TRUE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"false" { GET_STATE; add_token(TOKENS,TOKEN_FALSE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"null" { GET_STATE; add_token(TOKENS,TOKEN_NULL, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"delete" { GET_STATE; add_token(TOKENS,TOKEN_DELETE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"not" { GET_STATE; add_token(TOKENS,TOKEN_NOT, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"in" { GET_STATE; add_token(TOKENS,TOKEN_IN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"try" { GET_STATE; add_token(TOKENS,TOKEN_TRY, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"catch" { GET_STATE; add_token(TOKENS,TOKEN_CATCH, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"(" { /* grouping characters: each one is emitted as its own token kind */ GET_STATE; add_token(TOKENS,TOKEN_LPAREN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
")" { GET_STATE; add_token(TOKENS,TOKEN_RPAREN, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"[" { GET_STATE; add_token(TOKENS,TOKEN_LBRACKET, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"]" { GET_STATE; add_token(TOKENS,TOKEN_RBRACKET, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"{" { GET_STATE; add_token(TOKENS,TOKEN_LBRACE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
"}" { GET_STATE; add_token(TOKENS,TOKEN_RBRACE, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN; }
[a-zA-Z_][a-zA-Z0-9_]* {
    /* Identifier: a letter or underscore followed by letters, digits or
     * underscores. */
    GET_STATE
    add_token(TOKENS, TOKEN_IDENTIFIER, yytext, LINE_NO, COLUMN_NO);
    ADD_TO_COLUMN
}
"." { GET_STATE; add_token(TOKENS, TOKEN_DOT, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN }
"," { GET_STATE; add_token(TOKENS, TOKEN_COMMA, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN }
":" { GET_STATE; add_token(TOKENS, TOKEN_COLON, yytext, LINE_NO, COLUMN_NO); ADD_TO_COLUMN }
\n {
    /* A line break is a token of its own; the column restarts at 1. */
    GET_STATE
    add_token(TOKENS, TOKEN_NEW_LINE, yytext, LINE_NO, COLUMN_NO);
    COLUMN_NO = 1;
}
[ \t]+ {
    /* A run of spaces/tabs that begins at column 1 is recorded as an
     * indent token; whitespace anywhere else yields no token. Either way
     * the column is advanced exactly once, by the length of the run. */
    GET_STATE
    if (COLUMN_NO == 1) {
        add_token(TOKENS, TOKEN_INDENT, yytext, LINE_NO, COLUMN_NO);
    }
    ADD_TO_COLUMN
}
. {
    /* Any character no other rule accepts: report where it is and stop
     * the whole process with exit status 1. */
    GET_STATE
    fprintf(stderr, "%s: line %d column %d: unexpected character '%s'\n", state->path, LINE_NO, COLUMN_NO, yytext);
    exit(1);
}
%%

View File

@@ -1,31 +1,26 @@
#include "lex.yy.h"
#include "token.h"
#include "lexer.h"
#include "../string/string.h"
#include <stdlib.h>
/**
 * Tokenise state.content with a freshly created reentrant flex scanner.
 *
 * The scanner rules append tokens to state.tokens; this function never
 * allocates or frees that list, so the caller must set it up before the
 * call and keeps ownership afterwards. `state` is taken by value: the copy
 * is registered as the scanner's extra data, so column updates made while
 * scanning are not visible to the caller.
 *
 * If the scanner cannot be created the failure is reported on stderr and
 * the function returns without scanning.
 */
void lexer(LexerState state) {
    yyscan_t scanner;

    /* Prints the result of unquote() on the whole input before scanning and
     * releases it again (looks like a debugging aid -- TODO confirm it is
     * still wanted). */
    char *unquoted = unquote(state.content);
    if (unquoted) {
        printf("%s\n", unquoted);
        free(unquoted);
    }

    /* yylex_init() returns non-zero and sets errno when it cannot allocate
     * the scanner; `scanner` must not be used in that case. */
    if (yylex_init(&scanner) != 0) {
        perror("yylex_init");
        return;
    }

    /* The extra data must be in place before yylex() runs, because every
     * rule fetches it through yyget_extra(). */
    yyset_extra(&state, scanner);

    void *buffer = yy_scan_string(state.content, scanner);
    yy_switch_to_buffer(buffer, scanner);

    yylex(scanner);

    yy_delete_buffer(buffer, scanner);
    yylex_destroy(scanner);
}

View File

@@ -1,7 +1,11 @@
#include "token.h"
/* Per-scan state. A pointer to it is attached to the flex scanner with
 * yyset_extra() and read back inside the rules with yyget_extra(). */
typedef struct {
/* Source name printed in the lexer's "unexpected character" diagnostic. */
const char *filename;
/* Source path printed in the lexer's "unexpected character" diagnostic. */
const char *path;
/* Text handed to yy_scan_string(), i.e. the input to tokenise. */
const char *content;
/* Column recorded on the next token: advanced by the length of every
 * match and reset to 1 after a newline. */
int current_column;
/* Token list that the scanner rules append to via add_token(). */
TokenStruct* tokens;
// add more fields as needed
} LexerState;
/**
 * Run the scanner over state.content.
 *
 * Only one declaration of lexer() may exist: an additional `int lexer();`
 * conflicts with this one because the return types differ.
 *
 * @param state scan description, passed by value; the scanner rules append
 *              tokens to the list that state.tokens points to.
 */
void lexer(LexerState state);

View File

@@ -4,32 +4,44 @@
#define INITIAL_CAPACITY 64
Token* tokens = NULL;
int token_count = 0;
static int token_capacity = 0;
void add_token(TokenType type, const char* value, int line, int column) {
if (tokens == NULL) {
token_capacity = INITIAL_CAPACITY;
tokens = malloc(sizeof(Token) * token_capacity);
} else if (token_count >= token_capacity) {
token_capacity *= 2;
tokens = realloc(tokens, sizeof(Token) * token_capacity);
/**
 * Create an empty token list.
 *
 * @return a heap-allocated list with count 0 and room for INITIAL_CAPACITY
 *         tokens, or NULL when either allocation fails (nothing is leaked
 *         in that case). Release the list with free_tokens().
 */
TokenStruct *init_token(void) {
    TokenStruct *list = malloc(sizeof *list);
    if (list == NULL) {
        return NULL;
    }

    list->count = 0;
    list->capacity = INITIAL_CAPACITY;
    list->tokens = malloc(sizeof *list->tokens * INITIAL_CAPACITY);
    if (list->tokens == NULL) {
        /* Do not hand out a half-built list. */
        free(list);
        return NULL;
    }

    return list;
}
void free_tokens() {
for (int i = 0; i < token_count; ++i) {
free(tokens[i].value);
/**
 * Append one token to the list, growing the backing array when it is full.
 *
 * @param token  list to append to (must not be NULL)
 * @param type   kind of the token
 * @param value  token text; it is duplicated, so the caller's buffer (e.g.
 *               the scanner's yytext) may be reused afterwards
 * @param line   line number to record on the token
 * @param column column number to record on the token
 *
 * The function cannot report failure through its return type, so running
 * out of memory terminates the program with abort() instead of writing
 * through a NULL pointer or silently dropping a token.
 */
void add_token(TokenStruct* token,TokenType type, const char* value, int line, int column) {
    if (token->count >= token->capacity) {
        /* Doubling a capacity of 0 would never grow the array. */
        int grown = token->capacity > 0 ? token->capacity * 2 : INITIAL_CAPACITY;

        /* Keep the old pointer until realloc() is known to have succeeded;
         * assigning the result directly would lose the array on failure. */
        Token *resized = realloc(token->tokens, sizeof *resized * (size_t)grown);
        if (resized == NULL) {
            abort();
        }
        token->tokens = resized;
        token->capacity = grown;
    }

    char *copy = strdup(value);
    if (copy == NULL) {
        abort();
    }

    Token *slot = &token->tokens[token->count];
    slot->type = type;
    slot->value = copy;
    slot->line = line;
    slot->column = column;
    token->count++;
}
/**
 * Destroy a token list: every token's text, the token array, and the list
 * object itself. The pointer must not be used afterwards.
 *
 * @param token list to destroy; NULL is accepted and ignored, so the result
 *              of a failed init_token() can be passed straight through.
 */
void free_tokens(TokenStruct* token) {
    if (token == NULL) {
        return;
    }

    for (int i = 0; i < token->count; ++i) {
        free(token->tokens[i].value);
    }
    free(token->tokens);

    /* No need to reset the fields: the struct itself is released here. */
    free(token);
}

View File

@@ -4,26 +4,83 @@
/* Kinds of token the lexer can emit. Each enumerator may appear only once;
 * TOKEN_DOT lives with the other punctuation at the end of the list. */
typedef enum {
    // Literals and names
    TOKEN_STRING,      // single- or double-quoted string, quotes included
    TOKEN_NUMBER,      // integer, decimal or exponent literal
    TOKEN_FRACTION,    // digits '/' digits, e.g. 1/2
    TOKEN_IDENTIFIER,
    TOKEN_KEYWORD,     // generic keyword kind; each keyword also has its own kind below

    // Layout
    TOKEN_NEW_LINE,
    TOKEN_INDENT,      // whitespace run starting at column 1

    // Operators
    TOKEN_AND,         // &&
    TOKEN_OR,          // ||
    TOKEN_NOT_IN,      // not in
    TOKEN_LE,          // <=
    TOKEN_GE,          // >=
    TOKEN_LT,          // <
    TOKEN_GT,          // >
    TOKEN_NE,          // !=
    TOKEN_EQ,          // ==
    TOKEN_ASSIGN,      // =
    TOKEN_PLUS,        // +
    TOKEN_MINUS,       // -
    TOKEN_MODULO,      // %
    TOKEN_STAR,        // *
    TOKEN_FLOORDIV,    // //
    TOKEN_SLASH,       // /
    TOKEN_CARET,       // ^

    // Keywords
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_WHILE,
    TOKEN_FOREVER,
    TOKEN_FOR,
    TOKEN_BREAK,
    TOKEN_CONTINUE,
    TOKEN_RETURN,
    TOKEN_LET,
    TOKEN_IMPORT,
    TOKEN_FROM,
    TOKEN_DO,
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_NULL,
    TOKEN_DELETE,
    TOKEN_NOT,
    TOKEN_IN,
    TOKEN_TRY,
    TOKEN_CATCH,

    // Parentheses, brackets, and braces
    TOKEN_LPAREN,      // (
    TOKEN_RPAREN,      // )
    TOKEN_LBRACKET,    // [
    TOKEN_RBRACKET,    // ]
    TOKEN_LBRACE,      // {
    TOKEN_RBRACE,      // }

    // Punctuation
    TOKEN_DOT,         // .
    TOKEN_COMMA,       // ,
    TOKEN_COLON,       // :
} TokenType;
/* One lexed token. `value` must be declared exactly once; a second
 * declaration of the same member does not compile. */
typedef struct {
    TokenType type;   /* kind of token */
    int line;         /* line number the lexer recorded for the token */
    int column;       /* column number the lexer recorded for the token */
    char *value;      /* heap copy of the token text made by add_token();
                         released by free_tokens() */
} Token;
extern int token_count;
/* Growable list of tokens, created by init_token() and filled by
 * add_token(). */
typedef struct {
/* Number of entries of `tokens` currently in use. */
int count;
/* Number of entries `tokens` has room for; add_token() doubles it when
 * count reaches it. */
int capacity;
/* Heap-allocated array of `capacity` tokens. */
Token* tokens;
} TokenStruct;
extern Token* tokens;
/* Create an empty token list; returns NULL if memory cannot be allocated.
 * Release the list with free_tokens(). */
TokenStruct *init_token(void);

/* Append a token to `token`. `value` is copied, so the caller keeps
 * ownership of its own buffer. This is the only add_token() prototype: a
 * second one with a different parameter list would be a conflicting
 * declaration. */
void add_token(TokenStruct* token,TokenType type, const char* value, int line, int column);

/* Free every token's text, the token array and the list object itself. */
void free_tokens(TokenStruct* token);
#endif