Update the lexer rules to return only the token type number; build and append each Token in the calling loop instead

This commit is contained in:
2025-05-30 02:12:51 +01:00
parent 626445a906
commit 68341db0b0
13 changed files with 165 additions and 350 deletions

View File

@@ -1,12 +1,10 @@
%option reentrant %option reentrant
%option yylineno
%{ %{
#include "token.h" #include "token.h"
#include "lexer.h" #include "lexer.h"
#define GET_STATE LexerState *state = (LexerState *)yyget_extra(yyscanner); #define GET_STATE LexerState *state = (LexerState *)yyget_extra(yyscanner);
#define ADD_TO_COLUMN COLUMN_NO += yyleng;
#define LINE_NO yylineno+1
#define TOKENS state->tokens
#define COLUMN_NO state->current_column #define COLUMN_NO state->current_column
int yywrap(void *) { int yywrap(void *) {
@@ -17,326 +15,87 @@ int yywrap(void *) {
%% %%
\"((\\([\"\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\"\n])*\" { \"((\\([\"\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\"\n])*\" {
GET_STATE return TOKEN_STRING;
Token * token = create_token(
TOKEN_STRING,
LINE_NO,
COLUMN_NO,
yytext
);
append(TOKENS, token);
ADD_TO_COLUMN
} }
\'((\\([\'\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\'\n])*\' { \'((\\([\'\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\'\n])*\' {
GET_STATE return TOKEN_STRING;
append(TOKENS, create_token(
TOKEN_STRING,
LINE_NO,
COLUMN_NO,
yytext
));
ADD_TO_COLUMN
} }
((([0-9]+(\.[0-9]+)?)|(\.[0-9]+))(e((\-|\+)?([0-9]+(\.[0-9]+)?)))?) { ((([0-9]+(\.[0-9]+)?)|(\.[0-9]+))(e((\-|\+)?([0-9]+(\.[0-9]+)?)))?) {
GET_STATE return TOKEN_NUMBER;
append(TOKENS, create_token(
TOKEN_NUMBER,
LINE_NO,
COLUMN_NO,
yytext
));
ADD_TO_COLUMN
} }
([0-9]+\/[0-9]+) { ([0-9]+\/[0-9]+) {
GET_STATE return TOKEN_FRACTION;
append(TOKENS, create_token(
TOKEN_FRACTION,
LINE_NO,
COLUMN_NO,
yytext
));
ADD_TO_COLUMN
} }
"not"[ \t]+"in" { GET_STATE; append(TOKENS, create_token(TOKEN_NOT_IN, "not"[ \t]+"in" { return TOKEN_NOT_IN; }
LINE_NO, "&&" { return TOKEN_AND; }
COLUMN_NO, "||" { return TOKEN_OR; }
yytext "<=" { return TOKEN_LE; }
)); ">=" { return TOKEN_GE; }
append(TOKENS, create_token( "!=" { return TOKEN_NE; }
TOKEN_NOT_IN, "==" { return TOKEN_EQ; }
LINE_NO, "=" { return TOKEN_ASSIGN; }
COLUMN_NO, "//" { return TOKEN_FLOORDIV; }
yytext "<" { return TOKEN_LT; }
)); ADD_TO_COLUMN; } ">" { return TOKEN_GT; }
"&&" { GET_STATE; append(TOKENS, create_token(TOKEN_AND, "+" { return TOKEN_PLUS; }
LINE_NO, "-" { return TOKEN_MINUS; }
COLUMN_NO, "%" { return TOKEN_MODULO; }
yytext "*" { return TOKEN_STAR; }
)); ADD_TO_COLUMN; } "/" { return TOKEN_SLASH; }
"||" { GET_STATE; append(TOKENS, create_token(TOKEN_OR, "^" { return TOKEN_CARET; }
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"<=" { GET_STATE; append(TOKENS, create_token(TOKEN_LE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
">=" { GET_STATE; append(TOKENS, create_token(TOKEN_GE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"!=" { GET_STATE; append(TOKENS, create_token(TOKEN_NE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"==" { GET_STATE; append(TOKENS, create_token(TOKEN_EQ,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"=" { GET_STATE; append(TOKENS, create_token(TOKEN_ASSIGN,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"//" { GET_STATE; append(TOKENS, create_token(TOKEN_FLOORDIV,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"<" { GET_STATE; append(TOKENS, create_token(TOKEN_LT,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
">" { GET_STATE; append(TOKENS, create_token(TOKEN_GT,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"+" { GET_STATE; append(TOKENS, create_token(TOKEN_PLUS,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"-" { GET_STATE; append(TOKENS, create_token(TOKEN_MINUS,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"%" { GET_STATE; append(TOKENS, create_token(TOKEN_MODULO,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"*" { GET_STATE; append(TOKENS, create_token(TOKEN_STAR,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"/" { GET_STATE; append(TOKENS, create_token(TOKEN_SLASH,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"^" { GET_STATE; append(TOKENS, create_token(TOKEN_CARET,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"if" { GET_STATE; append(TOKENS, create_token(TOKEN_IF, "if" { return TOKEN_IF; }
LINE_NO, "else" { return TOKEN_ELSE; }
COLUMN_NO, "while" { return TOKEN_WHILE; }
yytext "forever" { return TOKEN_FOREVER; }
)); ADD_TO_COLUMN; } "for" { return TOKEN_FOR; }
"else" { GET_STATE; append(TOKENS, create_token(TOKEN_ELSE, "break" { return TOKEN_BREAK; }
LINE_NO, "continue" { return TOKEN_CONTINUE; }
COLUMN_NO, "return" { return TOKEN_RETURN; }
yytext "let" { return TOKEN_LET; }
)); ADD_TO_COLUMN; } "import" { return TOKEN_IMPORT; }
"while" { GET_STATE; append(TOKENS, create_token(TOKEN_WHILE, "from" { return TOKEN_FROM; }
LINE_NO, "do" { return TOKEN_DO; }
COLUMN_NO, "true" { return TOKEN_TRUE; }
yytext "false" { return TOKEN_FALSE; }
)); ADD_TO_COLUMN; } "null" { return TOKEN_NULL; }
"forever" { GET_STATE; append(TOKENS, create_token(TOKEN_FOREVER, "delete" { return TOKEN_DELETE; }
LINE_NO, "not" { return TOKEN_NOT; }
COLUMN_NO, "in" { return TOKEN_IN; }
yytext "try" { return TOKEN_TRY; }
)); ADD_TO_COLUMN; } "catch" { return TOKEN_CATCH; }
"for" { GET_STATE; append(TOKENS, create_token(TOKEN_FOR,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"break" { GET_STATE; append(TOKENS, create_token(TOKEN_BREAK,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"continue" { GET_STATE; append(TOKENS, create_token(TOKEN_CONTINUE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"return" { GET_STATE; append(TOKENS, create_token(TOKEN_RETURN,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"let" { GET_STATE; append(TOKENS, create_token(TOKEN_LET,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"import" { GET_STATE; append(TOKENS, create_token(TOKEN_IMPORT,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"from" { GET_STATE; append(TOKENS, create_token(TOKEN_FROM,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"do" { GET_STATE; append(TOKENS, create_token(TOKEN_DO,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"true" { GET_STATE; append(TOKENS, create_token(TOKEN_TRUE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"false" { GET_STATE; append(TOKENS, create_token(TOKEN_FALSE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"null" { GET_STATE; append(TOKENS, create_token(TOKEN_NULL,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"delete" { GET_STATE; append(TOKENS, create_token(TOKEN_DELETE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"not" { GET_STATE; append(TOKENS, create_token(TOKEN_NOT,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"in" { GET_STATE; append(TOKENS, create_token(TOKEN_IN,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"try" { GET_STATE; append(TOKENS, create_token(TOKEN_TRY,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"catch" { GET_STATE; append(TOKENS, create_token(TOKEN_CATCH,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"(" { GET_STATE; append(TOKENS, create_token(TOKEN_LPAREN, "(" { return TOKEN_LPAREN; }
LINE_NO, ")" { return TOKEN_RPAREN; }
COLUMN_NO, "[" { return TOKEN_LBRACKET; }
yytext "]" { return TOKEN_RBRACKET; }
)); ADD_TO_COLUMN; } "{" { return TOKEN_LBRACE; }
")" { GET_STATE; append(TOKENS, create_token(TOKEN_RPAREN, "}" { return TOKEN_RBRACE; }
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"[" { GET_STATE; append(TOKENS, create_token(TOKEN_LBRACKET,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"]" { GET_STATE; append(TOKENS, create_token(TOKEN_RBRACKET,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"{" { GET_STATE; append(TOKENS, create_token(TOKEN_LBRACE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
"}" { GET_STATE; append(TOKENS, create_token(TOKEN_RBRACE,
LINE_NO,
COLUMN_NO,
yytext
)); ADD_TO_COLUMN; }
[a-zA-Z_][a-zA-Z0-9_]* { [a-zA-Z_][a-zA-Z0-9_]* { return TOKEN_IDENTIFIER; }
GET_STATE
append(TOKENS, create_token(TOKEN_IDENTIFIER,
LINE_NO,
COLUMN_NO,
yytext
));
ADD_TO_COLUMN
}
"." {GET_STATE;append(TOKENS, create_token(TOKEN_DOT, "." { return TOKEN_DOT; }
LINE_NO, "," {return TOKEN_COMMA; }
COLUMN_NO, ":" {return TOKEN_COLON; }
yytext
));ADD_TO_COLUMN}
"," {GET_STATE;append(TOKENS, create_token(TOKEN_COMMA,
LINE_NO,
COLUMN_NO,
yytext
));ADD_TO_COLUMN}
":" {GET_STATE;append(TOKENS, create_token(TOKEN_COLON,
LINE_NO,
COLUMN_NO,
yytext
));ADD_TO_COLUMN}
\n { \n { return TOKEN_NEW_LINE; }
GET_STATE
append(TOKENS, create_token(TOKEN_NEW_LINE,
LINE_NO,
COLUMN_NO,
yytext
));
COLUMN_NO = 1;
}
[ \t]+ { [ \t]+ {
GET_STATE GET_STATE
if (COLUMN_NO == 1){ if (COLUMN_NO == 0){
append(TOKENS, create_token(TOKEN_INDENT, return TOKEN_INDENT;
LINE_NO,
COLUMN_NO,
yytext
));
} }
ADD_TO_COLUMN // Advance column for whitespace COLUMN_NO += yyleng;
} }
. { . {
GET_STATE GET_STATE
fprintf(stderr, "%s: line %d column %d: unexpected character '%s'\n", state->path, LINE_NO, COLUMN_NO, yytext); fprintf(stderr, "%s:%u:%u: unexpected character '%s'\n", state->path, yylineno+1, COLUMN_NO+1, yytext);
exit(1); exit(1);
} }
%% %%

View File

@@ -11,7 +11,21 @@ void lexer(LexerState state) {
void* buffer = yy_scan_string(state.content, scanner); void* buffer = yy_scan_string(state.content, scanner);
yy_switch_to_buffer(buffer, scanner); yy_switch_to_buffer(buffer, scanner);
yylex(scanner); int token;
while ((token = yylex(scanner)) != 0) {
Token * token_struct = create_token(
token,
yyget_lineno(scanner),
state.current_column,
yyget_text(scanner)
);
append(state.tokens, token_struct);
if (token == TOKEN_NEW_LINE) {
state.current_column = 0;
} else {
state.current_column += yyget_leng(scanner);
}
}
yy_delete_buffer(buffer, scanner); yy_delete_buffer(buffer, scanner);
yylex_destroy(scanner); yylex_destroy(scanner);

View File

@@ -1,14 +1,17 @@
#include "token.h" #include "token.h"
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "../string/string.h" #include "../string/string.h"
Token *create_token(TokenType type, int line, int column, char *value) { Token *create_token(TokenType type, int line, int column, char *value) {
Token * token = malloc(sizeof(Token)); Token * token = malloc(sizeof(Token));
printf("%s\n", value);
token->type = type; token->type = type;
token->line=line; token->line=line;
token->column=column; token->column=column;
token->value=cloneString(value); token->value=cloneString(value);
return token; return token;
}
/**
 * Destroy a Token that was produced by create_token().
 *
 * The signature is void (*)(void *) so the function can be passed to
 * free_list() as the per-element destructor. free_list() releases only the
 * list node after invoking the callback, so this function must release both
 * the cloned value string and the Token allocation itself; otherwise every
 * Token struct is leaked.
 *
 * @param ptr  Pointer to a heap-allocated Token, or NULL (no-op).
 */
void free_token(void * ptr) {
    Token *token = ptr;
    if (!token) {
        return;
    }
    free(token->value);
    /* The struct was malloc'd in create_token(); nobody else frees it. */
    free(token);
}

View File

@@ -2,7 +2,7 @@
#define TOKEN_H #define TOKEN_H
typedef enum { typedef enum {
TOKEN_STRING, TOKEN_STRING = 256,
TOKEN_NUMBER, TOKEN_NUMBER,
TOKEN_FRACTION, TOKEN_FRACTION,
TOKEN_IDENTIFIER, TOKEN_IDENTIFIER,
@@ -72,4 +72,5 @@ typedef struct {
} Token; } Token;
Token *create_token(TokenType type, int line, int column, char *value); Token *create_token(TokenType type, int line, int column, char *value);
void free_token(void * ptr);
#endif #endif

View File

@@ -82,11 +82,14 @@ void print_list(LinkedList *list, void (*print_func)(void *)) {
} }
} }
void free_list(LinkedList *list) { void free_list(LinkedList *list, void (*free_data)(void *)) {
Node *current = list->head; Node *current = list->head;
while (current) { while (current) {
Node *next = current->next; Node *next = current->next;
free(current->data);
if (free_data) // Safe to pass NULL if you don't need it
free_data(current->data);
free(current); free(current);
current = next; current = next;
} }

View File

@@ -37,6 +37,6 @@ size_t list_length(LinkedList *list);
void print_list(LinkedList *list, void (*print_func)(void *)); void print_list(LinkedList *list, void (*print_func)(void *));
// Free all memory used by the list // Free all memory used by the list
void free_list(LinkedList *list); void free_list(LinkedList *list, void (*free_data)(void *));
#endif // LINKEDLIST_H #endif // LINKEDLIST_H

View File

@@ -6,6 +6,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h>
char* read_file_as_text(const char* filename) { char* read_file_as_text(const char* filename) {
FILE *file = fopen(filename, "r"); FILE *file = fopen(filename, "r");
@@ -46,7 +47,7 @@ int main() {
LexerState state = { LexerState state = {
path, path,
content, content,
1, 0,
tokens tokens
}; };
lexer(state); lexer(state);
@@ -55,11 +56,9 @@ int main() {
parser(parsed, tokens, false); parser(parsed, tokens, false);
Node *current = parsed->head; free_list(tokens, free_token);
while (current) {
printf("%s\n", (char*)((TaggedValue*)current->data)->data); free_list(parsed,free_tagged_value);
current = current->next;
}
return 0; return 0;
} }

View File

@@ -1,15 +1,24 @@
#include "parser.h" #include "parser.h"
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "../lexer/token.h"
#include "../list/list.h"
#include "string/string.h"
TaggedValue parse_token(LinkedList * tokens, size_t *index) { TaggedValue * parse_token(LinkedList * tokens, size_t *index) {
Token * token = get_element_at(tokens, *index); Token * token = get_element_at(tokens, *index);
switch (token->type) { switch (token->type) {
case TOKEN_STRING: case TOKEN_STRING:
(*index)++; (*index)++;
return parse_string(*token); return parse_string(*token);
case TOKEN_NEW_LINE:
(*index)++;
return NULL;
default: default:
perror("unreachable"); fprintf(stderr, "Panic: %s\n", "unreachable"); \
exit(0); exit(EXIT_FAILURE); \
} }
} }
@@ -17,7 +26,19 @@ void parser(LinkedList * parsed, LinkedList * tokens, bool inline_flag) {
size_t index = 0; size_t index = 0;
size_t length = list_length(tokens); size_t length = list_length(tokens);
while (index < length) { while (index < length) {
TaggedValue parsed_code = parse_token(tokens, &index); TaggedValue * parsed_code = parse_token(tokens, &index);
append(parsed,&parsed_code); if (parsed_code)
append(parsed,parsed_code);
} }
}
/**
 * Destroy a heap-allocated TaggedValue.
 *
 * Shaped as void (*)(void *) so it can serve as the element destructor for
 * free_list(). For AST_STRING the data pointer is freed first; any other tag
 * leaves data untouched. The TaggedValue itself is always freed.
 *
 * @param ptr  Pointer to the TaggedValue to release.
 */
void free_tagged_value(void *ptr) {
    TaggedValue *value = ptr;
    /* AST_STRING is currently the only tag whose payload is owned here. */
    if (value->type == AST_STRING) {
        free(value->data);
    }
    free(value);
}

View File

@@ -1,9 +1,27 @@
#include "../lexer/token.h" #ifndef PARSER_H
#include "string/string.h" #define PARSER_H
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stddef.h>
#include <stdlib.h>
void parser(LinkedList * parsed, LinkedList * tokens, bool inline_flag);
TaggedValue parse_token(LinkedList * tokens, size_t *index); typedef struct LinkedList LinkedList;
typedef enum {
AST_STRING,
} ValueType;
typedef struct {
ValueType type;
void *data;
} TaggedValue;
void parser(LinkedList *parsed, LinkedList *tokens, bool inline_flag);
TaggedValue *parse_token(LinkedList *tokens, size_t *index);
void free_tagged_value(void *ptr);
#endif // PARSER_H

View File

@@ -7,7 +7,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
char *swap_quotes(char *input) { char *swap_quotes(char *input, char quote) {
size_t len = strlen(input); size_t len = strlen(input);
char *result = malloc(len + 1); char *result = malloc(len + 1);
if (!result) if (!result)
@@ -15,8 +15,8 @@ char *swap_quotes(char *input) {
for (size_t i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {
if (input[i] == '"') if (input[i] == '"')
result[i] = '\''; result[i] = quote;
else if (input[i] == '\'') else if (input[i] == quote)
result[i] = '"'; result[i] = '"';
else else
result[i] = input[i]; result[i] = input[i];
@@ -33,8 +33,8 @@ char *unquote(char *str) {
char *swapped = NULL; char *swapped = NULL;
char *unescaped = NULL; char *unescaped = NULL;
if (quote == '\'') { if (quote != '"') {
swapped = swap_quotes(str); swapped = swap_quotes(str, quote);
if (!swapped) if (!swapped)
return NULL; return NULL;
str = swapped; str = swapped;
@@ -62,8 +62,8 @@ char *unquote(char *str) {
free(swapped); free(swapped);
// If input was single-quoted, swap quotes back in the output // If input was single-quoted, swap quotes back in the output
if (quote == '\'') { if (quote != '"') {
char *final = swap_quotes(unescaped); char *final = swap_quotes(unescaped, quote);
free(unescaped); free(unescaped);
return final; return final;
} }
@@ -71,9 +71,10 @@ char *unquote(char *str) {
return unescaped; return unescaped;
} }
TaggedValue parse_string(Token token) { TaggedValue * parse_string(Token token) {
return (TaggedValue){ TaggedValue * taggedValue = malloc(sizeof(TaggedValue));
AST_STRING,
unquote(token.value), taggedValue->type = AST_STRING;
}; taggedValue->data = unquote(token.value);
return taggedValue;
} }

View File

@@ -1,8 +1,15 @@
#include "../../lexer/token.h" #ifndef STRING_UTILS_H
#include "../taggedValue.h" #define STRING_UTILS_H
char *swap_quotes(char *input); #include "../../lexer/token.h"
#include "../parser.h"
// Declare functions related to string processing in parser
char *swap_quotes(char *input, char quote);
char *unquote(char *str); char *unquote(char *str);
TaggedValue parse_string(Token token); TaggedValue *parse_string(Token token);
#endif // STRING_UTILS_H

View File

@@ -1,11 +0,0 @@
#include "../list/list.h"
typedef enum {
AST_STRING,
} ValueType;
typedef struct {
ValueType type;
void *data;
} TaggedValue;

View File

@@ -1 +1 @@
"hello world" 'hello world'