diff --git a/src/lexer/lex.l b/src/lexer/lex.l index b5e7b42..830faef 100644 --- a/src/lexer/lex.l +++ b/src/lexer/lex.l @@ -1,12 +1,10 @@ %option reentrant +%option yylineno %{ #include "token.h" #include "lexer.h" #define GET_STATE LexerState *state = (LexerState *)yyget_extra(yyscanner); -#define ADD_TO_COLUMN COLUMN_NO += yyleng; -#define LINE_NO yylineno+1 -#define TOKENS state->tokens #define COLUMN_NO state->current_column int yywrap(void *) { @@ -17,326 +15,87 @@ int yywrap(void *) { %% \"((\\([\"\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\"\n])*\" { - GET_STATE - Token * token = create_token( - TOKEN_STRING, - LINE_NO, - COLUMN_NO, - yytext - ); - append(TOKENS, token); - ADD_TO_COLUMN + return TOKEN_STRING; } \'((\\([\'\\\/bfnrt]|u[0-9a-fA-F]{4}))|[^\\\'\n])*\' { - GET_STATE - append(TOKENS, create_token( - TOKEN_STRING, - LINE_NO, - COLUMN_NO, - yytext - )); - ADD_TO_COLUMN + return TOKEN_STRING; } ((([0-9]+(\.[0-9]+)?)|(\.[0-9]+))(e((\-|\+)?([0-9]+(\.[0-9]+)?)))?) { - GET_STATE - append(TOKENS, create_token( - TOKEN_NUMBER, - LINE_NO, - COLUMN_NO, - yytext - )); - ADD_TO_COLUMN + return TOKEN_NUMBER; } ([0-9]+\/[0-9]+) { - GET_STATE - append(TOKENS, create_token( - TOKEN_FRACTION, - LINE_NO, - COLUMN_NO, - yytext - )); - ADD_TO_COLUMN + return TOKEN_FRACTION; } -"not"[ \t]+"in" { GET_STATE; append(TOKENS, create_token(TOKEN_NOT_IN, - LINE_NO, - COLUMN_NO, - yytext - )); - append(TOKENS, create_token( - TOKEN_NOT_IN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"&&" { GET_STATE; append(TOKENS, create_token(TOKEN_AND, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"||" { GET_STATE; append(TOKENS, create_token(TOKEN_OR, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"<=" { GET_STATE; append(TOKENS, create_token(TOKEN_LE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -">=" { GET_STATE; append(TOKENS, create_token(TOKEN_GE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"!=" { GET_STATE; append(TOKENS, 
create_token(TOKEN_NE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"==" { GET_STATE; append(TOKENS, create_token(TOKEN_EQ, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"=" { GET_STATE; append(TOKENS, create_token(TOKEN_ASSIGN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"//" { GET_STATE; append(TOKENS, create_token(TOKEN_FLOORDIV, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"<" { GET_STATE; append(TOKENS, create_token(TOKEN_LT, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -">" { GET_STATE; append(TOKENS, create_token(TOKEN_GT, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"+" { GET_STATE; append(TOKENS, create_token(TOKEN_PLUS, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"-" { GET_STATE; append(TOKENS, create_token(TOKEN_MINUS, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"%" { GET_STATE; append(TOKENS, create_token(TOKEN_MODULO, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"*" { GET_STATE; append(TOKENS, create_token(TOKEN_STAR, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"/" { GET_STATE; append(TOKENS, create_token(TOKEN_SLASH, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"^" { GET_STATE; append(TOKENS, create_token(TOKEN_CARET, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } +"not"[ \t]+"in" { return TOKEN_NOT_IN; } +"&&" { return TOKEN_AND; } +"||" { return TOKEN_OR; } +"<=" { return TOKEN_LE; } +">=" { return TOKEN_GE; } +"!=" { return TOKEN_NE; } +"==" { return TOKEN_EQ; } +"=" { return TOKEN_ASSIGN; } +"//" { return TOKEN_FLOORDIV; } +"<" { return TOKEN_LT; } +">" { return TOKEN_GT; } +"+" { return TOKEN_PLUS; } +"-" { return TOKEN_MINUS; } +"%" { return TOKEN_MODULO; } +"*" { return TOKEN_STAR; } +"/" { return TOKEN_SLASH; } +"^" { return TOKEN_CARET; } -"if" { GET_STATE; append(TOKENS, create_token(TOKEN_IF, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"else" { GET_STATE; append(TOKENS, 
create_token(TOKEN_ELSE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"while" { GET_STATE; append(TOKENS, create_token(TOKEN_WHILE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"forever" { GET_STATE; append(TOKENS, create_token(TOKEN_FOREVER, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"for" { GET_STATE; append(TOKENS, create_token(TOKEN_FOR, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"break" { GET_STATE; append(TOKENS, create_token(TOKEN_BREAK, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"continue" { GET_STATE; append(TOKENS, create_token(TOKEN_CONTINUE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"return" { GET_STATE; append(TOKENS, create_token(TOKEN_RETURN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"let" { GET_STATE; append(TOKENS, create_token(TOKEN_LET, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"import" { GET_STATE; append(TOKENS, create_token(TOKEN_IMPORT, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"from" { GET_STATE; append(TOKENS, create_token(TOKEN_FROM, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"do" { GET_STATE; append(TOKENS, create_token(TOKEN_DO, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"true" { GET_STATE; append(TOKENS, create_token(TOKEN_TRUE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"false" { GET_STATE; append(TOKENS, create_token(TOKEN_FALSE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"null" { GET_STATE; append(TOKENS, create_token(TOKEN_NULL, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"delete" { GET_STATE; append(TOKENS, create_token(TOKEN_DELETE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"not" { GET_STATE; append(TOKENS, create_token(TOKEN_NOT, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"in" { GET_STATE; append(TOKENS, create_token(TOKEN_IN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"try" { GET_STATE; 
append(TOKENS, create_token(TOKEN_TRY, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"catch" { GET_STATE; append(TOKENS, create_token(TOKEN_CATCH, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } +"if" { return TOKEN_IF; } +"else" { return TOKEN_ELSE; } +"while" { return TOKEN_WHILE; } +"forever" { return TOKEN_FOREVER; } +"for" { return TOKEN_FOR; } +"break" { return TOKEN_BREAK; } +"continue" { return TOKEN_CONTINUE; } +"return" { return TOKEN_RETURN; } +"let" { return TOKEN_LET; } +"import" { return TOKEN_IMPORT; } +"from" { return TOKEN_FROM; } +"do" { return TOKEN_DO; } +"true" { return TOKEN_TRUE; } +"false" { return TOKEN_FALSE; } +"null" { return TOKEN_NULL; } +"delete" { return TOKEN_DELETE; } +"not" { return TOKEN_NOT; } +"in" { return TOKEN_IN; } +"try" { return TOKEN_TRY; } +"catch" { return TOKEN_CATCH; } -"(" { GET_STATE; append(TOKENS, create_token(TOKEN_LPAREN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -")" { GET_STATE; append(TOKENS, create_token(TOKEN_RPAREN, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"[" { GET_STATE; append(TOKENS, create_token(TOKEN_LBRACKET, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"]" { GET_STATE; append(TOKENS, create_token(TOKEN_RBRACKET, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"{" { GET_STATE; append(TOKENS, create_token(TOKEN_LBRACE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } -"}" { GET_STATE; append(TOKENS, create_token(TOKEN_RBRACE, - LINE_NO, - COLUMN_NO, - yytext - )); ADD_TO_COLUMN; } +"(" { return TOKEN_LPAREN; } +")" { return TOKEN_RPAREN; } +"[" { return TOKEN_LBRACKET; } +"]" { return TOKEN_RBRACKET; } +"{" { return TOKEN_LBRACE; } +"}" { return TOKEN_RBRACE; } -[a-zA-Z_][a-zA-Z0-9_]* { - GET_STATE - append(TOKENS, create_token(TOKEN_IDENTIFIER, - LINE_NO, - COLUMN_NO, - yytext - )); - ADD_TO_COLUMN -} +[a-zA-Z_][a-zA-Z0-9_]* { return TOKEN_IDENTIFIER; } -"." 
{GET_STATE;append(TOKENS, create_token(TOKEN_DOT, - LINE_NO, - COLUMN_NO, - yytext - ));ADD_TO_COLUMN} -"," {GET_STATE;append(TOKENS, create_token(TOKEN_COMMA, - LINE_NO, - COLUMN_NO, - yytext - ));ADD_TO_COLUMN} -":" {GET_STATE;append(TOKENS, create_token(TOKEN_COLON, - LINE_NO, - COLUMN_NO, - yytext - ));ADD_TO_COLUMN} +"." { return TOKEN_DOT; } +"," {return TOKEN_COMMA; } +":" {return TOKEN_COLON; } -\n { - GET_STATE - append(TOKENS, create_token(TOKEN_NEW_LINE, - LINE_NO, - COLUMN_NO, - yytext - )); - COLUMN_NO = 1; -} +\n { return TOKEN_NEW_LINE; } [ \t]+ { GET_STATE - if (COLUMN_NO == 1){ - append(TOKENS, create_token(TOKEN_INDENT, - LINE_NO, - COLUMN_NO, - yytext - )); + if (COLUMN_NO == 0){ + return TOKEN_INDENT; } - ADD_TO_COLUMN // Advance column for whitespace + COLUMN_NO += yyleng; } . { GET_STATE - fprintf(stderr, "%s: line %d column %d: unexpected character '%s'\n", state->path, LINE_NO, COLUMN_NO, yytext); + fprintf(stderr, "%s:%u:%u: unexpected character '%s'\n", state->path, yylineno+1, COLUMN_NO+1, yytext); exit(1); } %% \ No newline at end of file diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 525388c..fd666e7 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -11,7 +11,21 @@ void lexer(LexerState state) { void* buffer = yy_scan_string(state.content, scanner); yy_switch_to_buffer(buffer, scanner); - yylex(scanner); + int token; + while ((token = yylex(scanner)) != 0) { + Token * token_struct = create_token( + token, + yyget_lineno(scanner), + state.current_column, + yyget_text(scanner) + ); + append(state.tokens, token_struct); + if (token == TOKEN_NEW_LINE) { + state.current_column = 0; + } else { + state.current_column += yyget_leng(scanner); + } + } yy_delete_buffer(buffer, scanner); yylex_destroy(scanner); diff --git a/src/lexer/token.c b/src/lexer/token.c index cb0b4c3..a62838c 100644 --- a/src/lexer/token.c +++ b/src/lexer/token.c @@ -1,14 +1,17 @@ #include "token.h" -#include #include #include "../string/string.h" Token 
*create_token(TokenType type, int line, int column, char *value) { Token * token = malloc(sizeof(Token)); - printf("%s\n", value); token->type = type; token->line=line; token->column=column; token->value=cloneString(value); return token; +} + +void free_token(void * ptr) { + Token* token = ptr; + free(token->value); free(token); } \ No newline at end of file diff --git a/src/lexer/token.h b/src/lexer/token.h index d2ea9c7..e93b7da 100644 --- a/src/lexer/token.h +++ b/src/lexer/token.h @@ -2,7 +2,7 @@ #define TOKEN_H typedef enum { - TOKEN_STRING, + TOKEN_STRING = 256, TOKEN_NUMBER, TOKEN_FRACTION, TOKEN_IDENTIFIER, @@ -72,4 +72,5 @@ typedef struct { } Token; Token *create_token(TokenType type, int line, int column, char *value); +void free_token(void * ptr); #endif \ No newline at end of file diff --git a/src/list/list.c b/src/list/list.c index 8d2d463..d3fa339 100644 --- a/src/list/list.c +++ b/src/list/list.c @@ -82,11 +82,14 @@ void print_list(LinkedList *list, void (*print_func)(void *)) { } } -void free_list(LinkedList *list) { +void free_list(LinkedList *list, void (*free_data)(void *)) { Node *current = list->head; while (current) { Node *next = current->next; - free(current->data); + + if (free_data) // Safe to pass NULL if you don't need it + free_data(current->data); + free(current); current = next; } diff --git a/src/list/list.h b/src/list/list.h index a7fb8f2..29c7d29 100644 --- a/src/list/list.h +++ b/src/list/list.h @@ -37,6 +37,6 @@ size_t list_length(LinkedList *list); void print_list(LinkedList *list, void (*print_func)(void *)); // Free all memory used by the list -void free_list(LinkedList *list); +void free_list(LinkedList *list, void (*free_data)(void *)); #endif // LINKEDLIST_H \ No newline at end of file diff --git a/src/main.c b/src/main.c index 9b1ac9e..cd6cff2 100644 --- a/src/main.c +++ b/src/main.c @@ -6,6 +6,7 @@ #include #include #include +#include char* read_file_as_text(const char* filename) { FILE *file = fopen(filename, "r"); @@ -46,7 +47,7 @@
int main() { LexerState state = { path, content, - 1, + 0, tokens }; lexer(state); @@ -55,11 +56,9 @@ int main() { parser(parsed, tokens, false); - Node *current = parsed->head; - while (current) { - printf("%s\n", (char*)((TaggedValue*)current->data)->data); - current = current->next; - } + free_list(tokens, free_token); + + free_list(parsed,free_tagged_value); return 0; } diff --git a/src/parser/parser.c b/src/parser/parser.c index fd382ac..b464b18 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -1,15 +1,24 @@ #include "parser.h" +#include #include +#include +#include +#include "../lexer/token.h" +#include "../list/list.h" +#include "string/string.h" -TaggedValue parse_token(LinkedList * tokens, size_t *index) { +TaggedValue * parse_token(LinkedList * tokens, size_t *index) { Token * token = get_element_at(tokens, *index); switch (token->type) { case TOKEN_STRING: (*index)++; return parse_string(*token); + case TOKEN_NEW_LINE: + (*index)++; + return NULL; default: - perror("unreachable"); - exit(0); + fprintf(stderr, "Panic: %s\n", "unreachable"); + exit(EXIT_FAILURE); } } @@ -17,7 +26,19 @@ void parser(LinkedList * parsed, LinkedList * tokens, bool inline_flag) { size_t index = 0; size_t length = list_length(tokens); while (index < length) { - TaggedValue parsed_code = parse_token(tokens, &index); - append(parsed,&parsed_code); + TaggedValue * parsed_code = parse_token(tokens, &index); + if (parsed_code) + append(parsed,parsed_code); } } + +void free_tagged_value(void *ptr) { + TaggedValue *tagged = ptr; + switch (tagged->type) { + case AST_STRING: + free(tagged->data); + break; + // Add cases if needed + } + free(tagged); // Always free the TaggedValue itself } \ No newline at end of file diff --git a/src/parser/parser.h b/src/parser/parser.h index 5b80481..12dbc56 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -1,9 +1,27 @@ -#include "../lexer/token.h" -#include "string/string.h" +#ifndef PARSER_H +#define PARSER_H + #include
-#include -#include +#include -void parser(LinkedList * parsed, LinkedList * tokens, bool inline_flag); -TaggedValue parse_token(LinkedList * tokens, size_t *index); \ No newline at end of file +typedef struct LinkedList LinkedList; + +typedef enum { + AST_STRING, +} ValueType; + +typedef struct { + ValueType type; + void *data; + +} TaggedValue; + +void parser(LinkedList *parsed, LinkedList *tokens, bool inline_flag); + +TaggedValue *parse_token(LinkedList *tokens, size_t *index); + +void free_tagged_value(void *ptr); + + +#endif // PARSER_H \ No newline at end of file diff --git a/src/parser/string/string.c b/src/parser/string/string.c index 2eadcec..8b72026 100644 --- a/src/parser/string/string.c +++ b/src/parser/string/string.c @@ -7,7 +7,7 @@ #include #include -char *swap_quotes(char *input) { +char *swap_quotes(char *input, char quote) { size_t len = strlen(input); char *result = malloc(len + 1); if (!result) @@ -15,8 +15,8 @@ char *swap_quotes(char *input) { for (size_t i = 0; i < len; ++i) { if (input[i] == '"') - result[i] = '\''; - else if (input[i] == '\'') + result[i] = quote; + else if (input[i] == quote) result[i] = '"'; else result[i] = input[i]; @@ -33,8 +33,8 @@ char *unquote(char *str) { char *swapped = NULL; char *unescaped = NULL; - if (quote == '\'') { - swapped = swap_quotes(str); + if (quote != '"') { + swapped = swap_quotes(str, quote); if (!swapped) return NULL; str = swapped; @@ -62,8 +62,8 @@ char *unquote(char *str) { free(swapped); // If input was single-quoted, swap quotes back in the output - if (quote == '\'') { - char *final = swap_quotes(unescaped); + if (quote != '"') { + char *final = swap_quotes(unescaped, quote); free(unescaped); return final; } @@ -71,9 +71,10 @@ char *unquote(char *str) { return unescaped; } -TaggedValue parse_string(Token token) { - return (TaggedValue){ - AST_STRING, - unquote(token.value), - }; +TaggedValue * parse_string(Token token) { + TaggedValue * taggedValue = malloc(sizeof(TaggedValue)); + + 
taggedValue->type = AST_STRING; + taggedValue->data = unquote(token.value); + return taggedValue; } \ No newline at end of file diff --git a/src/parser/string/string.h b/src/parser/string/string.h index fd3498d..0e7b8a9 100644 --- a/src/parser/string/string.h +++ b/src/parser/string/string.h @@ -1,8 +1,15 @@ -#include "../../lexer/token.h" -#include "../taggedValue.h" +#ifndef STRING_UTILS_H +#define STRING_UTILS_H -char *swap_quotes(char *input); +#include "../../lexer/token.h" +#include "../parser.h" + +// Declare functions related to string processing in parser + +char *swap_quotes(char *input, char quote); char *unquote(char *str); -TaggedValue parse_string(Token token); \ No newline at end of file +TaggedValue *parse_string(Token token); + +#endif // STRING_UTILS_H \ No newline at end of file diff --git a/src/parser/taggedValue.h b/src/parser/taggedValue.h deleted file mode 100644 index 94ecbed..0000000 --- a/src/parser/taggedValue.h +++ /dev/null @@ -1,11 +0,0 @@ -#include "../list/list.h" - -typedef enum { - AST_STRING, -} ValueType; - -typedef struct { - ValueType type; - void *data; - -} TaggedValue; \ No newline at end of file diff --git a/test.ar b/test.ar index cfcb15c..5dadd33 100644 --- a/test.ar +++ b/test.ar @@ -1 +1 @@ -"hello world" \ No newline at end of file +'hello world' \ No newline at end of file