change string literals to be length-terminated instead of null-terminated, so null characters can be embedded

This commit is contained in:
2025-06-14 01:16:28 +01:00
parent 937a6598fa
commit 20621944e6
15 changed files with 262 additions and 71 deletions

View File

@@ -15,6 +15,11 @@ int yywrap(void * unused_param) {
%%
"\0" {
fprintf(stderr, "Error: Null character encountered at line %d\n", yylineno);
exit(1);
}
"." { return TOKEN_DOT; }
"!" { return TOKEN_EXCLAMATION; }
"," { return TOKEN_COMMA; }

View File

@@ -1,7 +1,27 @@
#include "lexer.h"
#include "lex.yy.h"
#include "../string/string.h"
void lexer(LexerState state) {
size_t line = 1;
size_t column = 1;
int ch;
while ((ch = fgetc(state.file)) != EOF) {
if (ch == 0 || (ch < 0x20 && ch != '\n' && ch != '\r' && ch != '\t')) {
fprintf(stderr, "%s:%zu:%zu error: disallowed character\n", state.path,
line, column);
exit(1);
}
if (ch == '\n') {
line++;
column = 1;
} else {
column++;
}
}
rewind(state.file);
yyscan_t scanner;
yylex_init(&scanner);
@@ -12,11 +32,14 @@ void lexer(LexerState state) {
int token;
while ((token = yylex(scanner)) != 0) {
Token *token_struct =
create_token(token, state.current_line+1, state.current_column + 1,
yyget_text(scanner));
darray_push(state.tokens, token_struct);
free(token_struct);
Token token_struct = (Token){
token,
state.current_line+1,
state.current_column+1,
yyget_leng(scanner),
cloneString(yyget_text(scanner))
};
darray_push(state.tokens, &token_struct);
if (token == TOKEN_NEW_LINE) {
state.current_line += 1;
state.current_column = 0;

View File

@@ -1,16 +1,5 @@
#include "token.h"
#include "../string/string.h"
#include <stdlib.h>
#include "../memory.h"
/*
 * Builds a heap-allocated Token.
 *
 * The Token is obtained from checked_malloc, and its type, line and column
 * fields are set from the corresponding arguments. The value field is set to
 * the result of cloneString(value) rather than to the `value` pointer itself
 * (presumably an independent copy of the text -- confirm against
 * ../string/string.h).
 *
 * Returns the pointer to the newly allocated Token.
 */
Token *create_token(TokenType type, int line, int column, char *value) {
Token *token = checked_malloc(sizeof(Token));
token->type = type;
token->line = line;
token->column = column;
token->value = cloneString(value);
return token;
}
void free_token(void *ptr) {
Token *token = ptr;

View File

@@ -78,9 +78,9 @@ typedef struct {
TokenType type;
size_t line;
size_t column;
size_t length;
char *value;
} Token;
Token *create_token(TokenType type, int line, int column, char *value);
void free_token(void *ptr);
#endif