start implementing a parser

This commit is contained in:
2025-05-27 17:19:09 +01:00
parent 3dedd7f348
commit 43bc7663fc
14 changed files with 241 additions and 195 deletions

View File

@@ -1,17 +1,9 @@
#include "lex.yy.h"
#include "lexer.h"
#include "../string/string.h"
#include <stdlib.h>
void lexer(LexerState state) {
yyscan_t scanner;
char *unquoted = unquote(state.content);
if (unquoted) {
printf("%s\n", unquoted);
free(unquoted);
}
yylex_init(&scanner);
yyset_extra(&state, scanner);

View File

@@ -6,7 +6,7 @@
TokenStruct* init_token() {
TokenStruct *tokenStruct = malloc(sizeof(TokenStruct));\
TokenStruct *tokenStruct = malloc(sizeof(TokenStruct));
if (tokenStruct == NULL) {
// handle malloc failure
return NULL;

View File

@@ -2,85 +2,86 @@
#define TOKEN_H
typedef enum {
TOKEN_STRING,
TOKEN_NUMBER,
TOKEN_FRACTION,
TOKEN_IDENTIFIER,
TOKEN_KEYWORD,
TOKEN_NEW_LINE,
TOKEN_INDENT,
TOKEN_STRING,
TOKEN_NUMBER,
TOKEN_FRACTION,
TOKEN_IDENTIFIER,
TOKEN_KEYWORD,
TOKEN_NEW_LINE,
TOKEN_INDENT,
// Operators
TOKEN_AND, // &&
TOKEN_OR, // ||
TOKEN_NOT_IN, // not in
TOKEN_LE, // <=
TOKEN_GE, // >=
TOKEN_LT, // <
TOKEN_GT, // >
TOKEN_NE, // !=
TOKEN_EQ, // ==
TOKEN_ASSIGN,
TOKEN_PLUS, // +
TOKEN_MINUS, // -
TOKEN_MODULO, // %
TOKEN_STAR, // *
TOKEN_FLOORDIV, // //
TOKEN_SLASH, // /
TOKEN_CARET, // ^
// Operators
TOKEN_AND, // &&
TOKEN_OR, // ||
TOKEN_NOT_IN, // not in
TOKEN_LE, // <=
TOKEN_GE, // >=
TOKEN_LT, // <
TOKEN_GT, // >
TOKEN_NE, // !=
TOKEN_EQ, // ==
TOKEN_ASSIGN,
TOKEN_PLUS, // +
TOKEN_MINUS, // -
TOKEN_MODULO, // %
TOKEN_STAR, // *
TOKEN_FLOORDIV, // //
TOKEN_SLASH, // /
TOKEN_CARET, // ^
// Keywords
TOKEN_IF,
TOKEN_ELSE,
TOKEN_WHILE,
TOKEN_FOREVER,
TOKEN_FOR,
TOKEN_BREAK,
TOKEN_CONTINUE,
TOKEN_RETURN,
TOKEN_LET,
TOKEN_IMPORT,
TOKEN_FROM,
TOKEN_DO,
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_NULL,
TOKEN_DELETE,
TOKEN_NOT,
TOKEN_IN,
TOKEN_TRY,
TOKEN_CATCH,
// Keywords
TOKEN_IF,
TOKEN_ELSE,
TOKEN_WHILE,
TOKEN_FOREVER,
TOKEN_FOR,
TOKEN_BREAK,
TOKEN_CONTINUE,
TOKEN_RETURN,
TOKEN_LET,
TOKEN_IMPORT,
TOKEN_FROM,
TOKEN_DO,
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_NULL,
TOKEN_DELETE,
TOKEN_NOT,
TOKEN_IN,
TOKEN_TRY,
TOKEN_CATCH,
// parentheses, brackets, and braces
TOKEN_LPAREN, // (
TOKEN_RPAREN, // )
TOKEN_LBRACKET, // [
TOKEN_RBRACKET, // ]
TOKEN_LBRACE, // {
TOKEN_RBRACE, // }
// parentheses, brackets, and braces
TOKEN_LPAREN, // (
TOKEN_RPAREN, // )
TOKEN_LBRACKET, // [
TOKEN_RBRACKET, // ]
TOKEN_LBRACE, // {
TOKEN_RBRACE, // }
TOKEN_DOT,
TOKEN_COMMA,
TOKEN_COLON,
TOKEN_DOT,
TOKEN_COMMA,
TOKEN_COLON,
} TokenType;
typedef struct {
TokenType type;
int line;
int column;
char* value;
TokenType type;
int line;
int column;
char *value;
} Token;
typedef struct {
int count;
int capacity;
Token* tokens;
int count;
int capacity;
Token *tokens;
} TokenStruct;
TokenStruct* init_token();
TokenStruct *init_token();
void add_token(TokenStruct* token,TokenType type, const char* value, int line, int column);
void add_token(TokenStruct *token, TokenType type, const char *value, int line,
int column);
void free_tokens(TokenStruct* token);
void free_tokens(TokenStruct *token);
#endif

View File

@@ -1,5 +1,7 @@
#include "lexer/lexer.h"
#include "parser/parser.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -46,9 +48,11 @@ int main() {
};
lexer(state);
free(content);
for (int i = 0; i<tokenStruct->count; i++) {
printf("%d\n", tokenStruct->tokens[i].type);
}
TaggedValueStruct taggedValueStruct = init_TaggedValueStruct();
parser(&taggedValueStruct, tokenStruct, false);
free_tokens(tokenStruct);
return 0;
}

20
src/parser/parser.c Normal file
View File

@@ -0,0 +1,20 @@
#include "parser.h"
/**
 * Parse the token at position *index and advance *index past it.
 *
 * @param tokenStruct token list to read from
 * @param index       in/out cursor into tokenStruct->tokens; incremented by
 *                    one for every token consumed
 * @return the parsed value; only TOKEN_STRING is handled so far
 *
 * Any other token type is reported on stderr and terminates the process with
 * a failure status.
 */
TaggedValue parse_token(TokenStruct * tokenStruct, int *index) {
  Token token = tokenStruct->tokens[*index];
  switch (token.type) {
  case TOKEN_STRING:
    // Advance the caller's cursor, not the local pointer: `index++` would
    // leave *index untouched and parser() would spin on the same token.
    (*index)++;
    return parse_string(token);
  default:
    // Not an errno failure, so perror() would append an unrelated message.
    fprintf(stderr, "unreachable: unhandled token type %d at %d:%d\n",
            (int)token.type, token.line, token.column);
    exit(EXIT_FAILURE);
  }
}
/**
 * Walk the token list from the first token to the last, appending every value
 * produced by parse_token() to taggedValueStruct.
 *
 * The cursor is handed to parse_token() by pointer; this loop does not modify
 * it itself. inline_flag is accepted but not used here.
 */
void parser(TaggedValueStruct * taggedValueStruct, TokenStruct * tokenStruct, bool inline_flag) {
  for (int cursor = 0; cursor < tokenStruct->count;) {
    TaggedValue parsed = parse_token(tokenStruct, &cursor);
    TaggedValueStruct_append(taggedValueStruct, parsed);
  }
}

9
src/parser/parser.h Normal file
View File

@@ -0,0 +1,9 @@
#ifndef PARSER_H
#define PARSER_H

#include "../lexer/token.h"
#include "string/string.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/**
 * Parse every token in tokenStruct and append the resulting values to
 * TaggedValueStruct.
 */
void parser(TaggedValueStruct * TaggedValueStruct, TokenStruct * tokenStruct, bool inline_flag);

/**
 * Parse the token at *index in tokenStruct and return its value.
 * index is an in/out cursor into the token list.
 */
TaggedValue parse_token(TokenStruct * tokenStruct, int *index);

#endif // PARSER_H

View File

@@ -0,0 +1,78 @@
#include "string.h"
#include "../../lexer/token.h"
#include <cjson/cJSON.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/**
 * Return a newly allocated copy of input in which every double quote is
 * replaced by a single quote and every single quote by a double quote.
 * All other characters are copied unchanged.
 *
 * @param input NUL-terminated string to copy (must not be NULL)
 * @return heap string the caller must free(), or NULL if allocation fails
 */
char *swap_quotes(const char *input) {
  const size_t length = strlen(input);
  char *swapped = malloc(length + 1);
  if (swapped == NULL) {
    return NULL;
  }

  char *out = swapped;
  for (const char *in = input; *in != '\0'; ++in, ++out) {
    switch (*in) {
    case '"':
      *out = '\'';
      break;
    case '\'':
      *out = '"';
      break;
    default:
      *out = *in;
      break;
    }
  }
  *out = '\0';
  return swapped;
}
/**
 * Decode a quoted string literal into a newly allocated, unescaped C string.
 *
 * Escape handling is delegated to cJSON_Parse(). When the literal starts with
 * a single quote it is first passed through swap_quotes() so that its
 * delimiters become double quotes, then the decoded text is swapped back.
 *
 * @param str literal including its surrounding quotes
 * @return heap string the caller must free(), or NULL when str is NULL or
 *         empty, does not parse as a string, or an allocation fails
 */
char *unquote(const char *str) {
  if (str == NULL || *str == '\0') {
    return NULL;
  }

  const char quote = str[0];
  char *swapped = NULL;
  if (quote == '\'') {
    swapped = swap_quotes(str);
    if (swapped == NULL) {
      return NULL;
    }
    str = swapped;
  }

  cJSON *json = cJSON_Parse(str);
  char *unescaped = NULL;
  if (json != NULL && cJSON_IsString(json)) {
    // Copy the decoded text before the JSON object that owns it is deleted.
    const char *decoded = cJSON_GetStringValue(json);
    if (decoded != NULL) {
      unescaped = strdup(decoded);
    }
  }

  // Single cleanup point: a value that parsed successfully but is not a
  // string (e.g. a number) must be deleted too, otherwise it leaks.
  if (json != NULL) {
    cJSON_Delete(json);
  }
  free(swapped);

  // If input was single-quoted, swap quotes back in the output. Skipped when
  // strdup() failed so swap_quotes() is never handed a NULL pointer.
  if (unescaped != NULL && quote == '\'') {
    char *final = swap_quotes(unescaped);
    free(unescaped);
    return final;
  }
  return unescaped;
}
/**
 * Build a TYPE_STRING TaggedValue from a string token.
 *
 * data is set to the result of unquote(token.value), which is NULL when the
 * token text cannot be unquoted.
 */
TaggedValue parse_string(Token token) {
  TaggedValue value;
  value.type = TYPE_STRING;
  value.data = unquote(token.value);
  return value;
}

View File

@@ -0,0 +1,8 @@
#ifndef PARSER_STRING_H
#define PARSER_STRING_H

#include "../../lexer/token.h"
#include "../taggedValue.h"

/**
 * Return a newly allocated copy of input with single and double quotes
 * exchanged; NULL if allocation fails. The caller frees the result.
 */
char *swap_quotes(const char *input);

/**
 * Decode a quoted string literal into a newly allocated unescaped string.
 * Returns NULL when str is empty or cannot be decoded. The caller frees the
 * result.
 */
char *unquote(const char *str);

/** Wrap the unquoted text of a string token in a TYPE_STRING TaggedValue. */
TaggedValue parse_string(Token token);

#endif // PARSER_STRING_H

26
src/parser/taggedValue.c Normal file
View File

@@ -0,0 +1,26 @@
#include "taggedValue.h"
#include <stdlib.h>
TaggedValueStruct init_TaggedValueStruct() {
TaggedValueStruct taggedValueStruct = {
0,
INITIAL_CAPACITY,
malloc(sizeof(TaggedValue)*INITIAL_CAPACITY)
};
return taggedValueStruct;
}
/**
 * Append a value to the end of the list, growing the storage when it is full.
 *
 * @param TaggedValueStruct list to append to
 * @param TaggedValue       value to store; copied by value (the data pointer
 *                          itself is copied, not what it points to)
 *
 * Aborts the process if the storage cannot be (re)allocated.
 *
 * Note: inside this function both parameter names shadow the typedefs of the
 * same name, so they refer to the variables, not the types.
 */
void TaggedValueStruct_append(TaggedValueStruct *TaggedValueStruct,
                              TaggedValue TaggedValue) {
  if (TaggedValueStruct->count >= TaggedValueStruct->capacity ||
      TaggedValueStruct->TaggedValue == NULL) {
    int new_capacity = TaggedValueStruct->capacity;
    if (new_capacity <= 0) {
      // Doubling zero would never grow; start from the default size.
      new_capacity = INITIAL_CAPACITY;
    } else if (TaggedValueStruct->count >= new_capacity) {
      new_capacity *= 2;
    }

    // Keep the old pointer until realloc() is known to have succeeded.
    void *grown = realloc(TaggedValueStruct->TaggedValue,
                          sizeof *TaggedValueStruct->TaggedValue *
                              (size_t)new_capacity);
    if (grown == NULL) {
      abort();
    }
    TaggedValueStruct->TaggedValue = grown;
    TaggedValueStruct->capacity = new_capacity;
  }

  // Index the element array, not the container pointer: indexing the
  // container would read past the single TaggedValueStruct for count > 0.
  TaggedValueStruct->TaggedValue[TaggedValueStruct->count] = TaggedValue;
  TaggedValueStruct->count++;
}

23
src/parser/taggedValue.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef TAGGED_VALUE_H
#define TAGGED_VALUE_H

/** Discriminator telling which kind of payload a TaggedValue carries. */
typedef enum {
  TYPE_STRING,
} ValueType;

/** A value produced by the parser: a type tag plus an untyped payload. */
typedef struct {
  ValueType type;
  void *data;
} TaggedValue;

/** Number of elements allocated by init_TaggedValueStruct(). */
#define INITIAL_CAPACITY 64

/** Growable array of TaggedValue elements. */
typedef struct {
  int count;                 // number of elements in use
  int capacity;              // number of elements allocated
  TaggedValue * TaggedValue; // element storage
} TaggedValueStruct;

/** Create an empty list with INITIAL_CAPACITY elements allocated. */
TaggedValueStruct init_TaggedValueStruct();

/** Append TaggedValue to the end of the list, growing it when full. */
void TaggedValueStruct_append(TaggedValueStruct *TaggedValueStruct,
                              TaggedValue TaggedValue);

#endif // TAGGED_VALUE_H

View File

@@ -3,65 +3,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <cjson/cJSON.h>
/**
 * Duplicate input, turning each '"' into '\'' and each '\'' into '"'.
 * Returns NULL if the copy cannot be allocated; the caller frees the result.
 */
char *swap_quotes(const char *input) {
    size_t n = strlen(input);
    char *copy = malloc(n + 1);
    if (copy == NULL) {
        return NULL;
    }
    // pos runs up to and including n so the terminator is copied as well;
    // '\0' is neither quote character and passes through unchanged.
    for (size_t pos = 0; pos <= n; ++pos) {
        char ch = input[pos];
        copy[pos] = (ch == '"') ? '\'' : (ch == '\'') ? '"' : ch;
    }
    return copy;
}
/**
 * Decode a quoted string literal into a newly allocated, unescaped C string.
 *
 * The literal is decoded with cJSON_Parse(). A literal that starts with a
 * single quote is passed through swap_quotes() before parsing and the decoded
 * text is swapped back afterwards.
 *
 * @param str literal including its surrounding quotes
 * @return heap string the caller must free(), or NULL when str is NULL or
 *         empty, does not parse as a string, or an allocation fails
 */
char *unquote(const char *str) {
    char *swapped = NULL;
    char *result = NULL;
    cJSON *json = NULL;
    const char *decoded = NULL;

    if (str == NULL || *str == '\0') {
        return NULL;
    }

    const char quote = str[0];
    if (quote == '\'') {
        swapped = swap_quotes(str);
        if (swapped == NULL) {
            return NULL;
        }
        str = swapped;
    }

    json = cJSON_Parse(str);
    if (json == NULL || !cJSON_IsString(json)) {
        // A parsed non-string value still has to be deleted in cleanup.
        goto cleanup;
    }

    // Copy unescaped string before freeing JSON object
    decoded = cJSON_GetStringValue(json);
    if (decoded == NULL) {
        goto cleanup;
    }
    result = strdup(decoded);

    // If input was single-quoted, swap quotes back in the output. Skipped
    // when strdup() failed so swap_quotes() never sees NULL.
    if (result != NULL && quote == '\'') {
        char *final = swap_quotes(result);
        free(result);
        result = final;
    }

cleanup:
    if (json != NULL) {
        cJSON_Delete(json);
    }
    free(swapped);
    return result;
}
// Whitespace character set: space, tab, newline, carriage return, form feed
// and vertical tab.
const char *WHITE_SPACE = " \t\n\r\f\v";

View File

@@ -7,9 +7,5 @@ char* cloneString(char* str);
void stripString(char* str, const char* chars);
char *swap_quotes(const char *input);
char *unquote(const char *str);
#endif // CLONESTRING_H