start implementing a parser

2025-05-27 17:19:09 +01:00
parent 3dedd7f348
commit 43bc7663fc
14 changed files with 241 additions and 195 deletions
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -1,17 +1,9 @@
 #include "lex.yy.h"
 #include "lexer.h"
-#include "../string/string.h"
-#include <stdlib.h>

 void lexer(LexerState state) {
    yyscan_t scanner;

-    char *unquoted = unquote(state.content);
-    if (unquoted) {
-        printf("%s\n", unquoted);
-        free(unquoted);
-    }
-
    yylex_init(&scanner);

    yyset_extra(&state, scanner);
--- a/src/lexer/token.c
+++ b/src/lexer/token.c
@@ -6,7 +6,7 @@


 TokenStruct* init_token() {
-    TokenStruct *tokenStruct = malloc(sizeof(TokenStruct));\
+    TokenStruct *tokenStruct = malloc(sizeof(TokenStruct));
    if (tokenStruct == NULL) {
        // handle malloc failure
        return NULL;
--- a/src/lexer/token.h
+++ b/src/lexer/token.h
@@ -2,85 +2,86 @@
 #define TOKEN_H

 typedef enum {
-    TOKEN_STRING,
-    TOKEN_NUMBER,
-    TOKEN_FRACTION,
-    TOKEN_IDENTIFIER,
-    TOKEN_KEYWORD,
-    TOKEN_NEW_LINE,
-    TOKEN_INDENT,
+  TOKEN_STRING,
+  TOKEN_NUMBER,
+  TOKEN_FRACTION,
+  TOKEN_IDENTIFIER,
+  TOKEN_KEYWORD,
+  TOKEN_NEW_LINE,
+  TOKEN_INDENT,

-    // Operators
-    TOKEN_AND,         // &&
-    TOKEN_OR,          // ||
-    TOKEN_NOT_IN,      // not in
-    TOKEN_LE,          // <=
-    TOKEN_GE,          // >=
-    TOKEN_LT,          // <
-    TOKEN_GT,          // >
-    TOKEN_NE,          // !=
-    TOKEN_EQ,          // ==
-    TOKEN_ASSIGN,
-    TOKEN_PLUS,        // +
-    TOKEN_MINUS,       // -
-    TOKEN_MODULO,      // %
-    TOKEN_STAR,        // *
-    TOKEN_FLOORDIV,    // //
-    TOKEN_SLASH,       // /
-    TOKEN_CARET,        // ^
+  // Operators
+  TOKEN_AND,    // &&
+  TOKEN_OR,     // ||
+  TOKEN_NOT_IN, // not in
+  TOKEN_LE,     // <=
+  TOKEN_GE,     // >=
+  TOKEN_LT,     // <
+  TOKEN_GT,     // >
+  TOKEN_NE,     // !=
+  TOKEN_EQ,     // ==
+  TOKEN_ASSIGN,
+  TOKEN_PLUS,     // +
+  TOKEN_MINUS,    // -
+  TOKEN_MODULO,   // %
+  TOKEN_STAR,     // *
+  TOKEN_FLOORDIV, // //
+  TOKEN_SLASH,    // /
+  TOKEN_CARET,    // ^

-    // Keywords
-    TOKEN_IF,
-    TOKEN_ELSE,
-    TOKEN_WHILE,
-    TOKEN_FOREVER,
-    TOKEN_FOR,
-    TOKEN_BREAK,
-    TOKEN_CONTINUE,
-    TOKEN_RETURN,
-    TOKEN_LET,
-    TOKEN_IMPORT,
-    TOKEN_FROM,
-    TOKEN_DO,
-    TOKEN_TRUE,
-    TOKEN_FALSE,
-    TOKEN_NULL,
-    TOKEN_DELETE,
-    TOKEN_NOT,
-    TOKEN_IN,
-    TOKEN_TRY,
-    TOKEN_CATCH,
+  // Keywords
+  TOKEN_IF,
+  TOKEN_ELSE,
+  TOKEN_WHILE,
+  TOKEN_FOREVER,
+  TOKEN_FOR,
+  TOKEN_BREAK,
+  TOKEN_CONTINUE,
+  TOKEN_RETURN,
+  TOKEN_LET,
+  TOKEN_IMPORT,
+  TOKEN_FROM,
+  TOKEN_DO,
+  TOKEN_TRUE,
+  TOKEN_FALSE,
+  TOKEN_NULL,
+  TOKEN_DELETE,
+  TOKEN_NOT,
+  TOKEN_IN,
+  TOKEN_TRY,
+  TOKEN_CATCH,

-    // parentheses, brackets, and braces
-    TOKEN_LPAREN,    // (
-    TOKEN_RPAREN,    // )
-    TOKEN_LBRACKET,  // [
-    TOKEN_RBRACKET,  // ]
-    TOKEN_LBRACE,    // {
-    TOKEN_RBRACE,     // }
+  // parentheses, brackets, and braces
+  TOKEN_LPAREN,   // (
+  TOKEN_RPAREN,   // )
+  TOKEN_LBRACKET, // [
+  TOKEN_RBRACKET, // ]
+  TOKEN_LBRACE,   // {
+  TOKEN_RBRACE,   // }

-    TOKEN_DOT,
-    TOKEN_COMMA,
-    TOKEN_COLON,
+  TOKEN_DOT,
+  TOKEN_COMMA,
+  TOKEN_COLON,
 } TokenType;

 typedef struct {
-    TokenType type;
-    int line;
-    int column;
-    char* value;
+  TokenType type;
+  int line;
+  int column;
+  char *value;
 } Token;

 typedef struct {
-    int count;
-    int capacity;
-    Token* tokens;
+  int count;
+  int capacity;
+  Token *tokens;
 } TokenStruct;

-TokenStruct* init_token();
+TokenStruct *init_token();

-void add_token(TokenStruct* token,TokenType type, const char* value, int line, int column);
+void add_token(TokenStruct *token, TokenType type, const char *value, int line,
+               int column);

-void free_tokens(TokenStruct* token);
+void free_tokens(TokenStruct *token);

 #endif
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,7 @@
 #include "lexer/lexer.h"
+#include "parser/parser.h"

+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>

@@ -46,9 +48,11 @@ int main() {
    };
    lexer(state);
    free(content);
-    for (int i = 0; i<tokenStruct->count; i++) {
-        printf("%d\n", tokenStruct->tokens[i].type);
-    }
+
+    TaggedValueStruct taggedValueStruct = init_TaggedValueStruct();
+
+    parser(&taggedValueStruct, tokenStruct, false);
+
    free_tokens(tokenStruct);
    return 0;
 }
--- a/src/parser/parser.c
+++ b/src/parser/parser.c
@@ -0,0 +1,20 @@
+#include "parser.h"
+
+/**
+ * Parse the token at position *index and advance *index past it.
+ *
+ * Only TOKEN_STRING is handled so far; the token is handed to parse_string().
+ * Any other token type is reported on stderr and terminates the process with
+ * a failure status.
+ *
+ * @param tokenStruct token list produced by the lexer; *index must be a valid
+ *                    position in tokenStruct->tokens (the caller checks this).
+ * @param index       in/out cursor into the token list.
+ * @return the parsed value for the consumed token.
+ */
+TaggedValue parse_token(TokenStruct * tokenStruct, int *index) {
+  Token token = tokenStruct->tokens[*index];
+  switch (token.type) {
+    case TOKEN_STRING:
+      // Advance the caller's cursor. `index++` would only move the local
+      // pointer and leave the caller looping on the same token forever.
+      (*index)++;
+      return parse_string(token);
+    default:
+      // Not an errno-style failure, so perror() would append a misleading
+      // system error string; report the offending token instead.
+      fprintf(stderr, "parser: unsupported token type %d at %d:%d\n",
+              (int)token.type, token.line, token.column);
+      exit(EXIT_FAILURE);
+  }
+}
+
+/**
+ * Walk the whole token list, parsing one value at a time with parse_token()
+ * and appending each result to taggedValueStruct.
+ *
+ * inline_flag is accepted but not used by this function yet.
+ */
+void parser(TaggedValueStruct * taggedValueStruct, TokenStruct * tokenStruct, bool inline_flag) {
+  // parse_token() is responsible for moving the cursor forward.
+  for (int position = 0; position < tokenStruct->count;) {
+    TaggedValue parsed = parse_token(tokenStruct, &position);
+    TaggedValueStruct_append(taggedValueStruct, parsed);
+  }
+}
--- a/src/parser/parser.h
+++ b/src/parser/parser.h
@@ -0,0 +1,9 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "../lexer/token.h"
+#include "string/string.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Parse every token in tokenStruct and append the results to
+ * TaggedValueStruct. */
+void parser(TaggedValueStruct * TaggedValueStruct, TokenStruct * tokenStruct, bool inline_flag);
+
+/* Parse the single token at *index and return its value. */
+TaggedValue parse_token(TokenStruct * tokenStruct, int *index);
+
+#endif // PARSER_H
--- a/src/parser/string/string.c
+++ b/src/parser/string/string.c
@@ -0,0 +1,78 @@
+#include "string.h"
+#include "../../lexer/token.h"
+
+#include <cjson/cJSON.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Return a newly allocated copy of `input` in which every double quote is
+ * replaced by a single quote and vice versa; all other bytes are copied
+ * unchanged.
+ *
+ * @return heap string the caller must free(), or NULL if allocation fails.
+ */
+char *swap_quotes(const char *input) {
+  const size_t length = strlen(input);
+  char *swapped = malloc(length + 1);
+  if (swapped == NULL) {
+    return NULL;
+  }
+
+  char *out = swapped;
+  for (const char *in = input; *in != '\0'; ++in, ++out) {
+    switch (*in) {
+    case '"':
+      *out = '\'';
+      break;
+    case '\'':
+      *out = '"';
+      break;
+    default:
+      *out = *in;
+      break;
+    }
+  }
+  *out = '\0';
+  return swapped;
+}
+
+/**
+ * Decode a quoted string literal into its unescaped contents.
+ *
+ * The literal is decoded by parsing it as a JSON string with cJSON. A
+ * single-quoted literal is first passed through swap_quotes() so that it
+ * becomes a double-quoted literal, and the decoded text is passed through
+ * swap_quotes() again to restore the original quote characters.
+ *
+ * @param str the literal including its surrounding quotes.
+ * @return heap string the caller must free(), or NULL when `str` is NULL or
+ *         empty, does not parse as a JSON string, or an allocation fails.
+ */
+char *unquote(const char *str) {
+  if (str == NULL || *str == '\0')
+    return NULL;
+
+  const char quote = str[0];
+  char *swapped = NULL;
+  char *result = NULL;
+  cJSON *json = NULL;
+  const char *decoded = NULL;
+
+  if (quote == '\'') {
+    swapped = swap_quotes(str);
+    if (!swapped)
+      return NULL;
+    str = swapped;
+  }
+
+  json = cJSON_Parse(str);
+  if (!json || !cJSON_IsString(json))
+    goto cleanup;
+
+  decoded = cJSON_GetStringValue(json);
+  if (!decoded)
+    goto cleanup;
+
+  // `decoded` is owned by `json`, so produce an independent copy before the
+  // tree is deleted. For single-quoted input the copy is made by
+  // swap_quotes(), which also swaps the quote characters back.
+  if (quote == '\'')
+    result = swap_quotes(decoded);
+  else
+    result = strdup(decoded);
+
+cleanup:
+  // Single exit path: the JSON tree is released even when the parsed value
+  // turned out not to be a string. Both calls accept NULL.
+  cJSON_Delete(json);
+  free(swapped);
+  return result;
+}
+
+/**
+ * Build a TYPE_STRING value from a string token.
+ *
+ * The value's data pointer is whatever unquote() returns for token.value:
+ * a heap-allocated string, or NULL if unquote() fails.
+ */
+TaggedValue parse_string(Token token) {
+  TaggedValue value;
+  value.type = TYPE_STRING;
+  value.data = unquote(token.value);
+  return value;
+}
--- a/src/parser/string/string.h
+++ b/src/parser/string/string.h
@@ -0,0 +1,8 @@
+#ifndef PARSER_STRING_H
+#define PARSER_STRING_H
+
+#include "../../lexer/token.h"
+#include "../taggedValue.h"
+
+/* Return a heap copy of `input` with single and double quotes exchanged;
+ * NULL on allocation failure. Caller frees. */
+char *swap_quotes(const char *input);
+
+/* Decode a quoted string literal; returns a heap string or NULL on failure.
+ * Caller frees. */
+char *unquote(const char *str);
+
+/* Wrap the unquoted contents of a string token in a TYPE_STRING value. */
+TaggedValue parse_string(Token token);
+
+#endif // PARSER_STRING_H
--- a/src/parser/taggedValue.c
+++ b/src/parser/taggedValue.c
@@ -0,0 +1,26 @@
+#include "taggedValue.h"
+#include <stdlib.h>
+
+/**
+ * Create an empty TaggedValueStruct with room for INITIAL_CAPACITY entries.
+ *
+ * If the allocation fails the returned struct has a NULL buffer and a
+ * capacity of 0, so the recorded capacity never exceeds what was actually
+ * allocated and callers can detect the failure.
+ */
+TaggedValueStruct init_TaggedValueStruct() {
+    TaggedValue *storage = malloc(sizeof *storage * INITIAL_CAPACITY);
+    TaggedValueStruct taggedValueStruct = {
+        .count = 0,
+        .capacity = storage != NULL ? INITIAL_CAPACITY : 0,
+        .TaggedValue = storage,
+    };
+    return taggedValueStruct;
+}
+
+/**
+ * Append `value` to the end of `list`, doubling the buffer when it is full.
+ *
+ * The value is copied by assignment; whatever `value.data` points to is not
+ * duplicated. The process is aborted if the buffer cannot be grown, because
+ * this function has no way to report failure to its caller.
+ *
+ * @param list  the growable array to append to.
+ * @param value the entry to store in the next free slot.
+ */
+void TaggedValueStruct_append(TaggedValueStruct *list, TaggedValue value) {
+  if (list->count >= list->capacity) {
+    // A capacity of 0 cannot be doubled; start from the initial size instead.
+    int new_capacity =
+        list->capacity > 0 ? list->capacity * 2 : INITIAL_CAPACITY;
+    // Keep the old pointer until realloc succeeds so it is never lost.
+    TaggedValue *grown =
+        realloc(list->TaggedValue, sizeof *grown * (size_t)new_capacity);
+    if (grown == NULL) {
+      abort();
+    }
+    list->TaggedValue = grown;
+    list->capacity = new_capacity;
+  }
+  // Index the element array, not the struct pointer itself.
+  list->TaggedValue[list->count] = value;
+  list->count++;
+}
--- a/src/parser/taggedValue.h
+++ b/src/parser/taggedValue.h
@@ -0,0 +1,23 @@
+#ifndef TAGGED_VALUE_H
+#define TAGGED_VALUE_H
+
+// Discriminator telling how the `data` pointer of a TaggedValue is to be
+// interpreted.
+typedef enum {
+  TYPE_STRING,
+} ValueType;
+
+// A single parsed value: a type tag plus an untyped pointer to its payload.
+typedef struct {
+  ValueType type;
+  void *data;
+} TaggedValue;
+
+// Number of entries allocated up front by init_TaggedValueStruct().
+#define INITIAL_CAPACITY 64
+
+// Growable array of TaggedValue entries.
+typedef struct {
+  int count;    // entries currently stored
+  int capacity; // entries the buffer has room for
+  TaggedValue * TaggedValue;
+} TaggedValueStruct;
+
+// Create an empty array with INITIAL_CAPACITY slots.
+TaggedValueStruct init_TaggedValueStruct(void);
+
+// Append one value to the end of the array, growing the buffer if needed.
+void TaggedValueStruct_append(TaggedValueStruct *TaggedValueStruct,
+                              TaggedValue TaggedValue);
+
+#endif // TAGGED_VALUE_H
--- a/src/string/string.c
+++ b/src/string/string.c
@@ -3,65 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <cjson/cJSON.h>
-
-char *swap_quotes(const char *input) {
-    size_t len = strlen(input);
-    char *result = malloc(len + 1);
-    if (!result) return NULL;
-
-    for (size_t i = 0; i < len; ++i) {
-        if (input[i] == '"') result[i] = '\'';
-        else if (input[i] == '\'') result[i] = '"';
-        else result[i] = input[i];
-    }
-    result[len] = '\0';
-    return result;
-}
-
-char *unquote(const char *str) {
-    if (*str == '\0') return NULL;
-
-    char quote = str[0];
-    char *swapped = NULL;
-    char *unescaped = NULL;
-
-    if (quote == '\'') {
-        swapped = swap_quotes(str);
-        if (!swapped) return NULL;
-        str = swapped;
-    }
-
-    cJSON *json = cJSON_Parse(str);
-    if (!json || !cJSON_IsString(json)) {
-        if (swapped) free(swapped);
-        return NULL;
-    }
-
-    // Copy unescaped string before freeing JSON object
-    const char *decoded = cJSON_GetStringValue(json);
-    if (!decoded) {
-        cJSON_Delete(json);
-        if (swapped) free(swapped);
-        return NULL;
-    }
-
-    unescaped = strdup(decoded);
-    cJSON_Delete(json);
-    if (swapped) free(swapped);
-
-    // If input was single-quoted, swap quotes back in the output
-    if (quote == '\'') {
-        char *final = swap_quotes(unescaped);
-        free(unescaped);
-        return final;
-    }
-
-    return unescaped;
-}

 const char *WHITE_SPACE = " \t\n\r\f\v";

--- a/src/string/string.h
+++ b/src/string/string.h
@@ -7,9 +7,5 @@ char* cloneString(char* str);

 void stripString(char* str, const char* chars);

-char *swap_quotes(const char *input);
-
-char *unquote(const char *str);
-

 #endif // CLONESTRING_H