From 154064575988ecc0352a42d1082d246806a8c55d Mon Sep 17 00:00:00 2001 From: William Bell Date: Tue, 27 May 2025 03:08:49 +0100 Subject: [PATCH] add flex lexer --- .gitignore | 5 +++++ Makefile | 20 +++++++++++++++++-- bin/cargon | Bin 17176 -> 0 bytes src/lexer/lex.l | 47 ++++++++++++++++++++++++++++++++++++++++++++ src/lexer/lexer.c | 20 +++++++++++++++++++ src/lexer/lexer.h | 1 + src/lexer/token.c | 35 +++++++++++++++++++++++++++++++++ src/lexer/token.h | 29 +++++++++++++++++++++++++++ src/main.c | 13 ++---------- src/number/number.c | 12 +++++------ 10 files changed, 163 insertions(+), 19 deletions(-) delete mode 100755 bin/cargon create mode 100644 src/lexer/lex.l create mode 100644 src/lexer/lexer.c create mode 100644 src/lexer/lexer.h create mode 100644 src/lexer/token.c create mode 100644 src/lexer/token.h diff --git a/.gitignore b/.gitignore index c6127b3..5a80a35 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,8 @@ modules.order Module.symvers Mkfile.old dkms.conf + +bin + +*.yy.c +*.yy.h \ No newline at end of file diff --git a/Makefile b/Makefile index f6378c6..e95bc05 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,19 @@ -build: +LEXER_SRC = src/lexer/lex.l +LEXER_C = src/lexer/lex.yy.c +LEXER_H = src/lexer/lex.yy.h + +CFILES = $(shell find src -name '*.c') +BINARY = bin/cargon + +all: $(BINARY) + +$(LEXER_C) $(LEXER_H): $(LEXER_SRC) + flex --header-file=$(LEXER_H) -o $(LEXER_C) $(LEXER_SRC) + +$(BINARY): $(CFILES) $(LEXER_C) $(LEXER_H) mkdir -p bin - gcc -O3 -o bin/cargon $(shell find src -name '*.c') -lm -Wall -Wextra -Werror \ No newline at end of file + gcc -O3 -o $(BINARY) $(CFILES) -lm -Wall -Wextra -Wno-unused-function + +clean: + rm -rf bin + rm -f $(LEXER_C) $(LEXER_H) \ No newline at end of file diff --git a/bin/cargon b/bin/cargon deleted file mode 100755 index e010e0029963ebcb36eb0f8bac63617fa697a66b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17176 zcmeHO3vgT2nLd)8M0x2tG@<5^q5=Us#EP94ahMP(e&iY)$otWRD2gn_0$CEWl$t;x ztmD94CUo+o!M=+v>i&fAjcs%C9n?R5y+IdG!zLFVw{i%k7&RDJmhQD zVs>_!*_~ORnS1~9f9LewMo1Hl=O-zG7P;^z)VqPAyLxH7aGTl6h&E);7M;3 zMJ1n0EmBWWm7XonLCOdYe$Ud7G)7Ug9{DC&CghU~htl)XFPmlkl^(&Q*CX|Mq@JSv zWDtU)%AaHgUGrsrxzvR;3NC(UELX2Z>gCdEDX%D%L1E>G@rM6SdCR08`@YC0mp&z7 zMOAsrp+|oCuLjl3D`a{3+QB8;K~d>mHZv65GH=ezP(xWL7*4d5walMaHgArzIpUnf zO(463RcOhbdsHni`-87?awUj|HG?Ck6-_ga0`X{$w8fR37}tdGJIYyfF_x1GocMZa4_w>Rb@~bGR;HcDA`#ByN{7FMv*B zm$9Tv0BRm$p1M^_Jq`Yte`~Ng?vJfnS`~_f{j0oNLVo7)Y;B5!JFN43^9L8Fis-ANK=$G`J0*|+x)CK9*ajB1PKud z`NI(MhC&e^YZAoc!HnROHZ*yIBp{Vxb&Lg~v0ykJK#9H~;Tp|jWB6SlgwlLsVHZJZ{&C04PHcnWI0RQ;pq zI|A>P>!HGva!@_v66p$8{N5bg=MZwqMS{|tN%d8s+k~s7nbLYpxTb_bkD74V)p=^K z3BSxFulhZ$8&{Zc<`T>p5a|LA*i5*2-Y+uY{JDlp<9W)2Q{7Z>xI~Qdr%QzrmxvLj zd`C#Ri|#^JtzpE^chOzo<3yC-T`s{;KJrnD1G)*HAVm0mnF&Y7$cA+$+@3`l+ib#B zpQGeP6F!Lm8KN!`A$u;77?Bp2UBOdra>4DSQ_rB0uKe6fUqdI3Yrne>I)=ZDyy5H8t?aj>nJ>bl= zL%3^Kxez11kK#m-GKjJS}DEJ)Hjs;%Ny> z@84}x}5WO z5>HD}+QIoP#M2U#F5>(Th^M6|%{ae;c#Zh}ivXtGLOd-&>0Zt+CZ3j_bPwkj5Kl`^ zI?4H2#M4rf-V1(pMRFziJJki3=^YDqprq6ixI=GyBi^DLM|9%=;0FNf#>*AS)OeJs z8}9?V7Z`vqkV`;2l^oEu=`ZVT$6BrjcaXSO2eeI{5Nv?p^VA}#*CAVxq}JSUN0)9K zZT*vlg!RKIee6+E=^oJ59qfArN*CdRloRq?v+q^ysvXFIQU9noS!W#Hq#GaTtta}I zt*WU=Rvd&z1-WO+y=9D@2{^T@XothAz$u2(mT%==#~Te;fwLf$e>JSV9WFdw5zHaQ~G4zj)h+%nI&eg)>{{j zqo{6t7@w%OFFXJyGvz!sFDW04JuvD`0G2zt2JP(%492N{Q-0~Kkd^jTB-8ZxhC)UC zz&~r|SHD1x);s22GZ8OE^p2_RC_y)l>kocXj8<*X+Y6_aaEo?cYXg7=T>o_@)0Rw3 z)!PNspx+>55^nv${iMYk>%-sBMjuUYyrLT)r+m;tjVAEyKSrZ=%zYTWICTZ^mw018 zidgF3ze4XAtkQ1y_X4V)-U*!QryJ*WV?b{^8=uPkXdKp!xAiCVa~E}AC;DLGQ==Qc zWyS?T_4Z5a+coN(bL)+Rf!(^%?KXOJs6ppKov)j8KCU;qIjhl0k>;yO&S?+2N=5ply7QGJiXutv(#k)*L}HtSWhbKwJL-uJ9c9vk z+)EB%2>u6{uW@~XcpoI*qKwIanMbsT8ql?%vHrw>Ry4|kX$hX7kvL;NeAhgQVi>4a zxNawU4XUqg~^nOy)yL;X{niCW-_hkE!xf((GV~Cc{e>?aWeJL7wBsku2=17vD|O~nRK*R z93`pJ&!JgtdsTH&wKlx7X4uZ zCgnFMd)fcb{XywjmEPS-PuG5aG<_$Zrd)y z=;%g`t?k(TMaFyRNoexU4+>IkgtzXu=|&2Zm1%q%{ZE`*Wb~Xn+ICi}+t;;wD6hA# zwds%R9TWEH?Uj04Chn?8(vWrWc?pwDxo*4zp`ywg6DRp&33}nRy0NS%)p|}YP5jx! zG#{hJ)zr@?r8eh^0C%r8`Y|SSn z#ILJ2KIj{_TW?=cM6=&_D=$x+){Q%DVlj1Zbl-u3T`Zt$;g_eXo-J`A_Sgf7gl?Jks;jBAzUD6idRll?`tD>(-=Ebgz0r1V=`M%(!jT;N{gUR`H= zPV)y=2W=<4aIG`GN^L+1^~R|><3MT|qD!%IG*c1bij#dB>}`G7QeXMG)=Khj?Zr7W zAe^ctvrwZy>TM?z@w$#p8NIYGF}AM`@_m;0ViLTP1gA>Dl-|*dd``z^Y^XOr;(l{) zO8-4lWq%gFbZ=6Apc#v~4;0Q}kXMQ6Fcd`n>N?g~>N?1pkMz=yQ+_0*3E-FFTP+nGf#|7(vu*m@la zR*&JY#mO;qaOs~48=1YR)?R#zHvWgU9c9qHu?1;V1Zt~6mY z!0*JR!#B@@GPk{Gm$k}X+*(j=cUYt2?8QLbcAHzYFzt1Ji7N%W9?QxvCHdv{y|!&; zc`C0L*G80weH{;kox!yZ7`D1R5Jumc;=pQ5*pvmiHc(*VVD#B2G{B0!Y&JnzI{5rd>>%gJcUZ+w9vJb!x8MQwUzGX}3XhpQ(7#(06i?636x4IJ`f&?it#~+dxuEJ_CH4Q6 z`bUYLV<|{6rvAgCXI}~`{qu{N2=U1Ry32pAK_8|sx(Jf6luXCFe;!s#`_w*>-pW!i z|8qmURAVXme-%CEQ|OiYlLgDvo<{wH^Dy3mQ&9Nh(qLtGzer|MjH&YRg@o4!$y^lg z=s9036|tA(ys`t^9?rin3&K{8^Mx}1|1F~aAI~j+9&N4=Y9(DGX+Y9#lJ1i9CzAe? zq`#K*fTV9r`e#YcN;+EpoOP+B*GW20(ppK^NE(n-ScmH*L6=ljEpU|Jujo=o z#cby+XN6-{`K;OH^UEtldcHe#lS1}cCO4g$b2yyL7!Id?KOBC29-Pb?P9E^E_yXV- z^#0s-t0D#bL7V+=p7V^Y0kz5&r(e?I`(s|mMiOwE5P#}Y2DxIuao@JX<9uS;NR{#cSneoG#_GY_7~gFl=H ze~RPqkMzHq-+m_Xy>de5?*w4-bHK^Zx$T(DlmDB*9ipm3?tjW7e-`*8YjJkGDy0HU zki)ghlsx!!;FD;Pl7*}Jvy#i(Sya|by#sOO$!8_-BK&Xea4Sm$rS~7h+4CD-FZfd? z@^@=c3CVJo$%R3a*(D?%mHkt_i`pe|mu#2Er2M0K%6%F**_m`nK}r5i<`bof2!X$Y z1O0WLe10$Gd!?PNT#K>85--o`C+|vpvy|U1<$ERGFXh#{xDygr4+5V_`F@T|#zZVw zuxZ?(%kiMb??m9#PjbimWxyRKeHEQe^6*cuOCXXkH}I>Kkcn;teu)*ckz9XN(iWLd zQf8pWFMjh6hMhhpUVbCuj;)O2kN&MZdi$0;{k}M&jo#+QCT|omoDvYJp6d0txtG>e z;m?08N1htpQ@g^ww8o=XuV9`f^|vo}*L!ZStzB8O%CpM7xV{GG7jlmhxUxU^&-uo4 z5WbM#8%{*=W}aTq(*cL9CjIU&jxh{@JpKl6+{>DSP0>&=a8GT_>x%~?VTOYegL0L0 z9D&YE3_{2)9`lBqL*BSw7Ds0qg7R?05W%&2-KrYT%4P1Vn!h-2krkXnnm7rO1;}7= zDq;vEk7f+PP;NXZj#K#PP>5(4ltpzDCoY&2|pz=RL~!8PzOD-33P;mI#E@m z!Ot2Z2^?=(70EUW)l|A$9zV${@}fOknww>X`4JSDOlMGr2nEB<{#cyJz5)aIDV420 ze!e9bmCmDcFhkUQC@`KqZ-e7Eo|@aLzwxk*7%El#lMkKIF`s)GlC_7bh4!a&GD8xW zv-zH;xOWREPT+9bsA4!w0Y4(J6NB;;OPR~Gkf zWn7$`%$yDPgi(Z`@t8>2=8rYg7~o=(@gObc4|z#J#-gD(b8^d_aeoWqy#G665k8`v z{zgwA=56wO8XKUiIHBS3#$sL=N*XGTjJ-`kA4-iNFWj8G0FfD*l{xY8tjQmatC{M5 zMrq%Mr3_#9L@36*M?q1wZxiU?K2h=4AVSX)N`Jo8SJb5hCC$#GlAi}^)^CyeiqiKM zb5QHM!s-w)>pvy+6?G^|f#h_YYpM-?rRdTKwKYrhxiv=>$SYQL)J9+^Nk zlAo2nT;^C17@4f})&5sewcmvZ50r;F+zp2ImP%jkCl#G9g_ZqEPSN|3PJ30wtNp5? zJEXqyv9e#ntOF79snS>bWkvg?pekSKtNQ<))L$&+)c#x1URhz%r#*VE{!f9Ch{`{^ zj?492y_bLp54r9C6i~CiL+U9?-*3%9JtipnvmAZ({H5rFW6j!s@tDE>B}ZTFFBDbp zT~sc)_CF`}mH(MTN-CPieksL%iwKEi^}jDuBqjftLvHy8k!aT6BlQ$Ll+9S=qS{a8 zc?1zW^=04JsP{?g9Cog};uSp(U3$ir!#?}IsarmXDmjSopm;_5fKuO4`f4Ao&eiu* z#KcvO6otK)f5q>r>GUmX8k=b zA-AVoP&Lj3H)wyBhce}rwcjODo|pQY1)IG)vM~{vD9e}P*;J*UJO0R43TB$qMR@vA znPIsc*t8DOJge%h>P5p#+0GT^&32=RByti}xk{F{OF1Gu=|}Wp1>w>hg&dasC)Oaw AW&i*H diff --git a/src/lexer/lex.l b/src/lexer/lex.l new file mode 100644 index 0000000..166168d --- /dev/null +++ b/src/lexer/lex.l @@ -0,0 +1,47 @@ +%{ +#include "token.h" +int current_line = 1; +int current_column = 1; + +int yywrap() { + return 1; +} +%} + +%% + +\"(\\[a-z\"'`]|[^\\"])*\" { + add_token(TOKEN_STRING, yytext, current_line, current_column); + current_column += yyleng; +} + +[0-9]+ { + add_token(TOKEN_NUMBER, yytext, current_line, current_column); + current_column += yyleng; +} + +[a-zA-Z_][a-zA-Z0-9_]* { + add_token(TOKEN_IDENTIFIER, yytext, current_line, current_column); + current_column += yyleng; +} + +"." { + add_token(TOKEN_DOT, yytext, current_line, current_column); + current_column += yyleng; +} + +\n { + add_token(TOKEN_NEW_LINE, yytext, current_line, current_column); + current_line++; + current_column = 1; +} + +[ \t]+ { + current_column += yyleng; // Advance column for whitespace +} + +. { + fprintf(stderr, "Error: Unexpected character '%c' at line %d\n", *yytext, yylineno); + exit(1); +} +%% \ No newline at end of file diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c new file mode 100644 index 0000000..38bc85c --- /dev/null +++ b/src/lexer/lexer.c @@ -0,0 +1,20 @@ +#include "lex.yy.h" +#include "token.h" + +int lexer() { + const char *input = "term.log\n"; + + + + void* buffer = yy_scan_string(input); + yy_switch_to_buffer(buffer); + yylex(); // This fills the token array + yy_delete_buffer(buffer); + + for (int i = 0; i < token_count; i++) { + printf("Token(type=%d, value='%s')\n", tokens[i].type, tokens[i].value); + } + + free_tokens(); + return 0; +} \ No newline at end of file diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h new file mode 100644 index 0000000..c03af10 --- /dev/null +++ b/src/lexer/lexer.h @@ -0,0 +1 @@ +int lexer(); \ No newline at end of file diff --git a/src/lexer/token.c b/src/lexer/token.c new file mode 100644 index 0000000..2e1a207 --- /dev/null +++ b/src/lexer/token.c @@ -0,0 +1,35 @@ +#include +#include +#include "token.h" + +#define INITIAL_CAPACITY 64 + +Token* tokens = NULL; +int token_count = 0; +static int token_capacity = 0; + +void add_token(TokenType type, const char* value, int line, int column) { + if (tokens == NULL) { + token_capacity = INITIAL_CAPACITY; + tokens = malloc(sizeof(Token) * token_capacity); + } else if (token_count >= token_capacity) { + token_capacity *= 2; + tokens = realloc(tokens, sizeof(Token) * token_capacity); + } + + tokens[token_count].type = type; + tokens[token_count].value = strdup(value); + tokens[token_count].line = line; + tokens[token_count].column = column; + token_count++; +} + +void free_tokens() { + for (int i = 0; i < token_count; ++i) { + free(tokens[i].value); + } + free(tokens); + tokens = NULL; + token_count = 0; + token_capacity = 0; +} \ No newline at end of file diff --git a/src/lexer/token.h b/src/lexer/token.h new file mode 100644 index 0000000..b2d6131 --- /dev/null +++ b/src/lexer/token.h @@ -0,0 +1,29 @@ +#ifndef TOKEN_H +#define TOKEN_H + +typedef enum { + TOKEN_STRING, + TOKEN_NUMBER, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_DOT, + TOKEN_NEW_LINE, +} TokenType; + +typedef struct { + TokenType type; + char* value; + int line; + int column; +} Token; + +extern int token_count; + +extern Token* tokens; + + +void add_token(TokenType type, const char* value, int line, int column); + +void free_tokens(); + +#endif \ No newline at end of file diff --git a/src/main.c b/src/main.c index 17bda6e..7e0f830 100644 --- a/src/main.c +++ b/src/main.c @@ -4,6 +4,7 @@ #include #include #include +#include "lexer/lexer.h" void initialize() { initNumber(); @@ -14,16 +15,6 @@ void cleanup() { } int main() { - initialize(); - char *code = "1.2e20"; - struct number mynum = translateNumber(code); - if (mynum.denominator == 0) { - printf("Invalid number\n"); - return 1; - } - double f = 1.0 * mynum.numerator / mynum.denominator; - printf("Numerator: %ld\n", mynum.numerator); - printf("Denominator: %lu\n", mynum.denominator); - printf("Float: %lf\n", f); + lexer(); return 0; } diff --git a/src/number/number.c b/src/number/number.c index 63ddef0..071473b 100644 --- a/src/number/number.c +++ b/src/number/number.c @@ -49,19 +49,19 @@ void doubleToFraction(double num, int64_t *numerator, uint64_t *denominator) { int currentSign = (num < 0) ? -1 : 1; num = fabs(num); - double tolerance = 1.0e-10; - double h1 = 1, h2 = 0, k1 = 0, k2 = 1; - double b = num; + long double tolerance = 1.0e-10; + long double h1 = 1, h2 = 0, k1 = 0, k2 = 1; + long double b = num; do { - double a = floor(b); - double aux = h1; + long double a = floor(b); + long double aux = h1; h1 = a * h1 + h2; h2 = aux; aux = k1; k1 = a * k1 + k2; k2 = aux; b = 1 / (b - a); - } while (fabs(num - h1 / k1) > num * tolerance); + } while (fabsl(num - h1 / k1) > num * tolerance); *numerator = (int64_t)(h1 * currentSign); *denominator = (uint64_t)k1;