From 5bca08da05c4b97e0d4934ae904aed6d37ce4118 Mon Sep 17 00:00:00 2001 From: _N3m0 Date: Tue, 2 Jan 2024 21:44:27 +0100 Subject: [PATCH] lexicouille --- lexer.c | 48 +++++++++++++++++++++++++++++++++++------------- lexer.h | 5 +++-- main.c | 7 ++----- 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/lexer.c b/lexer.c index adf02ce..9dd1f06 100644 --- a/lexer.c +++ b/lexer.c @@ -17,11 +17,10 @@ Cursor prev = { .offset = -2, }; -void increment_cursor(Cursor* cur){ +int increment_cursor(Cursor* cur){ if (cur->offset+1 < 0){ - printf("init prev cursor.\n"); cur->offset++; - return; + return 0; } if (page.chunks[cur->chunk][cur->offset+1] == '\0'){ @@ -30,11 +29,18 @@ void increment_cursor(Cursor* cur){ } else { cur->offset++; } + + if (cur->chunk >= page.len){ + return -1; + } + + return 0; } char* nextchar(void){ - increment_cursor(&cursor); - increment_cursor(&prev); + if (increment_cursor(&cursor) < 0 || increment_cursor(&prev) < 0){ + return NULL; + } if (cursor.chunk >= page.len){ return NULL; @@ -48,10 +54,7 @@ void go_back(void){ cursor.offset = prev.offset; prev.offset--; - printf("got back mf.\n"); - if (cursor.chunk < 0 || cursor.offset < 0){ - printf("chunk = %d | offset= %d\n", cursor.chunk, cursor.offset); puts("ERROR: go way too back."); exit(1); } @@ -64,6 +67,14 @@ TokenType token_by_name(const char name[HTML_BALISE_LEN]){ return BODY; } else if (strncmp(name, "/body", HTML_BALISE_LEN) == 0){ return END_BODY; + } else if (strncmp(name, "html", HTML_BALISE_LEN) == 0){ + return HTML; + } else if (strncmp(name, "/html", HTML_BALISE_LEN) == 0){ + return END_HTML; + } else if (strncmp(name, "a", HTML_BALISE_LEN) == 0){ + return A; + } else if (strncmp(name, "/a", HTML_BALISE_LEN) == 0){ + return END_A; } else if (strncmp(name, "ul", HTML_BALISE_LEN) == 0){ return UL; } else if (strncmp(name, "li", HTML_BALISE_LEN) == 0){ @@ -90,8 +101,9 @@ Token* nexttoken(void){ static char* cursor = NULL; cursor = nextchar(); - while (*cursor == '\n'){ - printf("new line skipped.\n"); + if (cursor == NULL) return NULL; + + while (*cursor == '\0' || *cursor == ' ' || *cursor == '\n' || *cursor == '\t'){ cursor = nextchar(); } @@ -101,7 +113,6 @@ Token* nexttoken(void){ do { cursor = nextchar(); i++; - printf("text: '%c'.\n", *cursor); } while (*cursor != '<'); go_back(); @@ -117,7 +128,6 @@ Token* nexttoken(void){ while (*cursor != '>' && *cursor != ' ' && len < HTML_BALISE_LEN){ balise[len] = *cursor; len++; - printf("balise: '%c'.\n", *cursor); cursor = nextchar(); } @@ -127,9 +137,9 @@ Token* nexttoken(void){ strncpy(token->value, balise, len+1); token->len = len; + go_back(); do { cursor = nextchar(); - printf("skip: '%c'.\n", *cursor); } while (*cursor != '>'); } @@ -158,6 +168,18 @@ void printtoken(Token* token){ case END_BODY: printf("END_BODY: "); break; + case HTML: + printf("HTML: "); + break; + case END_HTML: + printf("END_HTML: "); + break; + case A: + printf("A: "); + break; + case END_A: + printf("END_A: "); + break; case UL: printf("UL: "); break; diff --git a/lexer.h b/lexer.h index 2903369..5fe49e8 100644 --- a/lexer.h +++ b/lexer.h @@ -11,8 +11,9 @@ typedef enum TokenType { UNDEFINED_TYPE, DONT_CARE, TEXT, - BODY, - END_BODY, + BODY, END_BODY, + HTML, END_HTML, + A, END_A, UL, LI, H1, H2, H3, H4, H5, H6, } TokenType; diff --git a/main.c b/main.c index f85744c..db08e5a 100644 --- a/main.c +++ b/main.c @@ -7,15 +7,12 @@ int main(int argc, char* argv[]){ getUserConfig(argc, argv); getPage(); - printPage(); - Token* token = NULL; do { token = nexttoken(); printtoken(token); - } while (token != NULL && token->type != UNDEFINED_TYPE); - - //parse(token); + //evaluate(token); + } while (token != NULL && token->type != END_HTML); return 0; }