lexer done?

This commit is contained in:
_N3m0 2024-01-02 20:53:15 +01:00
parent 023c39c1f1
commit ab044c1126
4 changed files with 149 additions and 52 deletions

View File

@ -2,7 +2,7 @@ BIN=webpage
SRCS=main.c config.c page.c lexer.c SRCS=main.c config.c page.c lexer.c
INC=. INC=.
LIB=curl LIB=curl
FLAGS=-Wall -Wextra -Og -g FLAGS=-Wall -Wextra -Og -g -ggdb -fvar-tracking
all: $(BIN) all: $(BIN)

181
lexer.c
View File

@ -1,17 +1,40 @@
#include "lexer.h" #include "lexer.h"
#define HTML_BALISE_LEN 12
/*
 * Position of a reader inside the global page, which is stored as an
 * array of NUL-terminated chunks (page.chunks[chunk][offset]).
 */
typedef struct Cursor {
int chunk; /* index of the chunk in page.chunks */
int offset; /* index of the character inside that chunk; may be negative before the first read */
} Cursor;
Cursor cursor = { Cursor cursor = {
.chunk = 0, .chunk = 0,
.offset = 0, .offset = -1,
}; };
char* nextchar(void){ Cursor prev = {
if (page.chunks[cursor.chunk][cursor.offset+1] == '\0'){ .chunk = 0,
cursor.chunk++; .offset = -2,
cursor.offset = 0; };
/*
 * Advance `cur` by one character inside the global page.
 *
 * - While the offset is still below -1 (warm-up of the trailing cursor) the
 *   offset is simply bumped and nothing is read from the page.
 * - When the character after the current one is the chunk terminator, the
 *   cursor jumps to offset 0 of the next chunk.
 * - Otherwise the offset moves forward by one.
 *
 * A cursor that has already stepped past the last chunk is left untouched:
 * indexing page.chunks with it would read out of bounds.
 */
void increment_cursor(Cursor* cur){
    /* Trailing cursor has not reached the start of the page yet. */
    if (cur->offset + 1 < 0){
        printf("init prev cursor.\n");
        cur->offset++;
        return;
    }

    /* Already past the end of the page: there is no chunk left to inspect. */
    if (cur->chunk >= page.len){
        return;
    }

    if (page.chunks[cur->chunk][cur->offset + 1] == '\0'){
        /* End of this chunk: continue at the beginning of the next one. */
        cur->chunk++;
        cur->offset = 0;
    } else {
        cur->offset++;
    }
}
char* nextchar(void){
increment_cursor(&cursor);
increment_cursor(&prev);
if (cursor.chunk >= page.len){ if (cursor.chunk >= page.len){
return NULL; return NULL;
@ -20,48 +43,111 @@ char* nextchar(void){
return &page.chunks[cursor.chunk][cursor.offset]; return &page.chunks[cursor.chunk][cursor.offset];
} }
/*
 * Rewind the read head by one character.
 *
 * The global `cursor` takes the position stored in `prev`, and `prev` itself
 * is moved one offset further back. If the restored position has a negative
 * chunk or offset, the position is printed and the program exits with
 * status 1.
 */
void go_back(void){
    cursor = prev;      /* copies both chunk and offset */
    prev.offset -= 1;
    printf("got back mf.\n");

    if (cursor.chunk >= 0 && cursor.offset >= 0){
        return;
    }

    /* Rewound before the start of the page: unrecoverable. */
    printf("chunk = %d | offset= %d\n", cursor.chunk, cursor.offset);
    puts("ERROR: go way too back.");
    exit(1);
}
/*
 * Map an HTML tag name (without the angle brackets) to its TokenType.
 *
 * name: tag name, compared over at most HTML_BALISE_LEN characters.
 *
 * Returns UNDEFINED_TYPE when `name` is NULL, the matching type for a
 * recognised tag, and DONT_CARE for any other name.
 */
TokenType token_by_name(const char name[HTML_BALISE_LEN]){
    static const struct {
        const char* tag;
        TokenType type;
    } known_tags[] = {
        { "body",  BODY     },
        { "/body", END_BODY },
        { "ul",    UL       },
        { "li",    LI       },
        { "h1",    H1       },
        { "h2",    H2       },
        { "h3",    H3       },
        { "h4",    H4       },
        { "h5",    H5       },
        { "h6",    H6       },
    };

    if (name == NULL){
        return UNDEFINED_TYPE;
    }

    for (size_t i = 0; i < sizeof known_tags / sizeof known_tags[0]; i++){
        if (strncmp(name, known_tags[i].tag, HTML_BALISE_LEN) == 0){
            return known_tags[i].type;
        }
    }
    return DONT_CARE;
}
Token* nexttoken(void){ Token* nexttoken(void){
Token* token = malloc(sizeof(Token)); Token* token = NULL;
token->value = NULL; static char* cursor = NULL;
cursor = nextchar();
char* word = HTMLbalise(); while (*cursor == '\n'){
if (word == NULL){ printf("new line skipped.\n");
token->type = NO_TYPE; cursor = nextchar();
} else if (strcmp(word, "body") == 0){ }
token->type = BODY;
} else if (strcmp(word, "/body") == 0){ if (*cursor != '<'){
token->type = END_BODY; int i = 0;
} else if (strcmp(word, "ul") == 0){
token->type = UL; do {
} else if (strcmp(word, "li") == 0){ cursor = nextchar();
token->type = LI; i++;
} else if (strcmp(word, "h1") == 0){ printf("text: '%c'.\n", *cursor);
token->type = H1; } while (*cursor != '<');
} else if (strcmp(word, "h2") == 0){ go_back();
token->type = H2;
} else if (strcmp(word, "h3") == 0){ token = malloc(sizeof(Token));
token->type = H3; token->type = TEXT;
} else if (strcmp(word, "h4") == 0){ token->value = "TODO";
token->type = H4; token->len = i;
} else if (strcmp(word, "h5") == 0){ } else if (*cursor == '<'){
token->type = H5; char balise[HTML_BALISE_LEN] = {0};
} else if (strcmp(word, "h6") == 0){ int len = 0;
token->type = H6;
} else { cursor = nextchar();
token->type = NO_TYPE; while (*cursor != '>' && *cursor != ' ' && len < HTML_BALISE_LEN){
balise[len] = *cursor;
len++;
printf("balise: '%c'.\n", *cursor);
cursor = nextchar();
}
token = malloc(sizeof(Token));
token->type = token_by_name(balise);
token->value = malloc(sizeof(char) * len);
strncpy(token->value, balise, len+1);
token->len = len;
do {
cursor = nextchar();
printf("skip: '%c'.\n", *cursor);
} while (*cursor != '>');
} }
return token; return token;
} }
void printtoken(Token* token){ void printtoken(Token* token){
if (token == NULL){
puts("NULL TOKEN");
return;
}
switch (token->type) { switch (token->type) {
case NO_TYPE: case UNDEFINED_TYPE:
printf("NO_TYPE: "); printf("UNDEFINED_TYPE: ");
break;
case DONT_CARE:
printf("DONT_CARE: ");
break; break;
case TEXT: case TEXT:
printf("TEXT: "); printf("TEXT: ");
@ -97,14 +183,27 @@ void printtoken(Token* token){
printf("H6: "); printf("H6: ");
break; break;
default: default:
printf("UNDEFINED TOKEN: "); printf("ERROR: UNKNOWN TOKEN: ");
break; break;
} }
if (token->value == NULL){ if (token->value == NULL){
puts("NO VALUE FOUND"); puts("'NO VALUE FOUND'");
return; return;
} }
printf("%s\n", token->value); printf("'%s'\n", token->value);
} }
#if 0
<!DOCTYPE html>
<html lang="en">
<head>
<title>https://n3m0.fr/fr/</title>
<link rel="canonical" href="https://n3m0.fr/fr/">
<meta name="robots" content="noindex">
<meta charset="utf-8">
<meta http-equiv="refresh" content="0; url=https://n3m0.fr/fr/">
</head>
</html>
#endif

View File

@ -8,7 +8,8 @@
#include "page.h" #include "page.h"
typedef enum TokenType { typedef enum TokenType {
NO_TYPE, UNDEFINED_TYPE,
DONT_CARE,
TEXT, TEXT,
BODY, BODY,
END_BODY, END_BODY,
@ -19,13 +20,9 @@ typedef enum TokenType {
/*
 * One lexed item produced by nexttoken():
 *   type  - classification of the item (TEXT, or a tag type from the enum);
 *   value - text attached to the token, may be NULL (printtoken handles that);
 *   len   - number of characters the lexer counted for this token.
 */
typedef struct Token { typedef struct Token {
TokenType type; TokenType type;
char* value; char* value;
int len;
} Token; } Token;
typedef struct Cursor {
int chunk;
int offset;
} Cursor;
Token* nexttoken(void); Token* nexttoken(void);
void printtoken(Token* token); void printtoken(Token* token);

9
main.c
View File

@ -9,12 +9,13 @@ int main(int argc, char* argv[]){
printPage(); printPage();
Token* token; Token* token = NULL;
do{ do {
token = nexttoken(); token = nexttoken();
printtoken(token); printtoken(token);
//parse(token); } while (token != NULL && token->type != UNDEFINED_TYPE);
} while (token->type != NO_TYPE);
//parse(token);
return 0; return 0;
} }