lexer: begin refactor, broken?

This commit is contained in:
_N3m0 2024-01-02 22:33:34 +01:00
parent 3eb96aabf0
commit 768473c996
1 changed files with 55 additions and 60 deletions

115
lexer.c
View File

@ -7,7 +7,7 @@ typedef struct Cursor {
int offset; int offset;
} Cursor; } Cursor;
Cursor cursor = { Cursor curr = {
.chunk = 0, .chunk = 0,
.offset = -1, .offset = -1,
}; };
@ -17,20 +17,20 @@ Cursor prev = {
.offset = -2, .offset = -2,
}; };
int increment_cursor(Cursor* cur){ int increment_cursor(Cursor* cursor){
if (cur->offset+1 < 0){ if (cursor->offset+1 < 0){
cur->offset++; cursor->offset++;
return 0; return 0;
} }
if (page.chunks[cur->chunk][cur->offset+1] == '\0'){ if (page.chunks[cursor->chunk][cursor->offset+1] == '\0'){
cur->chunk++; cursor->chunk++;
cur->offset = 0; cursor->offset = 0;
} else { } else {
cur->offset++; cursor->offset++;
} }
if (cur->chunk >= page.len){ if (cursor->chunk >= page.len){
return -1; return -1;
} }
@ -38,23 +38,23 @@ int increment_cursor(Cursor* cur){
} }
char* nextchar(void){ char* nextchar(void){
if (increment_cursor(&cursor) < 0 || increment_cursor(&prev) < 0){ if (increment_cursor(&curr) < 0 || increment_cursor(&prev) < 0){
return NULL; return NULL;
} }
if (cursor.chunk >= page.len){ if (curr.chunk >= page.len){
return NULL; return NULL;
} }
return &page.chunks[cursor.chunk][cursor.offset]; return &page.chunks[curr.chunk][curr.offset];
} }
void go_back(void){ void go_back(void){
cursor.chunk = prev.chunk; curr.chunk = prev.chunk;
cursor.offset = prev.offset; curr.offset = prev.offset;
prev.offset--; prev.offset--;
if (cursor.chunk < 0 || cursor.offset < 0){ if (curr.chunk < 0 || curr.offset < 0){
puts("ERROR: go way too back."); puts("ERROR: go way too back.");
exit(1); exit(1);
} }
@ -96,6 +96,44 @@ TokenType token_by_name(const char name[HTML_BALISE_LEN]){
return DONT_CARE; return DONT_CARE;
} }
/*
 * Consume a run of text up to (but not including) the next '<' and build a
 * TEXT token describing it.
 *
 * token:  overwritten locally with a freshly allocated Token.
 * cursor: local scratch pointer; reassigned from nextchar() on every step.
 *
 * The scan also stops when nextchar() returns NULL (it does so once the
 * cursor runs past the last chunk), instead of dereferencing the NULL.
 *
 * NOTE(review): `token` is received by value, so the Token allocated here is
 * not visible to the caller and is leaked. Returning it (or taking a Token**)
 * needs a signature change together with the call site in nexttoken().
 */
void create_text_token(Token* token, char* cursor){
    int i = 0;

    do {
        cursor = nextchar();
        i++;
    } while (cursor != NULL && *cursor != '<');
    /* Un-read the last character so the next token starts on the '<'. */
    go_back();

    token = malloc(sizeof *token);
    if (token == NULL){
        puts("ERROR: out of memory.");
        exit(1);
    }
    token->type = TEXT;
    token->value = "TODO"; /* placeholder: the text itself is not copied yet */
    token->len = i;
}
/*
 * Read a tag ("balise") name following a '<', build a token whose type is
 * looked up with token_by_name(), then skip input up to the closing '>'.
 *
 * token:  overwritten locally with a freshly allocated Token.
 * cursor: local scratch pointer; reassigned from nextchar() on every step.
 *
 * Both scans stop when nextchar() returns NULL (end of input) instead of
 * dereferencing the NULL.
 *
 * NOTE(review): `token` is received by value, so the Token allocated here is
 * not visible to the caller and is leaked. Returning it (or taking a Token**)
 * needs a signature change together with the call site in nexttoken().
 */
void create_balise_token(Token* token, char* cursor){
    /* One spare byte so the name is NUL-terminated even when it is
     * HTML_BALISE_LEN characters long. */
    char balise[HTML_BALISE_LEN + 1] = {0};
    int len = 0;

    cursor = nextchar();
    while (cursor != NULL
            && *cursor != '>' && *cursor != ' ' && *cursor != '\n' && *cursor != '\t'
            && len < HTML_BALISE_LEN){
        balise[len] = *cursor;
        len++;
        cursor = nextchar();
    }

    token = malloc(sizeof *token);
    if (token == NULL){
        puts("ERROR: out of memory.");
        exit(1);
    }
    token->type = token_by_name(balise);

    /* len characters plus the terminating NUL. */
    token->value = malloc((size_t)len + 1);
    if (token->value == NULL){
        puts("ERROR: out of memory.");
        exit(1);
    }
    memcpy(token->value, balise, (size_t)len + 1);
    token->len = len;

    /* Step back one character, then skip the rest of the tag (attributes
     * included) until the closing '>' or the end of input. */
    go_back();
    do {
        cursor = nextchar();
    } while (cursor != NULL && *cursor != '>');
}
Token* nexttoken(void){ Token* nexttoken(void){
Token* token = NULL; Token* token = NULL;
static char* cursor = NULL; static char* cursor = NULL;
@ -108,39 +146,9 @@ Token* nexttoken(void){
} }
if (*cursor != '<'){ if (*cursor != '<'){
int i = 0; create_text_token(token, cursor);
do {
cursor = nextchar();
i++;
} while (*cursor != '<');
go_back();
token = malloc(sizeof(Token));
token->type = TEXT;
token->value = "TODO";
token->len = i;
} else if (*cursor == '<'){ } else if (*cursor == '<'){
char balise[HTML_BALISE_LEN] = {0}; create_balise_token(token, cursor);
int len = 0;
cursor = nextchar();
while (*cursor != '>' && *cursor != ' ' && *cursor != '\n' && *cursor != '\t' && len < HTML_BALISE_LEN){
balise[len] = *cursor;
len++;
cursor = nextchar();
}
token = malloc(sizeof(Token));
token->type = token_by_name(balise);
token->value = malloc(sizeof(char) * len);
strncpy(token->value, balise, len+1);
token->len = len;
go_back();
do {
cursor = nextchar();
} while (*cursor != '>');
} }
return token; return token;
@ -216,16 +224,3 @@ void printtoken(Token* token){
printf("'%s'\n", token->value); printf("'%s'\n", token->value);
} }
#if 0
<!DOCTYPE html>
<html lang="en">
<head>
<title>https://n3m0.fr/fr/</title>
<link rel="canonical" href="https://n3m0.fr/fr/">
<meta name="robots" content="noindex">
<meta charset="utf-8">
<meta http-equiv="refresh" content="0; url=https://n3m0.fr/fr/">
</head>
</html>
#endif