webpage/lexer.c

231 lines
4.5 KiB
C

#include "lexer.h"
#define HTML_BALISE_LEN 12
typedef struct Cursor {
int chunk;
int offset;
} Cursor;
Cursor curr = {
.chunk = 0,
.offset = -1,
};
Cursor prev = {
.chunk = 0,
.offset = -2,
};
void printtoken(Token* token){
if (token == NULL){
puts("NULL TOKEN");
return;
}
switch (token->type) {
case UNDEFINED_TYPE:
printf("UNDEFINED_TYPE: ");
break;
case DONT_CARE:
printf("DONT_CARE: ");
break;
case TEXT:
printf("TEXT: ");
break;
case BODY:
printf("BODY: ");
break;
case END_BODY:
printf("END_BODY: ");
break;
case HTML:
printf("HTML: ");
break;
case END_HTML:
printf("END_HTML: ");
break;
case A:
printf("A: ");
break;
case END_A:
printf("END_A: ");
break;
case UL:
printf("UL: ");
break;
case LI:
printf("LI: ");
break;
case H1:
printf("H1: ");
break;
case H2:
printf("H2: ");
break;
case H3:
printf("H3: ");
break;
case H4:
printf("H4: ");
break;
case H5:
printf("H5: ");
break;
case H6:
printf("H6: ");
break;
default:
printf("ERROR: UNKNOWN TOKEN: ");
break;
}
if (token->value == NULL){
puts("'NO VALUE FOUND'");
return;
}
printf("'%s'\n", token->value);
}
int increment_cursor(Cursor* cursor){
if (cursor->offset+1 < 0){
cursor->offset++;
return 0;
}
if (page.chunks[cursor->chunk][cursor->offset+1] == '\0'){
cursor->chunk++;
cursor->offset = 0;
} else {
cursor->offset++;
}
if (cursor->chunk >= page.len){
return -1;
}
return 0;
}
char* nextchar(void){
if (increment_cursor(&curr) < 0 || increment_cursor(&prev) < 0){
return NULL;
}
if (curr.chunk >= page.len){
return NULL;
}
return &page.chunks[curr.chunk][curr.offset];
}
void go_back(void){
curr.chunk = prev.chunk;
curr.offset = prev.offset;
prev.offset--;
if (curr.chunk < 0 || curr.offset < 0){
puts("ERROR: go way too back.");
exit(1);
}
}
TokenType token_by_name(const char name[HTML_BALISE_LEN]){
if (name == NULL){
return UNDEFINED_TYPE;
} else if (strncmp(name, "body", HTML_BALISE_LEN) == 0){
return BODY;
} else if (strncmp(name, "/body", HTML_BALISE_LEN) == 0){
return END_BODY;
} else if (strncmp(name, "html", HTML_BALISE_LEN) == 0){
return HTML;
} else if (strncmp(name, "/html", HTML_BALISE_LEN) == 0){
return END_HTML;
} else if (strncmp(name, "a", HTML_BALISE_LEN) == 0){
return A;
} else if (strncmp(name, "/a", HTML_BALISE_LEN) == 0){
return END_A;
} else if (strncmp(name, "ul", HTML_BALISE_LEN) == 0){
return UL;
} else if (strncmp(name, "li", HTML_BALISE_LEN) == 0){
return LI;
} else if (strncmp(name, "h1", HTML_BALISE_LEN) == 0){
return H1;
} else if (strncmp(name, "h2", HTML_BALISE_LEN) == 0){
return H2;
} else if (strncmp(name, "h3", HTML_BALISE_LEN) == 0){
return H3;
} else if (strncmp(name, "h4", HTML_BALISE_LEN) == 0){
return H4;
} else if (strncmp(name, "h5", HTML_BALISE_LEN) == 0){
return H5;
} else if (strncmp(name, "h6", HTML_BALISE_LEN) == 0){
return H6;
}
return DONT_CARE;
}
Token* create_text_token(Token* token, char* cursor){
int i = 0;
do {
cursor = nextchar();
i++;
} while (*cursor != '<');
go_back();
token = malloc(sizeof(Token));
token->type = TEXT;
token->value = "TODO";
token->len = i;
return token;
}
Token* create_balise_token(Token* token, char* cursor){
char balise[HTML_BALISE_LEN] = {0};
int len = 0;
cursor = nextchar();
while (*cursor != '>' && *cursor != ' ' && *cursor != '\n' && *cursor != '\t' && len < HTML_BALISE_LEN){
balise[len] = *cursor;
len++;
cursor = nextchar();
}
token = malloc(sizeof(Token));
token->type = token_by_name(balise);
token->value = malloc(sizeof(char) * len);
strncpy(token->value, balise, len+1);
token->len = len;
go_back();
do {
cursor = nextchar();
} while (*cursor != '>');
return token;
}
Token* nexttoken(void){
Token* token = NULL;
static char* cursor = NULL;
cursor = nextchar();
if (cursor == NULL) return NULL;
while (*cursor == '\0' || *cursor == ' ' || *cursor == '\n' || *cursor == '\t'){
cursor = nextchar();
}
if (*cursor != '<'){
token = create_text_token(token, cursor);
} else if (*cursor == '<'){
token = create_balise_token(token, cursor);
}
return token;
}