Diff
checker
Text
Text
Bilder
Dokumente
Excel
Ordner
Legal
Enterprise
Desktop-App
Preise
Einloggen
Diffchecker Desktop herunterladen
Texte vergleichen
Finde den Unterschied zwischen zwei Textdateien
Werkzeuge
Verlauf
Live-Editor
Gleiches ausblenden
Zeilenumbruch aus
Ansicht
Zweispaltig
Einspaltig
Vergleichsgenauigkeit
Intelligent
Wort
Zeichen
Syntaxhervorhebung
Syntax auswählen
Ignorieren
Text umwandeln
Zur ersten Änderung
Eingabe bearbeiten
Diffchecker Desktop
Der sicherste Weg, Diffchecker zu nutzen. Hol dir die Desktop-App: Deine Diffs verlassen nie deinen Computer!
Desktop holen
lexing_goto_v_musttail
Erstellt
vor 12 Monaten
Diff läuft nie ab
Löschen
Exportieren
Teilen
Erklären
47 Entfernungen
Zeilen
Gesamt
Entfernt
Zeichen
Gesamt
Entfernt
Um diese Funktion weiterhin zu nutzen, aktualisiere auf
Diff
checker
Pro
Preise anzeigen
311 Zeilen
Kopieren
109 Hinzufügungen
Zeilen
Gesamt
Hinzugefügt
Zeichen
Gesamt
Hinzugefügt
Um diese Funktion weiterhin zu nutzen, aktualisiere auf
Diff
checker
Pro
Preise anzeigen
354 Zeilen
Kopieren
#include "lexer.h"
#include "lexer.h"
#include "common.h"
#include "common.h"
#include "mem.h"
#include "mem.h"
#include "strings.h"
#include "strings.h"
#include <stddef.h>
#include <stddef.h>
#include <stdint.h>
#include <stdint.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#define SINGLE_TOK(t) ((Token){.type = t})
#define SINGLE_TOK(t) ((Token){.type = t})
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_STRING] = STRING("T_STRING"),
[T_STRING] = STRING("T_STRING"),
[T_TRUE] = STRING("T_TRUE"),
[T_TRUE] = STRING("T_TRUE"),
[T_FALSE] = STRING("T_FALSE"),
[T_FALSE] = STRING("T_FALSE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_IDENT] = STRING("T_IDENT"),
[T_IDENT] = STRING("T_IDENT"),
[T_PLUS] = STRING("T_PLUS"),
[T_PLUS] = STRING("T_PLUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_SLASH] = STRING("T_SLASH"),
[T_SLASH] = STRING("T_SLASH"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EOF] = STRING("T_EOF")};
[T_EOF] = STRING("T_EOF")};
Lexer Lexer_new(Str input) {
Lexer Lexer_new(Str input) {
return (Lexer){
return (Lexer){
.input = input,
.input = input,
.pos = 0,
.pos = 0,
};
};
}
}
#define cur(L) (L->input.p[L->pos])
#define cur(L) (L->input.p[L->pos])
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
uint8_t lower = cc | 0x20;
uint8_t lower = cc | 0x20;
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_digit = (cc >= '0' && cc <= '9');
bool is_digit = (cc >= '0' && cc <= '9');
return is_alpha || is_digit || cc == '_' || cc == '-';
return is_alpha || is_digit || cc == '_' || cc == '-';
}
}
// we can "intern" these, since all of them are the same, regardless of position
// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Kopieren
Kopiert
Kopieren
Kopiert
size_t
Lexer_all
(Lexer *l, Allocator *a, Token **out)
{
#define rule(name)
size_t
name(Lexer *l, Allocator *a, Token **out)
rule(Lexer_all);
rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);
typedef size_t (*rule_t)
(Lexer *l, Allocator *a, Token **out)
;
static rule_t jump_table[256] = {
[0 ... 255] = &unknown,
[' '] = &whitespace,
['\t'] = &whitespace,
['\n'] = &whitespace,
[';'] = &comment,
['('] = &delimitor_left,
[')'] = &delimitor_right,
['@'] = &builtin,
['.'] = &number,
['0' ... '9'] = &number,
['a' ... 'z'] = &ident,
['A' ... 'Z'] = &ident,
['_'] = &ident,
['\''] = &quoted,
['"'] = &string,
['+'] = &plus,
['-'] = &minus,
['/'] = &slash,
['*'] = &asterisks,
['='] = &equal,
['['] = &braket_left,
[']'] = &braket_right,
[0] = &end,
};
#ifdef __clang__
#define musttail [[clang::musttail]]
#elif __GNUC__
#define musttail [[gnu::musttail]]
#else
#define musttail
#endif
#define JUMP_TARGET return musttail jump_table[(int32_t)l->input.p[l->pos]](l, a, out)
rule(Lexer_all) {
ASSERT(out != NULL, "Failed to allocate token list");
ASSERT(out != NULL, "Failed to allocate token list");
// empty input
// empty input
if (l->input.len == 0) {
if (l->input.len == 0) {
out[0] = INTERN_EOF;
out[0] = INTERN_EOF;
return 1;
return 1;
}
}
size_t true_hash = Str_hash(&STRING("true"));
size_t true_hash = Str_hash(&STRING("true"));
size_t false_hash = Str_hash(&STRING("false"));
size_t false_hash = Str_hash(&STRING("false"));
size_t count = 0;
size_t count = 0;
Kopieren
Kopiert
Kopieren
Kopiert
static void *jump_table[256] = {
[0 ... 255] = &&unknown,
[' '] = &&whitespace,
['\t'] = &&whitespace,
['\n'] = &&whitespace,
[';'] = &&comment,
['('] = &&delimitor_left,
[')'] = &&delimitor_right,
['@'] = &&builtin,
['.'] = &&number,
['0' ... '9'] = &&number,
['a' ... 'z'] = &&ident,
['A' ... 'Z'] = &&ident,
['_'] = &&ident,
['\''] = &&quoted,
['"'] = &&string,
['+'] = &&plus,
['-'] = &&minus,
['/'] = &&slash,
['*'] = &&asterisks,
['='] = &&equal,
['['] = &&braket_left,
[']'] = &&braket_right,
[0] = &&end,
};
#define JUMP_TARGET goto *jump_table[(int32_t)l->input.p[l->pos]]
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
delimitor_left
:
rule(
delimitor_left
) {
out[count++] = INTERN_DELIMITOR_LEFT;
out[count++] = INTERN_DELIMITOR_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
delimitor_right
:
rule(
delimitor_right
) {
out[count++] = INTERN_DELIMITOR_RIGHT;
out[count++] = INTERN_DELIMITOR_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
braket_left
:
rule(
braket_left
) {
out[count++] = INTERN_BRAKET_LEFT;
out[count++] = INTERN_BRAKET_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
braket_right
:
rule(
braket_right
) {
out[count++] = INTERN_BRAKET_RIGHT;
out[count++] = INTERN_BRAKET_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
builtin
:
{
rule(
builtin
)
{
l->pos++;
l->pos++;
// not an ident after @, this is shit
// not an ident after @, this is shit
if (!is_alphanum(cur(l))) {
if (!is_alphanum(cur(l))) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
}
}
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Str s = (Str){
Str s = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
Token *b = CALL(a, request, sizeof(Token));
Token *b = CALL(a, request, sizeof(Token));
b->string = s;
b->string = s;
b->type = T_BUILTIN;
b->type = T_BUILTIN;
out[count++] = b;
out[count++] = b;
JUMP_TARGET;
JUMP_TARGET;
}
}
Kopieren
Kopiert
Kopieren
Kopiert
plus
:
rule(
plus
) {
out[count++] = INTERN_PLUS;
out[count++] = INTERN_PLUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
minus
:
rule(
minus
) {
out[count++] = INTERN_MINUS;
out[count++] = INTERN_MINUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
slash
:
rule(
slash
) {
out[count++] = INTERN_SLASH;
out[count++] = INTERN_SLASH;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
equal
:
rule(
equal
) {
out[count++] = INTERN_EQUAL;
out[count++] = INTERN_EQUAL;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
asterisks
:
rule(
asterisks
) {
out[count++] = INTERN_ASTERISKS;
out[count++] = INTERN_ASTERISKS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
number
:
{
rule(
number
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t i = start;
size_t i = start;
bool is_double = false;
bool is_double = false;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (; i < l->input.len; i++) {
for (; i < l->input.len; i++) {
char cc = l->input.p[i];
char cc = l->input.p[i];
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
if (cc >= '0' && cc <= '9')
if (cc >= '0' && cc <= '9')
continue;
continue;
if (cc == '.') {
if (cc == '.') {
ASSERT(!is_double, "Two dots in double");
ASSERT(!is_double, "Two dots in double");
is_double = true;
is_double = true;
continue;
continue;
}
}
break;
break;
}
}
l->pos = i;
l->pos = i;
Token *n = CALL(a, request, sizeof(Token));
Token *n = CALL(a, request, sizeof(Token));
n->string = (Str){
n->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = i - start,
.len = i - start,
.hash = hash,
.hash = hash,
};
};
if (is_double) {
if (is_double) {
n->type = T_DOUBLE;
n->type = T_DOUBLE;
} else {
} else {
n->type = T_INTEGER;
n->type = T_INTEGER;
}
}
out[count++] = n;
out[count++] = n;
JUMP_TARGET;
JUMP_TARGET;
}
}
Kopieren
Kopiert
Kopieren
Kopiert
ident
:
{
rule(
ident
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
if (hash == true_hash) {
if (hash == true_hash) {
t = INTERN_TRUE;
t = INTERN_TRUE;
} else if (hash == false_hash) {
} else if (hash == false_hash) {
t = INTERN_FALSE;
t = INTERN_FALSE;
} else {
} else {
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_IDENT;
t->type = T_IDENT;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
}
}
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
// same as string but only with leading '
// same as string but only with leading '
Kopieren
Kopiert
Kopieren
Kopiert
quoted
:
{
rule(
quoted
)
{
// skip '
// skip '
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
Kopieren
Kopiert
Kopieren
Kopiert
string
:
{
rule(
string
)
{
// skip "
// skip "
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
if (UNLIKELY(cur(l) != '"')) {
if (UNLIKELY(cur(l) != '"')) {
Str slice = Str_slice(&l->input, l->pos, l->input.len);
Str slice = Str_slice(&l->input, l->pos, l->input.len);
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
slice.p);
slice.p);
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
} else {
} else {
Token *t = CALL(a, request, sizeof(Token));
Token *t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = l->pos - start,
.len = l->pos - start,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
// skip "
// skip "
l->pos++;
l->pos++;
}
}
JUMP_TARGET;
JUMP_TARGET;
}
}
Kopieren
Kopiert
Kopieren
Kopiert
comment
:
rule(
comment
) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
}
}
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
whitespace
:
rule(
whitespace
) {
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Kopieren
Kopiert
Kopieren
Kopiert
}
Kopieren
Kopiert
Kopieren
Kopiert
unknown
:
{
rule(
unknown
)
{
uint8_t c = cur(l);
uint8_t c = cur(l);
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
}
}
Kopieren
Kopiert
Kopieren
Kopiert
end
:
rule(
end
) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
return count;
return count;
}
}
#undef SINGLE_TOK
#undef SINGLE_TOK
Gespeicherte Diffs
Originaltext
Datei öffnen
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Builds a Token that carries nothing but its type tag.
#define SINGLE_TOK(t) ((Token){.type = (t)})

// Printable name for each token type, indexed by the token type constant.
Str TOKEN_TYPE_MAP[] = {
    [T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
    [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
    [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
    [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
    [T_STRING] = STRING("T_STRING"),
    [T_TRUE] = STRING("T_TRUE"),
    [T_FALSE] = STRING("T_FALSE"),
    [T_DOUBLE] = STRING("T_DOUBLE"),
    [T_INTEGER] = STRING("T_INTEGER"),
    [T_BUILTIN] = STRING("T_BUILTIN"),
    [T_IDENT] = STRING("T_IDENT"),
    [T_PLUS] = STRING("T_PLUS"),
    [T_MINUS] = STRING("T_MINUS"),
    [T_ASTERISKS] = STRING("T_ASTERISKS"),
    [T_SLASH] = STRING("T_SLASH"),
    [T_EQUAL] = STRING("T_EQUAL"),
    [T_EOF] = STRING("T_EOF"),
};

// Creates a lexer positioned at the first byte of `input`.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position. Arguments are parenthesized so the
// macro stays correct for any pointer expression.
#define cur(L) ((L)->input.p[(L)->pos])

// True for the bytes allowed inside identifiers: ASCII letters, ASCII digits,
// '_' and '-'.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  // setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check covers
  // both cases
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Lexes the whole input of `l` into `out` and returns the number of tokens
// written, including the trailing T_EOF token.
//
// Dispatch uses a computed-goto jump table (GNU "labels as values"): the
// current input byte selects the label of the rule that consumes it, and each
// rule ends by dispatching on the next byte.
//
// l:   lexer state; l->pos is advanced as input is consumed.
// a:   allocator used for every token that carries text.
// out: caller-provided token array. No capacity is passed in, so the caller
//      must size it for the worst case.
//
// Tokens with text point into l->input rather than copying it.
//
// NOTE(review): apart from the number rule, scanning stops on a 0 byte rather
// than on l->input.len, so this assumes the input buffer is NUL-terminated --
// confirm against the callers.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  size_t true_hash = Str_hash(&STRING("true"));
  size_t false_hash = Str_hash(&STRING("false"));
  size_t count = 0;

  static void *jump_table[256] = {
      [0 ... 255] = &&unknown,
      [' '] = &&whitespace,
      ['\t'] = &&whitespace,
      ['\n'] = &&whitespace,
      [';'] = &&comment,
      ['('] = &&delimitor_left,
      [')'] = &&delimitor_right,
      ['@'] = &&builtin,
      ['.'] = &&number,
      ['0' ... '9'] = &&number,
      ['a' ... 'z'] = &&ident,
      ['A' ... 'Z'] = &&ident,
      ['_'] = &&ident,
      ['\''] = &&quoted,
      ['"'] = &&string,
      ['+'] = &&plus,
      ['-'] = &&minus,
      ['/'] = &&slash,
      ['*'] = &&asterisks,
      ['='] = &&equal,
      ['['] = &&braket_left,
      [']'] = &&braket_right,
      [0] = &&end,
  };

// The index goes through uint8_t: with a signed char buffer a byte >= 0x80
// would otherwise become a negative index and read outside the table.
#define JUMP_TARGET goto *jump_table[(uint8_t)l->input.p[l->pos]]

  JUMP_TARGET;

delimitor_left:
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;

delimitor_right:
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;

braket_left:
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;

braket_right:
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;

builtin: {
  // skip @
  l->pos++;
  // '@' has to be followed by a name; if it is not, an EOF marker is emitted.
  // NOTE(review): lexing still continues and an empty T_BUILTIN token follows
  // that marker -- confirm whether the parser relies on this.
  if (!is_alphanum(cur(l))) {
    out[count++] = INTERN_EOF;
  }
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *b = CALL(a, request, sizeof(Token));
  b->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  b->type = T_BUILTIN;
  out[count++] = b;
  JUMP_TARGET;
}

plus:
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;

minus:
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;

slash:
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;

equal:
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;

asterisks:
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;

number: {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      // first byte that is not part of the number; it must not be hashed
      break;
    }
    // only bytes that belong to the token reach the hash, so it covers
    // exactly the stored slice
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  n->string = (Str){
      .p = l->input.p + start,
      .len = i - start,
      .hash = hash,
  };
  n->type = is_double ? T_DOUBLE : T_INTEGER;
  out[count++] = n;
  JUMP_TARGET;
}

ident: {
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  size_t len = l->pos - start;
  Token *t;
  // the length check keeps an identifier whose hash merely collides with a
  // keyword hash from being turned into that keyword
  if (len == 4 && hash == true_hash) {
    t = INTERN_TRUE;
  } else if (len == 5 && hash == false_hash) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    t->type = T_IDENT;
    t->string = (Str){
        .p = l->input.p + start,
        .len = len,
        .hash = hash,
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
quoted: {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *t = CALL(a, request, sizeof(Token));
  t->type = T_STRING;
  t->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = t;
  JUMP_TARGET;
}

string: {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // bytes are read as uint8_t so that non-ASCII bytes (>= 0x80) inside a
  // string do not end the scan early; only NUL or the closing quote does.
  // NOTE(review): assumes Str_hash also treats bytes as unsigned -- confirm.
  for (uint8_t cc = cur(l); cc != 0 && cc != '"'; l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  if (UNLIKELY(cur(l) != '"')) {
    Str slice = Str_slice(&l->input, l->pos, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    out[count++] = INTERN_EOF;
  } else {
    Token *t = CALL(a, request, sizeof(Token));
    t->type = T_STRING;
    t->string = (Str){
        .p = l->input.p + start,
        .len = l->pos - start,
        .hash = hash,
    };
    out[count++] = t;
    // skip "
    l->pos++;
  }
  JUMP_TARGET;
}

comment:
  // skip to the end of the line (or input); the newline itself is consumed by
  // the whitespace rule. Bytes are unsigned so non-ASCII text inside a comment
  // does not end the scan.
  for (uint8_t cc = cur(l); cc != 0 && cc != '\n'; l->pos++, cc = cur(l)) {
  }
  JUMP_TARGET;

whitespace:
  l->pos++;
  JUMP_TARGET;

unknown: {
  uint8_t c = cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
  // if ASSERT returns, control falls through to `end`
}

end:
  out[count++] = INTERN_EOF;
  return count;
}

#undef SINGLE_TOK
Bearbeitung
Datei öffnen
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Builds a Token that carries nothing but its type tag.
#define SINGLE_TOK(t) ((Token){.type = (t)})

// Printable name for each token type, indexed by the token type constant.
Str TOKEN_TYPE_MAP[] = {
    [T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
    [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
    [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
    [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
    [T_STRING] = STRING("T_STRING"),
    [T_TRUE] = STRING("T_TRUE"),
    [T_FALSE] = STRING("T_FALSE"),
    [T_DOUBLE] = STRING("T_DOUBLE"),
    [T_INTEGER] = STRING("T_INTEGER"),
    [T_BUILTIN] = STRING("T_BUILTIN"),
    [T_IDENT] = STRING("T_IDENT"),
    [T_PLUS] = STRING("T_PLUS"),
    [T_MINUS] = STRING("T_MINUS"),
    [T_ASTERISKS] = STRING("T_ASTERISKS"),
    [T_SLASH] = STRING("T_SLASH"),
    [T_EQUAL] = STRING("T_EQUAL"),
    [T_EOF] = STRING("T_EOF"),
};

// Creates a lexer positioned at the first byte of `input`.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position. Arguments are parenthesized so the
// macro stays correct for any pointer expression.
#define cur(L) ((L)->input.p[(L)->pos])

// True for the bytes allowed inside identifiers: ASCII letters, ASCII digits,
// '_' and '-'.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  // setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check covers
  // both cases
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Signature shared by every lexing rule. All rules must have the identical
// signature so that one rule can tail-call the next. `count` (tokens written
// to `out` so far) is threaded through as a parameter because a rule cannot
// see the locals of the function that dispatched to it. Rules are static:
// they are an implementation detail and names such as `end` or `string`
// should not be exported from this file.
#define rule(name)                                                             \
  static size_t name(Lexer *l, Allocator *a, Token **out, size_t count)

rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(braket_right);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);

typedef size_t (*rule_t)(Lexer *l, Allocator *a, Token **out, size_t count);

// Maps every possible input byte to the rule that consumes it.
static rule_t jump_table[256] = {
    [0 ... 255] = &unknown,
    [' '] = &whitespace,
    ['\t'] = &whitespace,
    ['\n'] = &whitespace,
    [';'] = &comment,
    ['('] = &delimitor_left,
    [')'] = &delimitor_right,
    ['@'] = &builtin,
    ['.'] = &number,
    ['0' ... '9'] = &number,
    ['a' ... 'z'] = &ident,
    ['A' ... 'Z'] = &ident,
    ['_'] = &ident,
    ['\''] = &quoted,
    ['"'] = &string,
    ['+'] = &plus,
    ['-'] = &minus,
    ['/'] = &slash,
    ['*'] = &asterisks,
    ['='] = &equal,
    ['['] = &braket_left,
    [']'] = &braket_right,
    [0] = &end,
};

// Without the attribute the dispatch below is an ordinary call in return
// position and tail-call elimination is left to the optimizer.
#if defined(__clang__)
#define musttail [[clang::musttail]]
#elif defined(__GNUC__)
#define musttail [[gnu::musttail]]
#else
#define musttail
#endif

// Dispatches to the rule for the current byte. The attribute has to precede
// the whole return statement. The index goes through uint8_t: with a signed
// char buffer a byte >= 0x80 would otherwise become a negative index and read
// outside the table.
#define JUMP_TARGET                                                            \
  musttail return jump_table[(uint8_t)l->input.p[l->pos]](l, a, out, count)

// Lexes the whole input of `l` into `out` and returns the number of tokens
// written, including the trailing T_EOF token.
//
// l:   lexer state; l->pos is advanced as input is consumed.
// a:   allocator used for every token that carries text.
// out: caller-provided token array. No capacity is passed in, so the caller
//      must size it for the worst case.
//
// Tokens with text point into l->input rather than copying it.
//
// NOTE(review): apart from the number rule, scanning stops on a 0 byte rather
// than on l->input.len, so this assumes the input buffer is NUL-terminated --
// confirm against the callers.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  // Plain call, not a forced tail call: this public signature differs from
  // the rule signature. The rules then tail-call one another.
  return jump_table[(uint8_t)l->input.p[l->pos]](l, a, out, 0);
}

rule(delimitor_left) {
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(delimitor_right) {
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_left) {
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_right) {
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(builtin) {
  // skip @
  l->pos++;
  // '@' has to be followed by a name; if it is not, an EOF marker is emitted.
  // NOTE(review): lexing still continues and an empty T_BUILTIN token follows
  // that marker -- confirm whether the parser relies on this.
  if (!is_alphanum(cur(l))) {
    out[count++] = INTERN_EOF;
  }
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *b = CALL(a, request, sizeof(Token));
  b->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  b->type = T_BUILTIN;
  out[count++] = b;
  JUMP_TARGET;
}

rule(plus) {
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;
}

rule(minus) {
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;
}

rule(slash) {
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;
}

rule(equal) {
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;
}

rule(asterisks) {
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;
}

rule(number) {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      // first byte that is not part of the number; it must not be hashed
      break;
    }
    // only bytes that belong to the token reach the hash, so it covers
    // exactly the stored slice
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  n->string = (Str){
      .p = l->input.p + start,
      .len = i - start,
      .hash = hash,
  };
  n->type = is_double ? T_DOUBLE : T_INTEGER;
  out[count++] = n;
  JUMP_TARGET;
}

rule(ident) {
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  size_t len = l->pos - start;
  Token *t;
  // The keyword hashes are computed here because a rule has no access to the
  // locals of Lexer_all. The length test runs first, so Str_hash is only
  // called for identifiers of keyword length, and a mere hash collision
  // cannot turn an identifier into a keyword.
  if (len == 4 && hash == Str_hash(&STRING("true"))) {
    t = INTERN_TRUE;
  } else if (len == 5 && hash == Str_hash(&STRING("false"))) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    t->type = T_IDENT;
    t->string = (Str){
        .p = l->input.p + start,
        .len = len,
        .hash = hash,
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
rule(quoted) {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); cc != 0 && is_alphanum(cc);
       l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *t = CALL(a, request, sizeof(Token));
  t->type = T_STRING;
  t->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = t;
  JUMP_TARGET;
}

rule(string) {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // bytes are read as uint8_t so that non-ASCII bytes (>= 0x80) inside a
  // string do not end the scan early; only NUL or the closing quote does.
  // NOTE(review): assumes Str_hash also treats bytes as unsigned -- confirm.
  for (uint8_t cc = cur(l); cc != 0 && cc != '"'; l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  if (UNLIKELY(cur(l) != '"')) {
    Str slice = Str_slice(&l->input, l->pos, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    out[count++] = INTERN_EOF;
  } else {
    Token *t = CALL(a, request, sizeof(Token));
    t->type = T_STRING;
    t->string = (Str){
        .p = l->input.p + start,
        .len = l->pos - start,
        .hash = hash,
    };
    out[count++] = t;
    // skip "
    l->pos++;
  }
  JUMP_TARGET;
}

rule(comment) {
  // skip to the end of the line (or input); the newline itself is consumed by
  // the whitespace rule. Bytes are unsigned so non-ASCII text inside a comment
  // does not end the scan.
  for (uint8_t cc = cur(l); cc != 0 && cc != '\n'; l->pos++, cc = cur(l)) {
  }
  JUMP_TARGET;
}

rule(whitespace) {
  l->pos++;
  JUMP_TARGET;
}

rule(unknown) {
  uint8_t c = cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
  // If ASSERT returns, finish the token stream the same way `end` does so the
  // function never falls off its end without a return value.
  out[count++] = INTERN_EOF;
  return count;
}

rule(end) {
  out[count++] = INTERN_EOF;
  return count;
}

#undef SINGLE_TOK
Unterschied finden