Diff
checker
Text
Text
Images
Documents
Excel
Folders
Legal
Enterprise
Desktop
Pricing
Sign in
Download Diffchecker Desktop
Compare text
Find the difference between two text files
Tools
History
Real-time editor
Hide unchanged lines
Disable line wrap
Layout
Split
Unified
Diff precision
Smart
Word
Char
Syntax highlighting
Choose syntax
Ignore
Transform text
Go to first change
Edit input
Diffchecker Desktop
The most secure way to run Diffchecker. Get the Diffchecker Desktop app: your diffs never leave your computer!
Get Desktop
lexing_goto_v_musttail
Created
12 months ago
Diff never expires
Clear
Export
Share
Explain
47 removals
Lines
Total
Removed
Characters
Total
Removed
To continue using this feature, upgrade to
Diff
checker
Pro
View Pricing
311 lines
Copy
109 additions
Lines
Total
Added
Characters
Total
Added
To continue using this feature, upgrade to
Diff
checker
Pro
View Pricing
354 lines
Copy
#include "lexer.h"
#include "lexer.h"
#include "common.h"
#include "common.h"
#include "mem.h"
#include "mem.h"
#include "strings.h"
#include "strings.h"
#include <stddef.h>
#include <stddef.h>
#include <stdint.h>
#include <stdint.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#define SINGLE_TOK(t) ((Token){.type = t})
#define SINGLE_TOK(t) ((Token){.type = t})
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_STRING] = STRING("T_STRING"),
[T_STRING] = STRING("T_STRING"),
[T_TRUE] = STRING("T_TRUE"),
[T_TRUE] = STRING("T_TRUE"),
[T_FALSE] = STRING("T_FALSE"),
[T_FALSE] = STRING("T_FALSE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_IDENT] = STRING("T_IDENT"),
[T_IDENT] = STRING("T_IDENT"),
[T_PLUS] = STRING("T_PLUS"),
[T_PLUS] = STRING("T_PLUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_SLASH] = STRING("T_SLASH"),
[T_SLASH] = STRING("T_SLASH"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EOF] = STRING("T_EOF")};
[T_EOF] = STRING("T_EOF")};
Lexer Lexer_new(Str input) {
Lexer Lexer_new(Str input) {
return (Lexer){
return (Lexer){
.input = input,
.input = input,
.pos = 0,
.pos = 0,
};
};
}
}
#define cur(L) (L->input.p[L->pos])
#define cur(L) (L->input.p[L->pos])
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
uint8_t lower = cc | 0x20;
uint8_t lower = cc | 0x20;
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_digit = (cc >= '0' && cc <= '9');
bool is_digit = (cc >= '0' && cc <= '9');
return is_alpha || is_digit || cc == '_' || cc == '-';
return is_alpha || is_digit || cc == '_' || cc == '-';
}
}
// we can "intern" these, since all of them are the same, regardless of position
// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Copy
Copied
Copy
Copied
size_t
Lexer_all
(Lexer *l, Allocator *a, Token **out)
{
#define rule(name)
size_t
name(Lexer *l, Allocator *a, Token **out)
rule(Lexer_all);
rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);
typedef size_t (*rule_t)
(Lexer *l, Allocator *a, Token **out)
;
static rule_t jump_table[256] = {
[0 ... 255] = &unknown,
[' '] = &whitespace,
['\t'] = &whitespace,
['\n'] = &whitespace,
[';'] = &comment,
['('] = &delimitor_left,
[')'] = &delimitor_right,
['@'] = &builtin,
['.'] = &number,
['0' ... '9'] = &number,
['a' ... 'z'] = &ident,
['A' ... 'Z'] = &ident,
['_'] = &ident,
['\''] = &quoted,
['"'] = &string,
['+'] = &plus,
['-'] = &minus,
['/'] = &slash,
['*'] = &asterisks,
['='] = &equal,
['['] = &braket_left,
[']'] = &braket_right,
[0] = &end,
};
#ifdef __clang__
#define musttail [[clang::musttail]]
#elif __GNUC__
#define musttail [[gnu::musttail]]
#else
#define musttail
#endif
#define JUMP_TARGET return musttail jump_table[(int32_t)l->input.p[l->pos]](l, a, out)
rule(Lexer_all) {
ASSERT(out != NULL, "Failed to allocate token list");
ASSERT(out != NULL, "Failed to allocate token list");
// empty input
// empty input
if (l->input.len == 0) {
if (l->input.len == 0) {
out[0] = INTERN_EOF;
out[0] = INTERN_EOF;
return 1;
return 1;
}
}
size_t true_hash = Str_hash(&STRING("true"));
size_t true_hash = Str_hash(&STRING("true"));
size_t false_hash = Str_hash(&STRING("false"));
size_t false_hash = Str_hash(&STRING("false"));
size_t count = 0;
size_t count = 0;
Copy
Copied
Copy
Copied
static void *jump_table[256] = {
[0 ... 255] = &&unknown,
[' '] = &&whitespace,
['\t'] = &&whitespace,
['\n'] = &&whitespace,
[';'] = &&comment,
['('] = &&delimitor_left,
[')'] = &&delimitor_right,
['@'] = &&builtin,
['.'] = &&number,
['0' ... '9'] = &&number,
['a' ... 'z'] = &&ident,
['A' ... 'Z'] = &&ident,
['_'] = &&ident,
['\''] = &&quoted,
['"'] = &&string,
['+'] = &&plus,
['-'] = &&minus,
['/'] = &&slash,
['*'] = &&asterisks,
['='] = &&equal,
['['] = &&braket_left,
[']'] = &&braket_right,
[0] = &&end,
};
#define JUMP_TARGET goto *jump_table[(int32_t)l->input.p[l->pos]]
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
delimitor_left
:
rule(
delimitor_left
) {
out[count++] = INTERN_DELIMITOR_LEFT;
out[count++] = INTERN_DELIMITOR_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
delimitor_right
:
rule(
delimitor_right
) {
out[count++] = INTERN_DELIMITOR_RIGHT;
out[count++] = INTERN_DELIMITOR_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
braket_left
:
rule(
braket_left
) {
out[count++] = INTERN_BRAKET_LEFT;
out[count++] = INTERN_BRAKET_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
braket_right
:
rule(
braket_right
) {
out[count++] = INTERN_BRAKET_RIGHT;
out[count++] = INTERN_BRAKET_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
builtin
:
{
rule(
builtin
)
{
l->pos++;
l->pos++;
// not an ident after @, this is shit
// not an ident after @, this is shit
if (!is_alphanum(cur(l))) {
if (!is_alphanum(cur(l))) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
}
}
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Str s = (Str){
Str s = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
Token *b = CALL(a, request, sizeof(Token));
Token *b = CALL(a, request, sizeof(Token));
b->string = s;
b->string = s;
b->type = T_BUILTIN;
b->type = T_BUILTIN;
out[count++] = b;
out[count++] = b;
JUMP_TARGET;
JUMP_TARGET;
}
}
Copy
Copied
Copy
Copied
plus
:
rule(
plus
) {
out[count++] = INTERN_PLUS;
out[count++] = INTERN_PLUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
minus
:
rule(
minus
) {
out[count++] = INTERN_MINUS;
out[count++] = INTERN_MINUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
slash
:
rule(
slash
) {
out[count++] = INTERN_SLASH;
out[count++] = INTERN_SLASH;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
equal
:
rule(
equal
) {
out[count++] = INTERN_EQUAL;
out[count++] = INTERN_EQUAL;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
asterisks
:
rule(
asterisks
) {
out[count++] = INTERN_ASTERISKS;
out[count++] = INTERN_ASTERISKS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
number
:
{
rule(
number
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t i = start;
size_t i = start;
bool is_double = false;
bool is_double = false;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (; i < l->input.len; i++) {
for (; i < l->input.len; i++) {
char cc = l->input.p[i];
char cc = l->input.p[i];
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
if (cc >= '0' && cc <= '9')
if (cc >= '0' && cc <= '9')
continue;
continue;
if (cc == '.') {
if (cc == '.') {
ASSERT(!is_double, "Two dots in double");
ASSERT(!is_double, "Two dots in double");
is_double = true;
is_double = true;
continue;
continue;
}
}
break;
break;
}
}
l->pos = i;
l->pos = i;
Token *n = CALL(a, request, sizeof(Token));
Token *n = CALL(a, request, sizeof(Token));
n->string = (Str){
n->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = i - start,
.len = i - start,
.hash = hash,
.hash = hash,
};
};
if (is_double) {
if (is_double) {
n->type = T_DOUBLE;
n->type = T_DOUBLE;
} else {
} else {
n->type = T_INTEGER;
n->type = T_INTEGER;
}
}
out[count++] = n;
out[count++] = n;
JUMP_TARGET;
JUMP_TARGET;
}
}
Copy
Copied
Copy
Copied
ident
:
{
rule(
ident
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
if (hash == true_hash) {
if (hash == true_hash) {
t = INTERN_TRUE;
t = INTERN_TRUE;
} else if (hash == false_hash) {
} else if (hash == false_hash) {
t = INTERN_FALSE;
t = INTERN_FALSE;
} else {
} else {
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_IDENT;
t->type = T_IDENT;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
}
}
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
// same as string but only with leading '
// same as string but only with leading '
Copy
Copied
Copy
Copied
quoted
:
{
rule(
quoted
)
{
// skip '
// skip '
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
Copy
Copied
Copy
Copied
string
:
{
rule(
string
)
{
// skip "
// skip "
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
if (UNLIKELY(cur(l) != '"')) {
if (UNLIKELY(cur(l) != '"')) {
Str slice = Str_slice(&l->input, l->pos, l->input.len);
Str slice = Str_slice(&l->input, l->pos, l->input.len);
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
slice.p);
slice.p);
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
} else {
} else {
Token *t = CALL(a, request, sizeof(Token));
Token *t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = l->pos - start,
.len = l->pos - start,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
// skip "
// skip "
l->pos++;
l->pos++;
}
}
JUMP_TARGET;
JUMP_TARGET;
}
}
Copy
Copied
Copy
Copied
comment
:
rule(
comment
) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
}
}
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
whitespace
:
rule(
whitespace
) {
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
Copy
Copied
Copy
Copied
}
Copy
Copied
Copy
Copied
unknown
:
{
rule(
unknown
)
{
uint8_t c = cur(l);
uint8_t c = cur(l);
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
}
}
Copy
Copied
Copy
Copied
end
:
rule(
end
) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
return count;
return count;
}
}
#undef SINGLE_TOK
#undef SINGLE_TOK
Saved diffs
Original text
Open file
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Builds a Token compound literal that carries nothing but its type tag.
#define SINGLE_TOK(t) ((Token){.type = t})

// Printable name for every token type, indexed by the token type constant.
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
                        [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
                        [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
                        [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
                        [T_STRING] = STRING("T_STRING"),
                        [T_TRUE] = STRING("T_TRUE"),
                        [T_FALSE] = STRING("T_FALSE"),
                        [T_DOUBLE] = STRING("T_DOUBLE"),
                        [T_INTEGER] = STRING("T_INTEGER"),
                        [T_BUILTIN] = STRING("T_BUILTIN"),
                        [T_IDENT] = STRING("T_IDENT"),
                        [T_PLUS] = STRING("T_PLUS"),
                        [T_MINUS] = STRING("T_MINUS"),
                        [T_ASTERISKS] = STRING("T_ASTERISKS"),
                        [T_SLASH] = STRING("T_SLASH"),
                        [T_EQUAL] = STRING("T_EQUAL"),
                        [T_EOF] = STRING("T_EOF")};

// Creates a lexer over `input`, positioned at its first byte.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position.
#define cur(L) (L->input.p[L->pos])

// True for ASCII letters, ASCII digits, '_' and '-'.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  // OR-ing in 0x20 folds 'A'..'Z' onto 'a'..'z', so one range check covers
  // both cases.
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// we can "intern" these, since all of them are the same, regardless of
// position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Tokenizes the whole input of `l` into `out` and returns the number of
// tokens written, including the trailing T_EOF token.
//
// l:   lexer state; l->pos is advanced as bytes are consumed.
// a:   allocator used for every token that carries a string payload.
// out: caller-provided token array. No capacity is passed in, so the caller
//      must size it for the worst case.
//
// Lexing only stops at a 0 byte (see the `[0]` table entry), so this assumes
// the input buffer is NUL-terminated -- TODO confirm against the callers.
//
// Dispatch is a computed goto (GNU "labels as values") keyed on the current
// byte: every rule consumes its token and jumps straight to the next rule.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  size_t true_hash = Str_hash(&STRING("true"));
  size_t false_hash = Str_hash(&STRING("false"));
  size_t count = 0;

  // One label per possible byte value; bytes without a rule go to `unknown`.
  static void *jump_table[256] = {
      [0 ... 255] = &&unknown,
      [' '] = &&whitespace,
      ['\t'] = &&whitespace,
      ['\n'] = &&whitespace,
      [';'] = &&comment,
      ['('] = &&delimitor_left,
      [')'] = &&delimitor_right,
      ['@'] = &&builtin,
      ['.'] = &&number,
      ['0' ... '9'] = &&number,
      ['a' ... 'z'] = &&ident,
      ['A' ... 'Z'] = &&ident,
      ['_'] = &&ident,
      ['\''] = &&quoted,
      ['"'] = &&string,
      ['+'] = &&plus,
      ['-'] = &&minus,
      ['/'] = &&slash,
      ['*'] = &&asterisks,
      ['='] = &&equal,
      ['['] = &&braket_left,
      [']'] = &&braket_right,
      [0] = &&end,
  };

// The index is cast to uint8_t so it always lands in 0..255. A signed cast
// would turn bytes >= 0x80 into negative indices if the input bytes are plain
// (signed) char.
#define JUMP_TARGET goto *jump_table[(uint8_t)l->input.p[l->pos]]

  JUMP_TARGET;

delimitor_left:
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;

delimitor_right:
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;

braket_left:
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;

braket_right:
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;

builtin: {
  // skip @
  l->pos++;
  // '@' must be followed by an identifier; otherwise an EOF token is emitted.
  // NOTE(review): lexing still continues and an empty T_BUILTIN follows the
  // EOF token -- confirm this is the intended error signalling.
  if (!is_alphanum(cur(l))) {
    out[count++] = INTERN_EOF;
  }
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // is_alphanum() rejects 0, so the scan also stops at the terminator.
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *b = CALL(a, request, sizeof(Token));
  ASSERT(b != NULL, "Failed to allocate token");
  b->type = T_BUILTIN;
  b->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = b;
  JUMP_TARGET;
}

plus:
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;

minus:
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;

slash:
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;

equal:
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;

asterisks:
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;

number: {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      break;
    }
    // Only bytes that belong to the literal are hashed, so the hash covers
    // exactly the bytes in [start, i) and not the byte that ended the scan.
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  ASSERT(n != NULL, "Failed to allocate token");
  n->type = is_double ? T_DOUBLE : T_INTEGER;
  n->string = (Str){
      .p = l->input.p + start,
      .len = i - start,
      .hash = hash,
  };
  out[count++] = n;
  JUMP_TARGET;
}

ident: {
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  size_t len = l->pos - start;
  Token *t;
  // Keywords are recognised by hash. The length guard narrows, but does not
  // eliminate, false positives from hash collisions.
  if (len == 4 && hash == true_hash) {
    t = INTERN_TRUE;
  } else if (len == 5 && hash == false_hash) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    ASSERT(t != NULL, "Failed to allocate token");
    t->type = T_IDENT;
    t->string = (Str){
        .p = l->input.p + start,
        .len = len,
        .hash = hash,
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
quoted: {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *t = CALL(a, request, sizeof(Token));
  ASSERT(t != NULL, "Failed to allocate token");
  t->type = T_STRING;
  t->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = t;
  JUMP_TARGET;
}

string: {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // The byte is compared against 0 as an unsigned value: a `char > 0` test
  // would end the scan at the first byte >= 0x80 where char is signed and
  // report valid non-ASCII strings as unterminated.
  for (uint8_t cc = cur(l); cc != '\0' && cc != '"'; l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  if (UNLIKELY(cur(l) != '"')) {
    Str slice = Str_slice(&l->input, l->pos, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    // The scan stopped on the 0 byte, so the dispatch below reaches `end`,
    // which appends a second EOF token after this one.
    out[count++] = INTERN_EOF;
  } else {
    Token *t = CALL(a, request, sizeof(Token));
    ASSERT(t != NULL, "Failed to allocate token");
    t->type = T_STRING;
    t->string = (Str){
        .p = l->input.p + start,
        .len = l->pos - start,
        .hash = hash,
    };
    out[count++] = t;
    // skip "
    l->pos++;
  }
  JUMP_TARGET;
}

comment:
  // Drop everything up to, but not including, the newline or the terminator.
  // Compared unsigned so non-ASCII bytes inside a comment are skipped too.
  for (uint8_t cc = cur(l); cc != '\0' && cc != '\n'; l->pos++, cc = cur(l)) {
  }
  JUMP_TARGET;

whitespace:
  l->pos++;
  JUMP_TARGET;

unknown: {
  uint8_t c = cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
}
  // If ASSERT returns, control falls through and the token stream is closed.

end:
  out[count++] = INTERN_EOF;
  return count;
}

#undef JUMP_TARGET
#undef SINGLE_TOK
Changed text
Open file
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Builds a Token compound literal that carries nothing but its type tag.
#define SINGLE_TOK(t) ((Token){.type = t})

// Printable name for every token type, indexed by the token type constant.
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
                        [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
                        [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
                        [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
                        [T_STRING] = STRING("T_STRING"),
                        [T_TRUE] = STRING("T_TRUE"),
                        [T_FALSE] = STRING("T_FALSE"),
                        [T_DOUBLE] = STRING("T_DOUBLE"),
                        [T_INTEGER] = STRING("T_INTEGER"),
                        [T_BUILTIN] = STRING("T_BUILTIN"),
                        [T_IDENT] = STRING("T_IDENT"),
                        [T_PLUS] = STRING("T_PLUS"),
                        [T_MINUS] = STRING("T_MINUS"),
                        [T_ASTERISKS] = STRING("T_ASTERISKS"),
                        [T_SLASH] = STRING("T_SLASH"),
                        [T_EQUAL] = STRING("T_EQUAL"),
                        [T_EOF] = STRING("T_EOF")};

// Creates a lexer over `input`, positioned at its first byte.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position.
#define cur(L) (L->input.p[L->pos])

// True for ASCII letters, ASCII digits, '_' and '-'.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  // OR-ing in 0x20 folds 'A'..'Z' onto 'a'..'z', so one range check covers
  // both cases.
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// we can "intern" these, since all of them are the same, regardless of
// position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Every lexing rule is a file-local function with the same signature as
// Lexer_all, because a guaranteed tail call needs caller and callee
// signatures to match.
#define rule(name) static size_t name(Lexer *l, Allocator *a, Token **out)

rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(braket_right);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);

typedef size_t (*rule_t)(Lexer *l, Allocator *a, Token **out);

// One rule per possible byte value; bytes without a rule go to `unknown`.
static const rule_t jump_table[256] = {
    [0 ... 255] = &unknown,
    [' '] = &whitespace,
    ['\t'] = &whitespace,
    ['\n'] = &whitespace,
    [';'] = &comment,
    ['('] = &delimitor_left,
    [')'] = &delimitor_right,
    ['@'] = &builtin,
    ['.'] = &number,
    ['0' ... '9'] = &number,
    ['a' ... 'z'] = &ident,
    ['A' ... 'Z'] = &ident,
    ['_'] = &ident,
    ['\''] = &quoted,
    ['"'] = &string,
    ['+'] = &plus,
    ['-'] = &minus,
    ['/'] = &slash,
    ['*'] = &asterisks,
    ['='] = &equal,
    ['['] = &braket_left,
    [']'] = &braket_right,
    [0] = &end,
};

// `musttail` is a statement attribute and has to precede `return`. It is only
// enabled when the compiler reports support for it; without it the dispatch
// is an ordinary call and stack depth may grow with the number of tokens
// unless the optimiser turns the calls into jumps.
#if defined(__has_attribute)
#if __has_attribute(musttail)
#define musttail __attribute__((musttail))
#endif
#endif
#ifndef musttail
#define musttail
#endif

// The index is cast to uint8_t so it always lands in 0..255. A signed cast
// would turn bytes >= 0x80 into negative indices if the input bytes are plain
// (signed) char.
#define JUMP_TARGET                                                            \
  musttail return jump_table[(uint8_t)l->input.p[l->pos]](l, a, out)

// State shared by all rules during one Lexer_all run. The common rule
// signature leaves no room to pass it along, so it lives at file scope; as a
// consequence Lexer_all is neither reentrant nor thread-safe.
static size_t count;      // tokens written to `out` so far
static size_t true_hash;  // Str_hash of "true"
static size_t false_hash; // Str_hash of "false"

// Tokenizes the whole input of `l` into `out` and returns the number of
// tokens written, including the trailing T_EOF token.
//
// l:   lexer state; l->pos is advanced as bytes are consumed.
// a:   allocator used for every token that carries a string payload.
// out: caller-provided token array. No capacity is passed in, so the caller
//      must size it for the worst case.
//
// Lexing only stops at a 0 byte (see the `[0]` table entry), so this assumes
// the input buffer is NUL-terminated -- TODO confirm against the callers.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  true_hash = Str_hash(&STRING("true"));
  false_hash = Str_hash(&STRING("false"));
  count = 0;

  JUMP_TARGET;
}

rule(delimitor_left) {
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(delimitor_right) {
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_left) {
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_right) {
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(builtin) {
  // skip @
  l->pos++;
  // '@' must be followed by an identifier; otherwise an EOF token is emitted.
  // NOTE(review): lexing still continues and an empty T_BUILTIN follows the
  // EOF token -- confirm this is the intended error signalling.
  if (!is_alphanum(cur(l))) {
    out[count++] = INTERN_EOF;
  }
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // is_alphanum() rejects 0, so the scan also stops at the terminator.
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *b = CALL(a, request, sizeof(Token));
  ASSERT(b != NULL, "Failed to allocate token");
  b->type = T_BUILTIN;
  b->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = b;
  JUMP_TARGET;
}

rule(plus) {
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;
}

rule(minus) {
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;
}

rule(slash) {
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;
}

rule(equal) {
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;
}

rule(asterisks) {
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;
}

rule(number) {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      break;
    }
    // Only bytes that belong to the literal are hashed, so the hash covers
    // exactly the bytes in [start, i) and not the byte that ended the scan.
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  ASSERT(n != NULL, "Failed to allocate token");
  n->type = is_double ? T_DOUBLE : T_INTEGER;
  n->string = (Str){
      .p = l->input.p + start,
      .len = i - start,
      .hash = hash,
  };
  out[count++] = n;
  JUMP_TARGET;
}

rule(ident) {
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  size_t len = l->pos - start;
  Token *t;
  // Keywords are recognised by hash. The length guard narrows, but does not
  // eliminate, false positives from hash collisions.
  if (len == 4 && hash == true_hash) {
    t = INTERN_TRUE;
  } else if (len == 5 && hash == false_hash) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    ASSERT(t != NULL, "Failed to allocate token");
    t->type = T_IDENT;
    t->string = (Str){
        .p = l->input.p + start,
        .len = len,
        .hash = hash,
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
rule(quoted) {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = cur(l); is_alphanum(cc); l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  Token *t = CALL(a, request, sizeof(Token));
  ASSERT(t != NULL, "Failed to allocate token");
  t->type = T_STRING;
  t->string = (Str){
      .p = l->input.p + start,
      .len = l->pos - start,
      .hash = hash,
  };
  out[count++] = t;
  JUMP_TARGET;
}

rule(string) {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // The byte is compared against 0 as an unsigned value: a `char > 0` test
  // would end the scan at the first byte >= 0x80 where char is signed and
  // report valid non-ASCII strings as unterminated.
  for (uint8_t cc = cur(l); cc != '\0' && cc != '"'; l->pos++, cc = cur(l)) {
    hash ^= cc;
    hash *= FNV_PRIME;
  }

  if (UNLIKELY(cur(l) != '"')) {
    Str slice = Str_slice(&l->input, l->pos, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    // The scan stopped on the 0 byte, so the dispatch below reaches `end`,
    // which appends a second EOF token after this one.
    out[count++] = INTERN_EOF;
  } else {
    Token *t = CALL(a, request, sizeof(Token));
    ASSERT(t != NULL, "Failed to allocate token");
    t->type = T_STRING;
    t->string = (Str){
        .p = l->input.p + start,
        .len = l->pos - start,
        .hash = hash,
    };
    out[count++] = t;
    // skip "
    l->pos++;
  }
  JUMP_TARGET;
}

rule(comment) {
  // Drop everything up to, but not including, the newline or the terminator.
  // Compared unsigned so non-ASCII bytes inside a comment are skipped too.
  for (uint8_t cc = cur(l); cc != '\0' && cc != '\n'; l->pos++, cc = cur(l)) {
  }
  JUMP_TARGET;
}

rule(whitespace) {
  l->pos++;
  JUMP_TARGET;
}

rule(unknown) {
  uint8_t c = cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
  // If ASSERT returns, close the token stream instead of running off the end
  // of a non-void function.
  musttail return end(l, a, out);
}

rule(end) {
  (void)l;
  (void)a;
  out[count++] = INTERN_EOF;
  return count;
}

#undef JUMP_TARGET
#undef musttail
#undef rule
#undef SINGLE_TOK
Find difference