Diff
checker
文本
文本
圖像
文檔
Excel
文件夾
Legal
Enterprise
桌面版
定價
登入
下載 Diffchecker 桌面版
比較文本
尋找兩個文字檔案之間的差異
工具
歷史
即時編輯器
摺疊未變更行
關閉換行
檢視
拆分
統一
比對精度
智能
單詞
字符
語法突出顯示
選擇語法
忽略
文字轉換
前往第一個差異
編輯輸入
Diffchecker Desktop
執行Diffchecker最安全的方式。取得Diffchecker桌面應用程式:您的差異永遠不會離開您的電腦!
取得桌面版
lexing_goto_v_musttail
建立於
12 個月前
差異永不過期
清除
匯出
分享
解釋
47 刪除
行
總計
刪除
字符
總計
刪除
要繼續使用此功能,請升級到
Diff
checker
Pro
查看價格
311 行
全部複製
109 新增
行
總計
新增
字符
總計
新增
要繼續使用此功能,請升級到
Diff
checker
Pro
查看價格
354 行
全部複製
#include "lexer.h"
#include "lexer.h"
#include "common.h"
#include "common.h"
#include "mem.h"
#include "mem.h"
#include "strings.h"
#include "strings.h"
#include <stddef.h>
#include <stddef.h>
#include <stdint.h>
#include <stdint.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#define SINGLE_TOK(t) ((Token){.type = t})
#define SINGLE_TOK(t) ((Token){.type = t})
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_STRING] = STRING("T_STRING"),
[T_STRING] = STRING("T_STRING"),
[T_TRUE] = STRING("T_TRUE"),
[T_TRUE] = STRING("T_TRUE"),
[T_FALSE] = STRING("T_FALSE"),
[T_FALSE] = STRING("T_FALSE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_IDENT] = STRING("T_IDENT"),
[T_IDENT] = STRING("T_IDENT"),
[T_PLUS] = STRING("T_PLUS"),
[T_PLUS] = STRING("T_PLUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_SLASH] = STRING("T_SLASH"),
[T_SLASH] = STRING("T_SLASH"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EOF] = STRING("T_EOF")};
[T_EOF] = STRING("T_EOF")};
Lexer Lexer_new(Str input) {
Lexer Lexer_new(Str input) {
return (Lexer){
return (Lexer){
.input = input,
.input = input,
.pos = 0,
.pos = 0,
};
};
}
}
#define cur(L) (L->input.p[L->pos])
#define cur(L) (L->input.p[L->pos])
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
uint8_t lower = cc | 0x20;
uint8_t lower = cc | 0x20;
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_digit = (cc >= '0' && cc <= '9');
bool is_digit = (cc >= '0' && cc <= '9');
return is_alpha || is_digit || cc == '_' || cc == '-';
return is_alpha || is_digit || cc == '_' || cc == '-';
}
}
// we can "intern" these, since all of them are the same, regardless of position
// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
複製
已複製
複製
已複製
size_t
Lexer_all
(Lexer *l, Allocator *a, Token **out)
{
#define rule(name)
size_t
name(Lexer *l, Allocator *a, Token **out)
rule(Lexer_all);
rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);
typedef size_t (*rule_t)
(Lexer *l, Allocator *a, Token **out)
;
static rule_t jump_table[256] = {
[0 ... 255] = &unknown,
[' '] = &whitespace,
['\t'] = &whitespace,
['\n'] = &whitespace,
[';'] = &comment,
['('] = &delimitor_left,
[')'] = &delimitor_right,
['@'] = &builtin,
['.'] = &number,
['0' ... '9'] = &number,
['a' ... 'z'] = &ident,
['A' ... 'Z'] = &ident,
['_'] = &ident,
['\''] = "ed,
['"'] = &string,
['+'] = &plus,
['-'] = &minus,
['/'] = &slash,
['*'] = &asterisks,
['='] = &equal,
['['] = &braket_left,
[']'] = &braket_right,
[0] = &end,
};
#ifdef __clang__
#define musttail [[clang::musttail]]
#elif __GNUC__
#define musttail [[gnu::musttail]]
#else
#define musttail
#endif
#define JUMP_TARGET return musttail jump_table[(int32_t)l->input.p[l->pos]](l, a, out)
rule(Lexer_all) {
ASSERT(out != NULL, "Failed to allocate token list");
ASSERT(out != NULL, "Failed to allocate token list");
// empty input
// empty input
if (l->input.len == 0) {
if (l->input.len == 0) {
out[0] = INTERN_EOF;
out[0] = INTERN_EOF;
return 1;
return 1;
}
}
size_t true_hash = Str_hash(&STRING("true"));
size_t true_hash = Str_hash(&STRING("true"));
size_t false_hash = Str_hash(&STRING("false"));
size_t false_hash = Str_hash(&STRING("false"));
size_t count = 0;
size_t count = 0;
複製
已複製
複製
已複製
static void *jump_table[256] = {
[0 ... 255] = &&unknown,
[' '] = &&whitespace,
['\t'] = &&whitespace,
['\n'] = &&whitespace,
[';'] = &&comment,
['('] = &&delimitor_left,
[')'] = &&delimitor_right,
['@'] = &&builtin,
['.'] = &&number,
['0' ... '9'] = &&number,
['a' ... 'z'] = &&ident,
['A' ... 'Z'] = &&ident,
['_'] = &&ident,
['\''] = &"ed,
['"'] = &&string,
['+'] = &&plus,
['-'] = &&minus,
['/'] = &&slash,
['*'] = &&asterisks,
['='] = &&equal,
['['] = &&braket_left,
[']'] = &&braket_right,
[0] = &&end,
};
#define JUMP_TARGET goto *jump_table[(int32_t)l->input.p[l->pos]]
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
delimitor_left
:
rule(
delimitor_left
) {
out[count++] = INTERN_DELIMITOR_LEFT;
out[count++] = INTERN_DELIMITOR_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
delimitor_right
:
rule(
delimitor_right
) {
out[count++] = INTERN_DELIMITOR_RIGHT;
out[count++] = INTERN_DELIMITOR_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
braket_left
:
rule(
braket_left
) {
out[count++] = INTERN_BRAKET_LEFT;
out[count++] = INTERN_BRAKET_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
braket_right
:
rule(
braket_right
) {
out[count++] = INTERN_BRAKET_RIGHT;
out[count++] = INTERN_BRAKET_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
builtin
:
{
rule(
builtin
)
{
l->pos++;
l->pos++;
// not an ident after @, this is shit
// not an ident after @, this is shit
if (!is_alphanum(cur(l))) {
if (!is_alphanum(cur(l))) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
}
}
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Str s = (Str){
Str s = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
Token *b = CALL(a, request, sizeof(Token));
Token *b = CALL(a, request, sizeof(Token));
b->string = s;
b->string = s;
b->type = T_BUILTIN;
b->type = T_BUILTIN;
out[count++] = b;
out[count++] = b;
JUMP_TARGET;
JUMP_TARGET;
}
}
複製
已複製
複製
已複製
plus
:
rule(
plus
) {
out[count++] = INTERN_PLUS;
out[count++] = INTERN_PLUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
minus
:
rule(
minus
) {
out[count++] = INTERN_MINUS;
out[count++] = INTERN_MINUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
slash
:
rule(
slash
) {
out[count++] = INTERN_SLASH;
out[count++] = INTERN_SLASH;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
equal
:
rule(
equal
) {
out[count++] = INTERN_EQUAL;
out[count++] = INTERN_EQUAL;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
asterisks
:
rule(
asterisks
) {
out[count++] = INTERN_ASTERISKS;
out[count++] = INTERN_ASTERISKS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
number
:
{
rule(
number
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t i = start;
size_t i = start;
bool is_double = false;
bool is_double = false;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (; i < l->input.len; i++) {
for (; i < l->input.len; i++) {
char cc = l->input.p[i];
char cc = l->input.p[i];
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
if (cc >= '0' && cc <= '9')
if (cc >= '0' && cc <= '9')
continue;
continue;
if (cc == '.') {
if (cc == '.') {
ASSERT(!is_double, "Two dots in double");
ASSERT(!is_double, "Two dots in double");
is_double = true;
is_double = true;
continue;
continue;
}
}
break;
break;
}
}
l->pos = i;
l->pos = i;
Token *n = CALL(a, request, sizeof(Token));
Token *n = CALL(a, request, sizeof(Token));
n->string = (Str){
n->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = i - start,
.len = i - start,
.hash = hash,
.hash = hash,
};
};
if (is_double) {
if (is_double) {
n->type = T_DOUBLE;
n->type = T_DOUBLE;
} else {
} else {
n->type = T_INTEGER;
n->type = T_INTEGER;
}
}
out[count++] = n;
out[count++] = n;
JUMP_TARGET;
JUMP_TARGET;
}
}
複製
已複製
複製
已複製
ident
:
{
rule(
ident
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
if (hash == true_hash) {
if (hash == true_hash) {
t = INTERN_TRUE;
t = INTERN_TRUE;
} else if (hash == false_hash) {
} else if (hash == false_hash) {
t = INTERN_FALSE;
t = INTERN_FALSE;
} else {
} else {
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_IDENT;
t->type = T_IDENT;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
}
}
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
// same as string but only with leading '
// same as string but only with leading '
複製
已複製
複製
已複製
quoted
:
{
rule(
quoted
)
{
// skip '
// skip '
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
複製
已複製
複製
已複製
string
:
{
rule(
string
)
{
// skip "
// skip "
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
if (UNLIKELY(cur(l) != '"')) {
if (UNLIKELY(cur(l) != '"')) {
Str slice = Str_slice(&l->input, l->pos, l->input.len);
Str slice = Str_slice(&l->input, l->pos, l->input.len);
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
slice.p);
slice.p);
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
} else {
} else {
Token *t = CALL(a, request, sizeof(Token));
Token *t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = l->pos - start,
.len = l->pos - start,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
// skip "
// skip "
l->pos++;
l->pos++;
}
}
JUMP_TARGET;
JUMP_TARGET;
}
}
複製
已複製
複製
已複製
comment
:
rule(
comment
) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
}
}
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
whitespace
:
rule(
whitespace
) {
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
複製
已複製
複製
已複製
}
複製
已複製
複製
已複製
unknown
:
{
rule(
unknown
)
{
uint8_t c = cur(l);
uint8_t c = cur(l);
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
}
}
複製
已複製
複製
已複製
end
:
rule(
end
) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
return count;
return count;
}
}
#undef SINGLE_TOK
#undef SINGLE_TOK
已保存差異
原始文本
開啟檔案
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Builds a Token compound literal that carries nothing but its type.
#define SINGLE_TOK(t) ((Token){.type = (t)})

// Printable name for every token type, indexed by the token type value.
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
                        [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
                        [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
                        [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
                        [T_STRING] = STRING("T_STRING"),
                        [T_TRUE] = STRING("T_TRUE"),
                        [T_FALSE] = STRING("T_FALSE"),
                        [T_DOUBLE] = STRING("T_DOUBLE"),
                        [T_INTEGER] = STRING("T_INTEGER"),
                        [T_BUILTIN] = STRING("T_BUILTIN"),
                        [T_IDENT] = STRING("T_IDENT"),
                        [T_PLUS] = STRING("T_PLUS"),
                        [T_MINUS] = STRING("T_MINUS"),
                        [T_ASTERISKS] = STRING("T_ASTERISKS"),
                        [T_SLASH] = STRING("T_SLASH"),
                        [T_EQUAL] = STRING("T_EQUAL"),
                        [T_EOF] = STRING("T_EOF")};

// Creates a lexer positioned at the first byte of `input`.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position. No bounds check is performed.
#define cur(L) ((L)->input.p[(L)->pos])

// True for the bytes allowed inside a name: ASCII letters, digits, '_' and
// '-'. OR-ing with 0x20 folds upper-case ASCII letters onto lower-case ones.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// Folds one byte into a running hash: xor first, then multiply by FNV_PRIME.
// The byte is taken unsigned so bytes >= 0x80 are never sign-extended.
static inline size_t fnv1a_step(size_t hash, uint8_t byte) {
  hash ^= byte;
  hash *= FNV_PRIME;
  return hash;
}

// Advances l->pos over a run of name bytes (see is_alphanum) and returns the
// hash of the bytes consumed. A 0 byte is not a name byte, so the scan stops
// there.
static inline size_t scan_name(Lexer *l) {
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = (uint8_t)cur(l); is_alphanum(cc); cc = (uint8_t)cur(l)) {
    hash = fnv1a_step(hash, cc);
    l->pos++;
  }
  return hash;
}

// we can "intern" these, since all of them are the same, regardless of
// position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Tokenizes the whole input of `l` into `out` and returns the number of
// tokens written, including the terminating T_EOF token.
//
// Dispatch is a computed goto (GNU "labels as values"): the current byte
// indexes a 256-entry table of label addresses, and every rule ends by
// dispatching on the next byte.
//
// l:   lexer state; l->pos is advanced as input is consumed.
// a:   allocator used for tokens that carry text (numbers, names, strings).
// out: destination array. No capacity check is performed here, so the caller
//      must size it for the worst case.
//
// Lexing stops at the first 0 byte. The dispatcher and the scan loops read
// input.p[pos] without comparing pos to input.len, so the buffer must hold a
// 0 byte at or before index input.len.
//
// Malformed input ('@' not followed by a name byte, or an unterminated
// string) ends the token stream with T_EOF right away.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  size_t count = 0;

  static void *const jump_table[256] = {
      [0 ... 255] = &&unknown,
      [' '] = &&whitespace,
      ['\t'] = &&whitespace,
      ['\n'] = &&whitespace,
      [';'] = &&comment,
      ['('] = &&delimitor_left,
      [')'] = &&delimitor_right,
      ['@'] = &&builtin,
      ['.'] = &&number,
      ['0' ... '9'] = &&number,
      ['a' ... 'z'] = &&ident,
      ['A' ... 'Z'] = &&ident,
      ['_'] = &&ident,
      ['\''] = &&quoted,
      ['"'] = &&string,
      ['+'] = &&plus,
      ['-'] = &&minus,
      ['/'] = &&slash,
      ['*'] = &&asterisks,
      ['='] = &&equal,
      ['['] = &&braket_left,
      [']'] = &&braket_right,
      [0] = &&end,
  };

// Index with an unsigned byte: a signed char >= 0x80 would otherwise become a
// negative index and read outside the table.
#define JUMP_TARGET goto *jump_table[(uint8_t)cur(l)]

  JUMP_TARGET;

delimitor_left:
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;

delimitor_right:
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;

braket_left:
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;

braket_right:
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;

builtin: {
  // skip @
  l->pos++;
  // '@' must be followed by a name; otherwise end the token stream here
  // instead of also emitting an empty builtin token.
  if (!is_alphanum((uint8_t)cur(l))) {
    out[count++] = INTERN_EOF;
    return count;
  }
  size_t start = l->pos;
  size_t hash = scan_name(l);
  Token *b = CALL(a, request, sizeof(Token));
  *b = (Token){
      .type = T_BUILTIN,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = b;
  JUMP_TARGET;
}

plus:
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;

minus:
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;

slash:
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;

equal:
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;

asterisks:
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;

number: {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = (uint8_t)l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      break;
    }
    // Hash only bytes that belong to the number; the byte that ends the scan
    // must not leak into the hash.
    hash = fnv1a_step(hash, cc);
  }
  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  *n = (Token){
      .type = is_double ? T_DOUBLE : T_INTEGER,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = i - start,
              .hash = hash,
          },
  };
  out[count++] = n;
  JUMP_TARGET;
}

ident: {
  size_t start = l->pos;
  size_t hash = scan_name(l);
  size_t len = l->pos - start;
  // Compare the bytes themselves so a hash collision can never turn an
  // ordinary identifier into a boolean literal.
  const bool is_true = len == 4 && memcmp(l->input.p + start, "true", 4) == 0;
  const bool is_false =
      len == 5 && memcmp(l->input.p + start, "false", 5) == 0;
  Token *t;
  if (is_true) {
    t = INTERN_TRUE;
  } else if (is_false) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    *t = (Token){
        .type = T_IDENT,
        .string =
            (Str){
                .p = l->input.p + start,
                .len = len,
                .hash = hash,
            },
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
quoted: {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = scan_name(l);
  Token *t = CALL(a, request, sizeof(Token));
  *t = (Token){
      .type = T_STRING,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = t;
  JUMP_TARGET;
}

string: {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // Stop only at the closing quote or a 0 byte, so bytes >= 0x80 (e.g. UTF-8)
  // are accepted inside strings whatever the signedness of char.
  for (uint8_t cc = (uint8_t)cur(l); cc != 0 && cc != '"';
       cc = (uint8_t)cur(l)) {
    hash = fnv1a_step(hash, cc);
    l->pos++;
  }

  if (UNLIKELY(cur(l) != '"')) {
    // Report from the start of the string body: the scan position is where
    // the input ran out, so slicing from it would usually print nothing.
    Str slice = Str_slice(&l->input, start, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    out[count++] = INTERN_EOF;
    return count;
  }

  Token *t = CALL(a, request, sizeof(Token));
  *t = (Token){
      .type = T_STRING,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = t;
  // skip "
  l->pos++;
  JUMP_TARGET;
}

comment:
  // A comment runs up to, but not including, the next newline or 0 byte.
  while (cur(l) != 0 && cur(l) != '\n') {
    l->pos++;
  }
  JUMP_TARGET;

whitespace:
  l->pos++;
  JUMP_TARGET;

unknown: {
  uint8_t c = (uint8_t)cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
}
  // If ASSERT does not stop the program, control falls through to `end`.

end:
  out[count++] = INTERN_EOF;
  return count;
}

#undef JUMP_TARGET
#undef SINGLE_TOK
更改後文本
開啟檔案
#include "lexer.h"
#include "common.h"
#include "mem.h"
#include "strings.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Builds a Token compound literal that carries nothing but its type.
#define SINGLE_TOK(t) ((Token){.type = (t)})

// Printable name for every token type, indexed by the token type value.
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
                        [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
                        [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
                        [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
                        [T_STRING] = STRING("T_STRING"),
                        [T_TRUE] = STRING("T_TRUE"),
                        [T_FALSE] = STRING("T_FALSE"),
                        [T_DOUBLE] = STRING("T_DOUBLE"),
                        [T_INTEGER] = STRING("T_INTEGER"),
                        [T_BUILTIN] = STRING("T_BUILTIN"),
                        [T_IDENT] = STRING("T_IDENT"),
                        [T_PLUS] = STRING("T_PLUS"),
                        [T_MINUS] = STRING("T_MINUS"),
                        [T_ASTERISKS] = STRING("T_ASTERISKS"),
                        [T_SLASH] = STRING("T_SLASH"),
                        [T_EQUAL] = STRING("T_EQUAL"),
                        [T_EOF] = STRING("T_EOF")};

// Creates a lexer positioned at the first byte of `input`.
Lexer Lexer_new(Str input) {
  return (Lexer){
      .input = input,
      .pos = 0,
  };
}

// Byte at the lexer's current position. No bounds check is performed.
#define cur(L) ((L)->input.p[(L)->pos])

// True for the bytes allowed inside a name: ASCII letters, digits, '_' and
// '-'. OR-ing with 0x20 folds upper-case ASCII letters onto lower-case ones.
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
  uint8_t lower = cc | 0x20;
  bool is_alpha = (lower >= 'a' && lower <= 'z');
  bool is_digit = (cc >= '0' && cc <= '9');
  return is_alpha || is_digit || cc == '_' || cc == '-';
}

// Folds one byte into a running hash: xor first, then multiply by FNV_PRIME.
// The byte is taken unsigned so bytes >= 0x80 are never sign-extended.
static inline size_t fnv1a_step(size_t hash, uint8_t byte) {
  hash ^= byte;
  hash *= FNV_PRIME;
  return hash;
}

// Advances l->pos over a run of name bytes (see is_alphanum) and returns the
// hash of the bytes consumed. A 0 byte is not a name byte, so the scan stops
// there.
static inline size_t scan_name(Lexer *l) {
  size_t hash = FNV_OFFSET_BASIS;
  for (uint8_t cc = (uint8_t)cur(l); is_alphanum(cc); cc = (uint8_t)cur(l)) {
    hash = fnv1a_step(hash, cc);
    l->pos++;
  }
  return hash;
}

// we can "intern" these, since all of them are the same, regardless of
// position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);

// Every rule is a file-local function with one shared signature, which is
// what a guaranteed tail call between rules requires. `count` is the number
// of tokens written to `out` so far; it is passed along explicitly because
// the rules share no local state.
#define rule(name)                                                             \
  static size_t name(Lexer *l, Allocator *a, Token **out, size_t count)

rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
rule(braket_right);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);

typedef size_t (*rule_t)(Lexer *l, Allocator *a, Token **out, size_t count);

// Maps the current input byte to the rule that lexes it. Byte 0 ends lexing;
// every byte without an explicit entry goes to `unknown`.
static const rule_t jump_table[256] = {
    [0 ... 255] = unknown,
    [' '] = whitespace,
    ['\t'] = whitespace,
    ['\n'] = whitespace,
    [';'] = comment,
    ['('] = delimitor_left,
    [')'] = delimitor_right,
    ['@'] = builtin,
    ['.'] = number,
    ['0' ... '9'] = number,
    ['a' ... 'z'] = ident,
    ['A' ... 'Z'] = ident,
    ['_'] = ident,
    ['\''] = quoted,
    ['"'] = string,
    ['+'] = plus,
    ['-'] = minus,
    ['/'] = slash,
    ['*'] = asterisks,
    ['='] = equal,
    ['['] = braket_left,
    [']'] = braket_right,
    [0] = end,
};

// Use the musttail statement attribute only where the compiler reports it.
// Without it the dispatch is an ordinary call, and stack depth then depends
// on whether the optimizer turns it into a tail call.
#if defined(__has_attribute)
#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#endif
#endif
#ifndef MUSTTAIL
#define MUSTTAIL
#endif

// The attribute has to precede the return statement. The table is indexed
// with an unsigned byte so a signed char >= 0x80 cannot become a negative
// index.
#define JUMP_TARGET                                                            \
  MUSTTAIL return jump_table[(uint8_t)cur(l)](l, a, out, count)

// Tokenizes the whole input of `l` into `out` and returns the number of
// tokens written, including the terminating T_EOF token.
//
// l:   lexer state; l->pos is advanced as input is consumed.
// a:   allocator used for tokens that carry text (numbers, names, strings).
// out: destination array. No capacity check is performed here, so the caller
//      must size it for the worst case.
//
// Lexing stops at the first 0 byte. The dispatcher and the scan loops read
// input.p[pos] without comparing pos to input.len, so the buffer must hold a
// 0 byte at or before index input.len.
//
// Malformed input ('@' not followed by a name byte, or an unterminated
// string) ends the token stream with T_EOF right away.
size_t Lexer_all(Lexer *l, Allocator *a, Token **out) {
  ASSERT(out != NULL, "Failed to allocate token list");

  // empty input
  if (l->input.len == 0) {
    out[0] = INTERN_EOF;
    return 1;
  }

  // Plain call: this entry point does not share the rules' signature.
  return jump_table[(uint8_t)cur(l)](l, a, out, 0);
}

rule(delimitor_left) {
  out[count++] = INTERN_DELIMITOR_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(delimitor_right) {
  out[count++] = INTERN_DELIMITOR_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_left) {
  out[count++] = INTERN_BRAKET_LEFT;
  l->pos++;
  JUMP_TARGET;
}

rule(braket_right) {
  out[count++] = INTERN_BRAKET_RIGHT;
  l->pos++;
  JUMP_TARGET;
}

rule(builtin) {
  // skip @
  l->pos++;
  // '@' must be followed by a name; otherwise end the token stream here
  // instead of also emitting an empty builtin token.
  if (!is_alphanum((uint8_t)cur(l))) {
    out[count++] = INTERN_EOF;
    return count;
  }
  size_t start = l->pos;
  size_t hash = scan_name(l);
  Token *b = CALL(a, request, sizeof(Token));
  *b = (Token){
      .type = T_BUILTIN,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = b;
  JUMP_TARGET;
}

rule(plus) {
  out[count++] = INTERN_PLUS;
  l->pos++;
  JUMP_TARGET;
}

rule(minus) {
  out[count++] = INTERN_MINUS;
  l->pos++;
  JUMP_TARGET;
}

rule(slash) {
  out[count++] = INTERN_SLASH;
  l->pos++;
  JUMP_TARGET;
}

rule(equal) {
  out[count++] = INTERN_EQUAL;
  l->pos++;
  JUMP_TARGET;
}

rule(asterisks) {
  out[count++] = INTERN_ASTERISKS;
  l->pos++;
  JUMP_TARGET;
}

rule(number) {
  size_t start = l->pos;
  size_t i = start;
  bool is_double = false;
  size_t hash = FNV_OFFSET_BASIS;
  for (; i < l->input.len; i++) {
    uint8_t cc = (uint8_t)l->input.p[i];
    if (cc == '.') {
      ASSERT(!is_double, "Two dots in double");
      is_double = true;
    } else if (cc < '0' || cc > '9') {
      break;
    }
    // Hash only bytes that belong to the number; the byte that ends the scan
    // must not leak into the hash.
    hash = fnv1a_step(hash, cc);
  }
  l->pos = i;
  Token *n = CALL(a, request, sizeof(Token));
  *n = (Token){
      .type = is_double ? T_DOUBLE : T_INTEGER,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = i - start,
              .hash = hash,
          },
  };
  out[count++] = n;
  JUMP_TARGET;
}

rule(ident) {
  size_t start = l->pos;
  size_t hash = scan_name(l);
  size_t len = l->pos - start;
  // Compare the bytes themselves so a hash collision can never turn an
  // ordinary identifier into a boolean literal.
  const bool is_true = len == 4 && memcmp(l->input.p + start, "true", 4) == 0;
  const bool is_false =
      len == 5 && memcmp(l->input.p + start, "false", 5) == 0;
  Token *t;
  if (is_true) {
    t = INTERN_TRUE;
  } else if (is_false) {
    t = INTERN_FALSE;
  } else {
    t = CALL(a, request, sizeof(Token));
    *t = (Token){
        .type = T_IDENT,
        .string =
            (Str){
                .p = l->input.p + start,
                .len = len,
                .hash = hash,
            },
    };
  }
  out[count++] = t;
  JUMP_TARGET;
}

// same as string but only with leading '
rule(quoted) {
  // skip '
  l->pos++;
  size_t start = l->pos;
  size_t hash = scan_name(l);
  Token *t = CALL(a, request, sizeof(Token));
  *t = (Token){
      .type = T_STRING,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = t;
  JUMP_TARGET;
}

rule(string) {
  // skip "
  l->pos++;
  size_t start = l->pos;
  size_t hash = FNV_OFFSET_BASIS;
  // Stop only at the closing quote or a 0 byte, so bytes >= 0x80 (e.g. UTF-8)
  // are accepted inside strings whatever the signedness of char.
  for (uint8_t cc = (uint8_t)cur(l); cc != 0 && cc != '"';
       cc = (uint8_t)cur(l)) {
    hash = fnv1a_step(hash, cc);
    l->pos++;
  }

  if (UNLIKELY(cur(l) != '"')) {
    // Report from the start of the string body: the scan position is where
    // the input ran out, so slicing from it would usually print nothing.
    Str slice = Str_slice(&l->input, start, l->input.len);
    fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
            slice.p);
    out[count++] = INTERN_EOF;
    return count;
  }

  Token *t = CALL(a, request, sizeof(Token));
  *t = (Token){
      .type = T_STRING,
      .string =
          (Str){
              .p = l->input.p + start,
              .len = l->pos - start,
              .hash = hash,
          },
  };
  out[count++] = t;
  // skip "
  l->pos++;
  JUMP_TARGET;
}

rule(comment) {
  // A comment runs up to, but not including, the next newline or 0 byte.
  while (cur(l) != 0 && cur(l) != '\n') {
    l->pos++;
  }
  JUMP_TARGET;
}

rule(whitespace) {
  l->pos++;
  JUMP_TARGET;
}

rule(unknown) {
  uint8_t c = (uint8_t)cur(l);
  ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c);
  // If ASSERT does not stop the program, close the token stream so the
  // function still returns a defined value.
  out[count++] = INTERN_EOF;
  return count;
}

rule(end) {
  out[count++] = INTERN_EOF;
  return count;
}

#undef JUMP_TARGET
#undef SINGLE_TOK
尋找差異