Diff
checker
텍스트
텍스트
이미지
문서
Excel
폴더
Legal
Enterprise
데스크톱
요금제
로그인
데스크톱 앱 다운로드
텍스트 비교
두 텍스트 파일의 차이점을 찾아보세요
도구
기록
실시간 편집
변경 없는 행 숨기기
줄바꿈 비활성화
레이아웃
나란히 보기
합쳐 보기
비교 단위
스마트
단어
글자
구문 강조
언어 선택
제외
텍스트 변환
첫 변경으로
수정
Diffchecker Desktop
가장 안전하게 Diffchecker를 사용하는 방법. 데스크톱 앱을 사용하면 비교 데이터가 외부로 전송되지 않습니다!
데스크톱 앱 받기
lexing_goto_v_musttail
생성일
12개월 전
비교 결과 만료 없음
초기화
내보내기
공유
설명
47 삭제
행
총
삭제
글자
총
삭제
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
311 행
복사
109 추가
행
총
추가
글자
총
추가
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
354 행
복사
#include "lexer.h"
#include "lexer.h"
#include "common.h"
#include "common.h"
#include "mem.h"
#include "mem.h"
#include "strings.h"
#include "strings.h"
#include <stddef.h>
#include <stddef.h>
#include <stdint.h>
#include <stdint.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#define SINGLE_TOK(t) ((Token){.type = t})
#define SINGLE_TOK(t) ((Token){.type = t})
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"),
[T_STRING] = STRING("T_STRING"),
[T_STRING] = STRING("T_STRING"),
[T_TRUE] = STRING("T_TRUE"),
[T_TRUE] = STRING("T_TRUE"),
[T_FALSE] = STRING("T_FALSE"),
[T_FALSE] = STRING("T_FALSE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_DOUBLE] = STRING("T_DOUBLE"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_INTEGER] = STRING("T_INTEGER"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_BUILTIN] = STRING("T_BUILTIN"),
[T_IDENT] = STRING("T_IDENT"),
[T_IDENT] = STRING("T_IDENT"),
[T_PLUS] = STRING("T_PLUS"),
[T_PLUS] = STRING("T_PLUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_MINUS] = STRING("T_MINUS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_ASTERISKS] = STRING("T_ASTERISKS"),
[T_SLASH] = STRING("T_SLASH"),
[T_SLASH] = STRING("T_SLASH"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EQUAL] = STRING("T_EQUAL"),
[T_EOF] = STRING("T_EOF")};
[T_EOF] = STRING("T_EOF")};
Lexer Lexer_new(Str input) {
Lexer Lexer_new(Str input) {
return (Lexer){
return (Lexer){
.input = input,
.input = input,
.pos = 0,
.pos = 0,
};
};
}
}
#define cur(L) (L->input.p[L->pos])
#define cur(L) (L->input.p[L->pos])
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
__attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) {
uint8_t lower = cc | 0x20;
uint8_t lower = cc | 0x20;
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_alpha = (lower >= 'a' && lower <= 'z');
bool is_digit = (cc >= '0' && cc <= '9');
bool is_digit = (cc >= '0' && cc <= '9');
return is_alpha || is_digit || cc == '_' || cc == '-';
return is_alpha || is_digit || cc == '_' || cc == '-';
}
}
// we can "intern" these, since all of them are the same, regardless of position
// we can "intern" these, since all of them are the same, regardless of position
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
Token *INTERN_EOF = &SINGLE_TOK(T_EOF);
복사
복사됨
복사
복사됨
size_t
Lexer_all
(Lexer *l, Allocator *a, Token **out)
{
// Expands to the prototype shared by every lexing rule. Used both for forward
// declarations (`rule(x);`) and as the head of a definition (`rule(x) { ... }`).
// The whole prototype must stay on the #define line: if it is split off, the
// macro expands to nothing and every `rule(...)` use silently disappears.
#define rule(name) size_t name(Lexer *l, Allocator *a, Token **out)
// Forward declarations of every rule, so the dispatch table can take their
// addresses before the definitions appear further down the file.
rule(Lexer_all);
rule(delimitor_left);
rule(delimitor_right);
rule(braket_left);
// braket_right is referenced by the dispatch table (the ']' entry) and must be
// declared here like every other rule.
rule(braket_right);
rule(builtin);
rule(plus);
rule(minus);
rule(slash);
rule(equal);
rule(asterisks);
rule(number);
rule(ident);
rule(quoted);
rule(string);
rule(comment);
rule(whitespace);
rule(unknown);
rule(end);
// Pointer to a lexing rule: takes the lexer, an allocator and the token output
// array, and returns a size_t. Same parameter list as the rule(name) prototype.
typedef size_t (*rule_t)
(Lexer *l, Allocator *a, Token **out)
;
static rule_t jump_table[256] = {
[0 ... 255] = &unknown,
[' '] = &whitespace,
['\t'] = &whitespace,
['\n'] = &whitespace,
[';'] = &comment,
['('] = &delimitor_left,
[')'] = &delimitor_right,
['@'] = &builtin,
['.'] = &number,
['0' ... '9'] = &number,
['a' ... 'z'] = &ident,
['A' ... 'Z'] = &ident,
['_'] = &ident,
['\''] = "ed,
['"'] = &string,
['+'] = &plus,
['-'] = &minus,
['/'] = &slash,
['*'] = &asterisks,
['='] = &equal,
['['] = &braket_left,
[']'] = &braket_right,
[0] = &end,
};
// `musttail` expands to the guaranteed-tail-call statement attribute when the
// compiler reports support for it, and to nothing otherwise. Probing with
// __has_attribute avoids emitting the attribute on compilers that merely
// define __GNUC__ but do not implement it.
#if defined(__has_attribute)
#if __has_attribute(musttail)
#define musttail __attribute__((musttail))
#endif
#endif
#ifndef musttail
// No guaranteed tail calls: JUMP_TARGET degrades to an ordinary call, so each
// dispatch may add a stack frame unless the optimizer turns it into a jump.
#define musttail
#endif

// Dispatch to the rule registered for the current input byte.
//
// The attribute is a statement attribute and therefore has to precede
// `return`; placing it between `return` and the call does not parse.
//
// The index is narrowed to uint8_t so it always lies in 0..255.
// NOTE(review): if input.p is a plain `char *` on a platform where char is
// signed, an int32_t cast would turn bytes >= 0x80 into negative indices;
// confirm the element type of Str.p.
#define JUMP_TARGET                                                            \
  musttail return jump_table[(uint8_t)l->input.p[l->pos]](l, a, out)
rule(Lexer_all) {
ASSERT(out != NULL, "Failed to allocate token list");
ASSERT(out != NULL, "Failed to allocate token list");
// empty input
// empty input
if (l->input.len == 0) {
if (l->input.len == 0) {
out[0] = INTERN_EOF;
out[0] = INTERN_EOF;
return 1;
return 1;
}
}
size_t true_hash = Str_hash(&STRING("true"));
size_t true_hash = Str_hash(&STRING("true"));
size_t false_hash = Str_hash(&STRING("false"));
size_t false_hash = Str_hash(&STRING("false"));
size_t count = 0;
size_t count = 0;
복사
복사됨
복사
복사됨
static void *jump_table[256] = {
[0 ... 255] = &&unknown,
[' '] = &&whitespace,
['\t'] = &&whitespace,
['\n'] = &&whitespace,
[';'] = &&comment,
['('] = &&delimitor_left,
[')'] = &&delimitor_right,
['@'] = &&builtin,
['.'] = &&number,
['0' ... '9'] = &&number,
['a' ... 'z'] = &&ident,
['A' ... 'Z'] = &&ident,
['_'] = &&ident,
['\''] = &"ed,
['"'] = &&string,
['+'] = &&plus,
['-'] = &&minus,
['/'] = &&slash,
['*'] = &&asterisks,
['='] = &&equal,
['['] = &&braket_left,
[']'] = &&braket_right,
[0] = &&end,
};
// Jump to the label registered for the current input byte. The index is
// narrowed to uint8_t so it always lies in 0..255.
// NOTE(review): if input.p is a plain `char *` on a platform where char is
// signed, an int32_t cast would turn bytes >= 0x80 into negative indices;
// confirm the element type of Str.p.
#define JUMP_TARGET goto *jump_table[(uint8_t)l->input.p[l->pos]]
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
delimitor_left
:
rule(
delimitor_left
) {
out[count++] = INTERN_DELIMITOR_LEFT;
out[count++] = INTERN_DELIMITOR_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
delimitor_right
:
rule(
delimitor_right
) {
out[count++] = INTERN_DELIMITOR_RIGHT;
out[count++] = INTERN_DELIMITOR_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
braket_left
:
rule(
braket_left
) {
out[count++] = INTERN_BRAKET_LEFT;
out[count++] = INTERN_BRAKET_LEFT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
braket_right
:
rule(
braket_right
) {
out[count++] = INTERN_BRAKET_RIGHT;
out[count++] = INTERN_BRAKET_RIGHT;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
builtin
:
{
rule(
builtin
)
{
l->pos++;
l->pos++;
// not an ident after @, this is shit
// not an ident after @, this is shit
if (!is_alphanum(cur(l))) {
if (!is_alphanum(cur(l))) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
}
}
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Str s = (Str){
Str s = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
Token *b = CALL(a, request, sizeof(Token));
Token *b = CALL(a, request, sizeof(Token));
b->string = s;
b->string = s;
b->type = T_BUILTIN;
b->type = T_BUILTIN;
out[count++] = b;
out[count++] = b;
JUMP_TARGET;
JUMP_TARGET;
}
}
복사
복사됨
복사
복사됨
plus
:
rule(
plus
) {
out[count++] = INTERN_PLUS;
out[count++] = INTERN_PLUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
minus
:
rule(
minus
) {
out[count++] = INTERN_MINUS;
out[count++] = INTERN_MINUS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
slash
:
rule(
slash
) {
out[count++] = INTERN_SLASH;
out[count++] = INTERN_SLASH;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
equal
:
rule(
equal
) {
out[count++] = INTERN_EQUAL;
out[count++] = INTERN_EQUAL;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
asterisks
:
rule(
asterisks
) {
out[count++] = INTERN_ASTERISKS;
out[count++] = INTERN_ASTERISKS;
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
number
:
{
rule(
number
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t i = start;
size_t i = start;
bool is_double = false;
bool is_double = false;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (; i < l->input.len; i++) {
for (; i < l->input.len; i++) {
char cc = l->input.p[i];
char cc = l->input.p[i];
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
if (cc >= '0' && cc <= '9')
if (cc >= '0' && cc <= '9')
continue;
continue;
if (cc == '.') {
if (cc == '.') {
ASSERT(!is_double, "Two dots in double");
ASSERT(!is_double, "Two dots in double");
is_double = true;
is_double = true;
continue;
continue;
}
}
break;
break;
}
}
l->pos = i;
l->pos = i;
Token *n = CALL(a, request, sizeof(Token));
Token *n = CALL(a, request, sizeof(Token));
n->string = (Str){
n->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = i - start,
.len = i - start,
.hash = hash,
.hash = hash,
};
};
if (is_double) {
if (is_double) {
n->type = T_DOUBLE;
n->type = T_DOUBLE;
} else {
} else {
n->type = T_INTEGER;
n->type = T_INTEGER;
}
}
out[count++] = n;
out[count++] = n;
JUMP_TARGET;
JUMP_TARGET;
}
}
복사
복사됨
복사
복사됨
ident
:
{
rule(
ident
)
{
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
if (hash == true_hash) {
if (hash == true_hash) {
t = INTERN_TRUE;
t = INTERN_TRUE;
} else if (hash == false_hash) {
} else if (hash == false_hash) {
t = INTERN_FALSE;
t = INTERN_FALSE;
} else {
} else {
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_IDENT;
t->type = T_IDENT;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
}
}
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
// same as string but only with leading '
// same as string but only with leading '
복사
복사됨
복사
복사됨
quoted
:
{
rule(
quoted
)
{
// skip '
// skip '
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
size_t len = l->pos - start;
size_t len = l->pos - start;
Token *t;
Token *t;
t = CALL(a, request, sizeof(Token));
t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = len,
.len = len,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
JUMP_TARGET;
JUMP_TARGET;
}
}
복사
복사됨
복사
복사됨
string
:
{
rule(
string
)
{
// skip "
// skip "
l->pos++;
l->pos++;
size_t start = l->pos;
size_t start = l->pos;
size_t hash = FNV_OFFSET_BASIS;
size_t hash = FNV_OFFSET_BASIS;
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) {
hash ^= cc;
hash ^= cc;
hash *= FNV_PRIME;
hash *= FNV_PRIME;
}
}
if (UNLIKELY(cur(l) != '"')) {
if (UNLIKELY(cur(l) != '"')) {
Str slice = Str_slice(&l->input, l->pos, l->input.len);
Str slice = Str_slice(&l->input, l->pos, l->input.len);
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len,
slice.p);
slice.p);
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
} else {
} else {
Token *t = CALL(a, request, sizeof(Token));
Token *t = CALL(a, request, sizeof(Token));
t->type = T_STRING;
t->type = T_STRING;
t->string = (Str){
t->string = (Str){
.p = l->input.p + start,
.p = l->input.p + start,
.len = l->pos - start,
.len = l->pos - start,
.hash = hash,
.hash = hash,
};
};
out[count++] = t;
out[count++] = t;
// skip "
// skip "
l->pos++;
l->pos++;
}
}
JUMP_TARGET;
JUMP_TARGET;
}
}
복사
복사됨
복사
복사됨
comment
:
rule(
comment
) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) {
}
}
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
whitespace
:
rule(
whitespace
) {
l->pos++;
l->pos++;
JUMP_TARGET;
JUMP_TARGET;
복사
복사됨
복사
복사됨
}
복사
복사됨
복사
복사됨
unknown
:
{
rule(
unknown
)
{
uint8_t c = cur(l);
uint8_t c = cur(l);
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c)
}
}
복사
복사됨
복사
복사됨
end
:
rule(
end
) {
out[count++] = INTERN_EOF;
out[count++] = INTERN_EOF;
return count;
return count;
}
}
#undef SINGLE_TOK
#undef SINGLE_TOK
저장된 비교 결과
원본
파일 열기
#include "lexer.h" #include "common.h" #include "mem.h" #include "strings.h" #include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #define SINGLE_TOK(t) ((Token){.type = t}) Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"), [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"), [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"), [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"), [T_STRING] = STRING("T_STRING"), [T_TRUE] = STRING("T_TRUE"), [T_FALSE] = STRING("T_FALSE"), [T_DOUBLE] = STRING("T_DOUBLE"), [T_INTEGER] = STRING("T_INTEGER"), [T_BUILTIN] = STRING("T_BUILTIN"), [T_IDENT] = STRING("T_IDENT"), [T_PLUS] = STRING("T_PLUS"), [T_MINUS] = STRING("T_MINUS"), [T_ASTERISKS] = STRING("T_ASTERISKS"), [T_SLASH] = STRING("T_SLASH"), [T_EQUAL] = STRING("T_EQUAL"), [T_EOF] = STRING("T_EOF")}; Lexer Lexer_new(Str input) { return (Lexer){ .input = input, .pos = 0, }; } #define cur(L) (L->input.p[L->pos]) __attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) { uint8_t lower = cc | 0x20; bool is_alpha = (lower >= 'a' && lower <= 'z'); bool is_digit = (cc >= '0' && cc <= '9'); return is_alpha || is_digit || cc == '_' || cc == '-'; } // we can "intern" these, since all of them are the same, regardless of position Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT); Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT); Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT); Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT); Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS); Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS); Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS); Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH); Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE); Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE); Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL); Token *INTERN_EOF = &SINGLE_TOK(T_EOF); size_t Lexer_all(Lexer *l, Allocator *a, Token **out) { ASSERT(out != NULL, "Failed to allocate token list"); // empty input if 
(l->input.len == 0) { out[0] = INTERN_EOF; return 1; } size_t true_hash = Str_hash(&STRING("true")); size_t false_hash = Str_hash(&STRING("false")); size_t count = 0; static void *jump_table[256] = { [0 ... 255] = &&unknown, [' '] = &&whitespace, ['\t'] = &&whitespace, ['\n'] = &&whitespace, [';'] = &&comment, ['('] = &&delimitor_left, [')'] = &&delimitor_right, ['@'] = &&builtin, ['.'] = &&number, ['0' ... '9'] = &&number, ['a' ... 'z'] = &&ident, ['A' ... 'Z'] = &&ident, ['_'] = &&ident, ['\''] = &"ed, ['"'] = &&string, ['+'] = &&plus, ['-'] = &&minus, ['/'] = &&slash, ['*'] = &&asterisks, ['='] = &&equal, ['['] = &&braket_left, [']'] = &&braket_right, [0] = &&end, }; #define JUMP_TARGET goto *jump_table[(int32_t)l->input.p[l->pos]] JUMP_TARGET; delimitor_left: out[count++] = INTERN_DELIMITOR_LEFT; l->pos++; JUMP_TARGET; delimitor_right: out[count++] = INTERN_DELIMITOR_RIGHT; l->pos++; JUMP_TARGET; braket_left: out[count++] = INTERN_BRAKET_LEFT; l->pos++; JUMP_TARGET; braket_right: out[count++] = INTERN_BRAKET_RIGHT; l->pos++; JUMP_TARGET; builtin: { l->pos++; // not an ident after @, this is shit if (!is_alphanum(cur(l))) { out[count++] = INTERN_EOF; } size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Str s = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; Token *b = CALL(a, request, sizeof(Token)); b->string = s; b->type = T_BUILTIN; out[count++] = b; JUMP_TARGET; } plus: out[count++] = INTERN_PLUS; l->pos++; JUMP_TARGET; minus: out[count++] = INTERN_MINUS; l->pos++; JUMP_TARGET; slash: out[count++] = INTERN_SLASH; l->pos++; JUMP_TARGET; equal: out[count++] = INTERN_EQUAL; l->pos++; JUMP_TARGET; asterisks: out[count++] = INTERN_ASTERISKS; l->pos++; JUMP_TARGET; number: { size_t start = l->pos; size_t i = start; bool is_double = false; size_t hash = FNV_OFFSET_BASIS; for (; i < l->input.len; i++) { char cc = 
l->input.p[i]; hash ^= cc; hash *= FNV_PRIME; if (cc >= '0' && cc <= '9') continue; if (cc == '.') { ASSERT(!is_double, "Two dots in double"); is_double = true; continue; } break; } l->pos = i; Token *n = CALL(a, request, sizeof(Token)); n->string = (Str){ .p = l->input.p + start, .len = i - start, .hash = hash, }; if (is_double) { n->type = T_DOUBLE; } else { n->type = T_INTEGER; } out[count++] = n; JUMP_TARGET; } ident: { size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Token *t; if (hash == true_hash) { t = INTERN_TRUE; } else if (hash == false_hash) { t = INTERN_FALSE; } else { t = CALL(a, request, sizeof(Token)); t->type = T_IDENT; t->string = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; } out[count++] = t; JUMP_TARGET; } // same as string but only with leading ' quoted: { // skip ' l->pos++; size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Token *t; t = CALL(a, request, sizeof(Token)); t->type = T_STRING; t->string = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; out[count++] = t; JUMP_TARGET; } string: { // skip " l->pos++; size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } if (UNLIKELY(cur(l) != '"')) { Str slice = Str_slice(&l->input, l->pos, l->input.len); fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len, slice.p); out[count++] = INTERN_EOF; } else { Token *t = CALL(a, request, sizeof(Token)); t->type = T_STRING; t->string = (Str){ .p = l->input.p + start, .len = l->pos - start, .hash = hash, }; out[count++] = t; // skip " l->pos++; } JUMP_TARGET; } comment: for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = 
cur(l)) { } JUMP_TARGET; whitespace: l->pos++; JUMP_TARGET; unknown: { uint8_t c = cur(l); ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c) } end: out[count++] = INTERN_EOF; return count; } #undef SINGLE_TOK
수정본
파일 열기
#include "lexer.h" #include "common.h" #include "mem.h" #include "strings.h" #include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #define SINGLE_TOK(t) ((Token){.type = t}) Str TOKEN_TYPE_MAP[] = {[T_DELIMITOR_LEFT] = STRING("T_DELIMITOR_LEFT"), [T_DELIMITOR_RIGHT] = STRING("T_DELIMITOR_RIGHT"), [T_BRAKET_LEFT] = STRING("T_BRAKET_LEFT"), [T_BRAKET_RIGHT] = STRING("T_BRAKET_RIGHT"), [T_STRING] = STRING("T_STRING"), [T_TRUE] = STRING("T_TRUE"), [T_FALSE] = STRING("T_FALSE"), [T_DOUBLE] = STRING("T_DOUBLE"), [T_INTEGER] = STRING("T_INTEGER"), [T_BUILTIN] = STRING("T_BUILTIN"), [T_IDENT] = STRING("T_IDENT"), [T_PLUS] = STRING("T_PLUS"), [T_MINUS] = STRING("T_MINUS"), [T_ASTERISKS] = STRING("T_ASTERISKS"), [T_SLASH] = STRING("T_SLASH"), [T_EQUAL] = STRING("T_EQUAL"), [T_EOF] = STRING("T_EOF")}; Lexer Lexer_new(Str input) { return (Lexer){ .input = input, .pos = 0, }; } #define cur(L) (L->input.p[L->pos]) __attribute__((always_inline)) inline static bool is_alphanum(uint8_t cc) { uint8_t lower = cc | 0x20; bool is_alpha = (lower >= 'a' && lower <= 'z'); bool is_digit = (cc >= '0' && cc <= '9'); return is_alpha || is_digit || cc == '_' || cc == '-'; } // we can "intern" these, since all of them are the same, regardless of position Token *INTERN_DELIMITOR_LEFT = &SINGLE_TOK(T_DELIMITOR_LEFT); Token *INTERN_DELIMITOR_RIGHT = &SINGLE_TOK(T_DELIMITOR_RIGHT); Token *INTERN_BRAKET_LEFT = &SINGLE_TOK(T_BRAKET_LEFT); Token *INTERN_BRAKET_RIGHT = &SINGLE_TOK(T_BRAKET_RIGHT); Token *INTERN_MINUS = &SINGLE_TOK(T_MINUS); Token *INTERN_PLUS = &SINGLE_TOK(T_PLUS); Token *INTERN_ASTERISKS = &SINGLE_TOK(T_ASTERISKS); Token *INTERN_SLASH = &SINGLE_TOK(T_SLASH); Token *INTERN_FALSE = &SINGLE_TOK(T_FALSE); Token *INTERN_TRUE = &SINGLE_TOK(T_TRUE); Token *INTERN_EQUAL = &SINGLE_TOK(T_EQUAL); Token *INTERN_EOF = &SINGLE_TOK(T_EOF); #define rule(name) size_t name(Lexer *l, Allocator *a, Token **out) rule(Lexer_all); rule(delimitor_left); rule(delimitor_right); 
rule(braket_left); rule(builtin); rule(plus); rule(minus); rule(slash); rule(equal); rule(asterisks); rule(number); rule(ident); rule(quoted); rule(string); rule(comment); rule(whitespace); rule(unknown); rule(end); typedef size_t (*rule_t)(Lexer *l, Allocator *a, Token **out); static rule_t jump_table[256] = { [0 ... 255] = &unknown, [' '] = &whitespace, ['\t'] = &whitespace, ['\n'] = &whitespace, [';'] = &comment, ['('] = &delimitor_left, [')'] = &delimitor_right, ['@'] = &builtin, ['.'] = &number, ['0' ... '9'] = &number, ['a' ... 'z'] = &ident, ['A' ... 'Z'] = &ident, ['_'] = &ident, ['\''] = "ed, ['"'] = &string, ['+'] = &plus, ['-'] = &minus, ['/'] = &slash, ['*'] = &asterisks, ['='] = &equal, ['['] = &braket_left, [']'] = &braket_right, [0] = &end, }; #ifdef __clang__ #define musttail [[clang::musttail]] #elif __GNUC__ #define musttail [[gnu::musttail]] #else #define musttail #endif #define JUMP_TARGET return musttail jump_table[(int32_t)l->input.p[l->pos]](l, a, out) rule(Lexer_all) { ASSERT(out != NULL, "Failed to allocate token list"); // empty input if (l->input.len == 0) { out[0] = INTERN_EOF; return 1; } size_t true_hash = Str_hash(&STRING("true")); size_t false_hash = Str_hash(&STRING("false")); size_t count = 0; JUMP_TARGET; } rule(delimitor_left) { out[count++] = INTERN_DELIMITOR_LEFT; l->pos++; JUMP_TARGET; } rule(delimitor_right) { out[count++] = INTERN_DELIMITOR_RIGHT; l->pos++; JUMP_TARGET; } rule(braket_left) { out[count++] = INTERN_BRAKET_LEFT; l->pos++; JUMP_TARGET; } rule(braket_right) { out[count++] = INTERN_BRAKET_RIGHT; l->pos++; JUMP_TARGET; } rule(builtin) { l->pos++; // not an ident after @, this is shit if (!is_alphanum(cur(l))) { out[count++] = INTERN_EOF; } size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Str s = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; Token *b = CALL(a, 
request, sizeof(Token)); b->string = s; b->type = T_BUILTIN; out[count++] = b; JUMP_TARGET; } rule(plus) { out[count++] = INTERN_PLUS; l->pos++; JUMP_TARGET; } rule(minus) { out[count++] = INTERN_MINUS; l->pos++; JUMP_TARGET; } rule(slash) { out[count++] = INTERN_SLASH; l->pos++; JUMP_TARGET; } rule(equal) { out[count++] = INTERN_EQUAL; l->pos++; JUMP_TARGET; } rule(asterisks) { out[count++] = INTERN_ASTERISKS; l->pos++; JUMP_TARGET; } rule(number) { size_t start = l->pos; size_t i = start; bool is_double = false; size_t hash = FNV_OFFSET_BASIS; for (; i < l->input.len; i++) { char cc = l->input.p[i]; hash ^= cc; hash *= FNV_PRIME; if (cc >= '0' && cc <= '9') continue; if (cc == '.') { ASSERT(!is_double, "Two dots in double"); is_double = true; continue; } break; } l->pos = i; Token *n = CALL(a, request, sizeof(Token)); n->string = (Str){ .p = l->input.p + start, .len = i - start, .hash = hash, }; if (is_double) { n->type = T_DOUBLE; } else { n->type = T_INTEGER; } out[count++] = n; JUMP_TARGET; } rule(ident) { size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Token *t; if (hash == true_hash) { t = INTERN_TRUE; } else if (hash == false_hash) { t = INTERN_FALSE; } else { t = CALL(a, request, sizeof(Token)); t->type = T_IDENT; t->string = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; } out[count++] = t; JUMP_TARGET; } // same as string but only with leading ' rule(quoted) { // skip ' l->pos++; size_t start = l->pos; size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && is_alphanum(cc); l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } size_t len = l->pos - start; Token *t; t = CALL(a, request, sizeof(Token)); t->type = T_STRING; t->string = (Str){ .p = l->input.p + start, .len = len, .hash = hash, }; out[count++] = t; JUMP_TARGET; } rule(string) { // skip " l->pos++; size_t start = l->pos; 
size_t hash = FNV_OFFSET_BASIS; for (char cc = cur(l); cc > 0 && cc != '"'; l->pos++, cc = cur(l)) { hash ^= cc; hash *= FNV_PRIME; } if (UNLIKELY(cur(l) != '"')) { Str slice = Str_slice(&l->input, l->pos, l->input.len); fprintf(stderr, "lex: Unterminated string near: '%.*s'", (int)slice.len, slice.p); out[count++] = INTERN_EOF; } else { Token *t = CALL(a, request, sizeof(Token)); t->type = T_STRING; t->string = (Str){ .p = l->input.p + start, .len = l->pos - start, .hash = hash, }; out[count++] = t; // skip " l->pos++; } JUMP_TARGET; } rule(comment) { for (char cc = cur(l); cc > 0 && cc != '\n'; l->pos++, cc = cur(l)) { } JUMP_TARGET; } rule(whitespace) { l->pos++; JUMP_TARGET; } rule(unknown) { uint8_t c = cur(l); ASSERT(0, "Unexpected byte '%c' (0x%X) in input", c, c) } rule(end) { out[count++] = INTERN_EOF; return count; } #undef SINGLE_TOK
비교하기