blob: 1c435a9afcb143178c51eb5ed3eada57d4d09035 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
#ifndef SAND_TOKENIZER_H
#define SAND_TOKENIZER_H
// This module defines the tokenizer. It takes in the raw source text and splits
// it into tokens. Tokens (a.k.a. lexemes) are the smallest meaningful units of
// the Sand language.
//
// The tokenizer should not need to do any allocation. The source and filename
// just have to remain valid for as long as the tokens are in use.
#include "location.h"
#include <stddef.h>
// X-macro listing every token kind of the language.
//
// SAND_EACH_TOKEN(DO) expands to DO(name) once for each token kind, in the
// order listed below. DO must be a macro taking a single argument. The
// SandTokenKind enum in this header is generated from this list, so the order
// of the entries here determines the numeric value of each enumerator.
#define SAND_EACH_TOKEN(DO) \
/* Single-character tokens */ \
DO(SAND_TOKEN_LEFT_PAREN) \
DO(SAND_TOKEN_RIGHT_PAREN) \
DO(SAND_TOKEN_LEFT_BRACE) \
DO(SAND_TOKEN_RIGHT_BRACE) \
DO(SAND_TOKEN_COMMA) \
DO(SAND_TOKEN_DOT) \
DO(SAND_TOKEN_MINUS) \
DO(SAND_TOKEN_PLUS) \
DO(SAND_TOKEN_SEMICOLON) \
DO(SAND_TOKEN_SLASH) \
DO(SAND_TOKEN_STAR) \
/* One or two character tokens. */ \
DO(SAND_TOKEN_BANG) \
DO(SAND_TOKEN_BANG_EQUAL) \
DO(SAND_TOKEN_EQUAL) \
DO(SAND_TOKEN_EQUAL_EQUAL) \
DO(SAND_TOKEN_GREATER) \
DO(SAND_TOKEN_GREATER_EQUAL) \
DO(SAND_TOKEN_LESS) \
DO(SAND_TOKEN_LESS_EQUAL) \
/* Literals */ \
DO(SAND_TOKEN_IDENTIFIER) \
DO(SAND_TOKEN_STRING) \
DO(SAND_TOKEN_NUMBER) \
/* Keywords */ \
DO(SAND_TOKEN_AND) \
DO(SAND_TOKEN_ELSE) \
DO(SAND_TOKEN_FALSE) \
DO(SAND_TOKEN_FOR) \
DO(SAND_TOKEN_FUN) \
DO(SAND_TOKEN_IF) \
DO(SAND_TOKEN_NIL) \
DO(SAND_TOKEN_OR) \
DO(SAND_TOKEN_PRINT) \
DO(SAND_TOKEN_RETURN) \
DO(SAND_TOKEN_TRUE) \
DO(SAND_TOKEN_VAR) \
DO(SAND_TOKEN_WHILE) \
/* Special tokens */ \
DO(SAND_TOKEN_ERROR) \
DO(SAND_TOKEN_EOF)
// The kind of a token. One enumerator is generated per SAND_EACH_TOKEN entry,
// in list order, so the first entry has the value 0 and each following entry
// is one greater than the previous.
typedef enum {
#define SAND_TOKEN_AS_ENUMERATOR(name) name,
SAND_EACH_TOKEN(SAND_TOKEN_AS_ENUMERATOR)
#undef SAND_TOKEN_AS_ENUMERATOR
} SandTokenKind;
// A single token, as returned by sand_get_next_token.
typedef struct {
// Which kind of token this is; one of the entries of SAND_EACH_TOKEN.
SandTokenKind kind;
// The text of the token. NOTE(review): presumably points into the source text
// given to the tokenizer rather than owning a copy (the module comment requires
// the source to stay valid while tokens are in use) -- confirm against the
// implementation.
const char *content;
// Length of `content`. NOTE(review): presumably `content` is not
// NUL-terminated and must always be read together with this length -- confirm.
size_t content_length;
// Position of the token. SandLocation is not declared in this header;
// presumably it comes from location.h.
SandLocation location;
} SandToken;
// The state of a tokenizer. Obtain one from sand_create_tokenizer and pass a
// pointer to it to sand_get_next_token.
//
// The first three members are const-qualified, so they are fixed once the
// struct has been initialized; as a consequence a SandTokenizer cannot be
// assigned to as a whole after initialization.
typedef struct {
// Name of the file being tokenized (not owned; see the module comment).
const char *const filename;
// The source text being tokenized (not owned; see the module comment).
const char *const source;
// Length of `source`.
const size_t source_length;
// NOTE(review): `start`, `start_line` and `start_column` presumably describe
// where the token currently being scanned begins -- confirm against the
// implementation. Whether lines/columns are 0- or 1-based is not visible here.
const char *start;
unsigned start_line;
unsigned start_column;
// NOTE(review): `current`, `current_line` and `current_column` presumably
// describe the scan position, i.e. the next character to be consumed --
// confirm against the implementation.
const char *current;
unsigned current_line;
unsigned current_column;
} SandTokenizer;
// There is no corresponding destructor, as a tokenizer does not own any resources.
SandTokenizer sand_create_tokenizer(const char *source, size_t source_length, const char *filename);
SandToken sand_get_next_token(SandTokenizer *);
// Returns the string representation of the token kind.
const char *sand_token_kind_to_string(SandTokenKind);
#endif
|