summaryrefslogtreecommitdiff
path: root/src/core/tokenizer.h
blob: 6105c3e24b9eb10a93cdbbf9a153442995e67e73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#ifndef SAND_TOKENIZER_H
#define SAND_TOKENIZER_H

// This module defines the tokenizer. It takes in the raw source text and splits
// it into tokens. Tokens (a.k.a. lexemes) are the smallest meaningful units of
// the Sand language.
//
// The tokenizer should not need to do any allocation. The source and filename
// just have to remain valid for as long as the tokens are in use.

#include "location.h"

#include <stddef.h>

// X-macro listing every token kind of the Sand language exactly once.
// Expands to EMIT(kind) for each kind, in the order written below. The
// expansion contains no separators between invocations, so EMIT has to
// supply its own (for example a trailing comma).
#define SAND_EACH_TOKEN(EMIT) \
	/* Punctuation that is always one character long */ \
	EMIT(SAND_TOKEN_LEFT_PAREN) \
	EMIT(SAND_TOKEN_RIGHT_PAREN) \
	EMIT(SAND_TOKEN_LEFT_BRACE) \
	EMIT(SAND_TOKEN_RIGHT_BRACE) \
	EMIT(SAND_TOKEN_COMMA) \
	EMIT(SAND_TOKEN_DOT) \
	EMIT(SAND_TOKEN_MINUS) \
	EMIT(SAND_TOKEN_PLUS) \
	EMIT(SAND_TOKEN_SEMICOLON) \
	EMIT(SAND_TOKEN_SLASH) \
	EMIT(SAND_TOKEN_STAR) \
	/* Operators that are one or two characters long */ \
	EMIT(SAND_TOKEN_BANG) \
	EMIT(SAND_TOKEN_BANG_EQUAL) \
	EMIT(SAND_TOKEN_EQUAL) \
	EMIT(SAND_TOKEN_EQUAL_EQUAL) \
	EMIT(SAND_TOKEN_GREATER) \
	EMIT(SAND_TOKEN_GREATER_EQUAL) \
	EMIT(SAND_TOKEN_LESS) \
	EMIT(SAND_TOKEN_LESS_EQUAL) \
	/* Literals */ \
	EMIT(SAND_TOKEN_IDENTIFIER) \
	EMIT(SAND_TOKEN_STRING) \
	EMIT(SAND_TOKEN_NUMBER) \
	/* Reserved words */ \
	EMIT(SAND_TOKEN_AND) \
	EMIT(SAND_TOKEN_ELSE) \
	EMIT(SAND_TOKEN_FALSE) \
	EMIT(SAND_TOKEN_FOR) \
	EMIT(SAND_TOKEN_FUN) \
	EMIT(SAND_TOKEN_IF) \
	EMIT(SAND_TOKEN_NIL) \
	EMIT(SAND_TOKEN_OR) \
	EMIT(SAND_TOKEN_PRINT) \
	EMIT(SAND_TOKEN_RETURN) \
	EMIT(SAND_TOKEN_TRUE) \
	EMIT(SAND_TOKEN_VAR) \
	EMIT(SAND_TOKEN_WHILE) \
	/* Special tokens */ \
	EMIT(SAND_TOKEN_ERROR) \
	EMIT(SAND_TOKEN_EOF)


// Every kind of token in the Sand language. The enumerators are generated
// from SAND_EACH_TOKEN, so they appear in the same order as in that list and
// take consecutive values starting at 0.
typedef enum {
// A namespaced helper macro is used (rather than a short name such as X) so
// that including this header never redefines or undefines a macro that
// belongs to the including file.
#define SAND_TOKEN_ENUMERATOR(kind) kind,
	SAND_EACH_TOKEN(SAND_TOKEN_ENUMERATOR)
#undef SAND_TOKEN_ENUMERATOR
} SandTokenKind;

// A single token, as returned by sand_get_next_token().
typedef struct {
	// Which kind of token this is.
	SandTokenKind kind;
	// The token's text; content_length gives its length.
	// NOTE(review): presumably this points into the source text handed to the
	// tokenizer and is not NUL-terminated at the token's end (the tokenizer is
	// documented as non-allocating) -- confirm against the implementation.
	const char *content;
	size_t content_length;
	// Location associated with the token; SandLocation comes from location.h.
	SandLocation location;
} SandToken;

// Scanning state of a tokenizer. It consists only of pointers and counters.
typedef struct {
	// Name of the file being tokenized. The pointer itself is const, so it can
	// only be set when the struct is initialized, and a SandTokenizer cannot be
	// assigned to as a whole afterwards.
	const char *const filename;

	// NOTE(review): presumably the position (pointer, line, column) where the
	// token currently being scanned begins -- confirm against the
	// implementation.
	const char *start;
	unsigned start_line;
	unsigned start_column;

	// NOTE(review): presumably the position (pointer, line, column) of the next
	// character to be examined -- confirm against the implementation.
	const char *current;
	unsigned current_line;
	unsigned current_column;
} SandTokenizer;

// Creates a tokenizer for `source` and returns it by value. As noted at the
// top of this header, `source` and `filename` must remain valid for as long as
// the tokens are in use.
// NOTE(review): `source_length` is accepted here, but SandTokenizer has no
// length or end-of-input field to keep it in -- confirm how the end of the
// source is detected (e.g. a terminating NUL).
// There is no corresponding destructor, as a tokenizer does not own any resources.
SandTokenizer sand_create_tokenizer(const char *source, size_t source_length, const char *filename);

// Returns the next token from `tokenizer`. The tokenizer is passed by
// non-const pointer so that its scanning state can be updated by the call.
// NOTE(review): presumably returns a SAND_TOKEN_EOF token once the source is
// exhausted and a SAND_TOKEN_ERROR token for input that cannot be tokenized --
// confirm against the implementation.
SandToken sand_get_next_token(SandTokenizer *tokenizer);

// Returns the string representation of the token kind.
const char *sand_token_kind_to_string(SandTokenKind);

#endif