summaryrefslogtreecommitdiff
path: root/src/core/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/tokenizer.h')
-rw-r--r--src/core/tokenizer.h93
1 files changed, 93 insertions, 0 deletions
diff --git a/src/core/tokenizer.h b/src/core/tokenizer.h
new file mode 100644
index 0000000..6105c3e
--- /dev/null
+++ b/src/core/tokenizer.h
@@ -0,0 +1,93 @@
+#ifndef SAND_TOKENIZER_H
+#define SAND_TOKENIZER_H
+
+// This module defines the tokenizer. It takes in the raw source text and splits
+// it into tokens. Tokens (a.k.a. lexemes) are the smallest meaningful units of
+// the Sand language.
+//
+// The tokenizer should not need to do any allocation. The source and filename
+// just have to remain valid for as long as the tokens are in use.
+
+#include "location.h"
+
+#include <stddef.h>
+
+#define SAND_EACH_TOKEN(APPLY) /* X-macro: expands APPLY(kind) once per token kind, in this order */ \
+ /* Punctuation that is always a single character. */ \
+ APPLY(SAND_TOKEN_LEFT_PAREN) \
+ APPLY(SAND_TOKEN_RIGHT_PAREN) \
+ APPLY(SAND_TOKEN_LEFT_BRACE) \
+ APPLY(SAND_TOKEN_RIGHT_BRACE) \
+ APPLY(SAND_TOKEN_COMMA) \
+ APPLY(SAND_TOKEN_DOT) \
+ APPLY(SAND_TOKEN_MINUS) \
+ APPLY(SAND_TOKEN_PLUS) \
+ APPLY(SAND_TOKEN_SEMICOLON) \
+ APPLY(SAND_TOKEN_SLASH) \
+ APPLY(SAND_TOKEN_STAR) \
+ /* Operators that are one or two characters long. */ \
+ APPLY(SAND_TOKEN_BANG) \
+ APPLY(SAND_TOKEN_BANG_EQUAL) \
+ APPLY(SAND_TOKEN_EQUAL) \
+ APPLY(SAND_TOKEN_EQUAL_EQUAL) \
+ APPLY(SAND_TOKEN_GREATER) \
+ APPLY(SAND_TOKEN_GREATER_EQUAL) \
+ APPLY(SAND_TOKEN_LESS) \
+ APPLY(SAND_TOKEN_LESS_EQUAL) \
+ /* Literals and identifiers. */ \
+ APPLY(SAND_TOKEN_IDENTIFIER) \
+ APPLY(SAND_TOKEN_STRING) \
+ APPLY(SAND_TOKEN_NUMBER) \
+ /* Reserved keywords. */ \
+ APPLY(SAND_TOKEN_AND) \
+ APPLY(SAND_TOKEN_ELSE) \
+ APPLY(SAND_TOKEN_FALSE) \
+ APPLY(SAND_TOKEN_FOR) \
+ APPLY(SAND_TOKEN_FUN) \
+ APPLY(SAND_TOKEN_IF) \
+ APPLY(SAND_TOKEN_NIL) \
+ APPLY(SAND_TOKEN_OR) \
+ APPLY(SAND_TOKEN_PRINT) \
+ APPLY(SAND_TOKEN_RETURN) \
+ APPLY(SAND_TOKEN_TRUE) \
+ APPLY(SAND_TOKEN_VAR) \
+ APPLY(SAND_TOKEN_WHILE) \
+ /* Special (non-lexical) token kinds. */ \
+ APPLY(SAND_TOKEN_ERROR) \
+ APPLY(SAND_TOKEN_EOF)
+
+
+typedef enum { // One enumerator per SAND_EACH_TOKEN entry, numbered from 0 in list order.
+#define SAND_TOKEN_ENUMERATOR(name) name,
+SAND_EACH_TOKEN(SAND_TOKEN_ENUMERATOR)
+#undef SAND_TOKEN_ENUMERATOR
+} SandTokenKind;
+
+typedef struct { // A single token, returned by value from sand_get_next_token.
+ SandTokenKind kind; // which of the SAND_EACH_TOKEN kinds this token is
+ const char *content; // token text; presumably points into the source buffer rather than a copy — confirm
+ size_t content_length; // length of content; NOTE(review): content is presumably not NUL-terminated — confirm
+ SandLocation location; // where the token is; SandLocation is presumably declared in location.h
+} SandToken;
+
+typedef struct { // Tokenizer state; it owns no resources, which is why there is no destructor.
+ const char *const filename; // const member: fixed at initialization, so the struct cannot be assigned to afterwards
+
+ const char *start; // presumably the first character of the token being scanned — confirm
+ unsigned start_line; // line/column paired with start
+ unsigned start_column;
+
+ const char *current; // presumably the next character to be read — confirm
+ unsigned current_line; // line/column paired with current
+ unsigned current_column;
+} SandTokenizer; // NOTE(review): no end pointer or length is stored although sand_create_tokenizer takes source_length — confirm how end of input is detected
+
+// Creates a tokenizer. There is no corresponding destructor, as a tokenizer does not own any resources (source and filename are only borrowed).
+SandTokenizer sand_create_tokenizer(const char *source, size_t source_length, const char *filename);
+
+SandToken sand_get_next_token(SandTokenizer *tokenizer); // Returns the next token; takes a mutable tokenizer, so presumably advances it — confirm.
+
+// Returns the string representation of the given token kind.
+const char *sand_token_kind_to_string(SandTokenKind kind);
+
+#endif