diff options
Diffstat (limited to 'src/core/tokenizer.h')
-rw-r--r-- | src/core/tokenizer.h | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/src/core/tokenizer.h b/src/core/tokenizer.h new file mode 100644 index 0000000..6105c3e --- /dev/null +++ b/src/core/tokenizer.h @@ -0,0 +1,93 @@ +#ifndef SAND_TOKENIZER_H +#define SAND_TOKENIZER_H + +// This module defines the tokenizer. It takes in the raw source text and splits +// it into tokens. Tokens (a.k.a. lexemes) are the smallest meaningful units of +// the Sand language. +// +// The tokenizer should not need to do any allocation. The source and filename +// just have to remain valid for as long as the tokens are in use. + +#include "location.h" + +#include <stddef.h> + +#define SAND_EACH_TOKEN(DO) \ + /* Single-character tokens */ \ + DO(SAND_TOKEN_LEFT_PAREN) \ + DO(SAND_TOKEN_RIGHT_PAREN) \ + DO(SAND_TOKEN_LEFT_BRACE) \ + DO(SAND_TOKEN_RIGHT_BRACE) \ + DO(SAND_TOKEN_COMMA) \ + DO(SAND_TOKEN_DOT) \ + DO(SAND_TOKEN_MINUS) \ + DO(SAND_TOKEN_PLUS) \ + DO(SAND_TOKEN_SEMICOLON) \ + DO(SAND_TOKEN_SLASH) \ + DO(SAND_TOKEN_STAR) \ + /* One or two character tokens. */ \ + DO(SAND_TOKEN_BANG) \ + DO(SAND_TOKEN_BANG_EQUAL) \ + DO(SAND_TOKEN_EQUAL) \ + DO(SAND_TOKEN_EQUAL_EQUAL) \ + DO(SAND_TOKEN_GREATER) \ + DO(SAND_TOKEN_GREATER_EQUAL) \ + DO(SAND_TOKEN_LESS) \ + DO(SAND_TOKEN_LESS_EQUAL) \ + /* Literals */ \ + DO(SAND_TOKEN_IDENTIFIER) \ + DO(SAND_TOKEN_STRING) \ + DO(SAND_TOKEN_NUMBER) \ + /* Keywords */ \ + DO(SAND_TOKEN_AND) \ + DO(SAND_TOKEN_ELSE) \ + DO(SAND_TOKEN_FALSE) \ + DO(SAND_TOKEN_FOR) \ + DO(SAND_TOKEN_FUN) \ + DO(SAND_TOKEN_IF) \ + DO(SAND_TOKEN_NIL) \ + DO(SAND_TOKEN_OR) \ + DO(SAND_TOKEN_PRINT) \ + DO(SAND_TOKEN_RETURN) \ + DO(SAND_TOKEN_TRUE) \ + DO(SAND_TOKEN_VAR) \ + DO(SAND_TOKEN_WHILE) \ + /* Special tokens */ \ + DO(SAND_TOKEN_ERROR) \ + DO(SAND_TOKEN_EOF) + + +typedef enum { +#define X(n) n, +SAND_EACH_TOKEN(X) +#undef X +} SandTokenKind; + +typedef struct { + SandTokenKind kind; + const char *content; + size_t content_length; + SandLocation location; +} SandToken; + +typedef struct { + const char *const filename; + + const char *start; + unsigned start_line; + unsigned start_column; + + const char *current; + unsigned current_line; + unsigned current_column; +} SandTokenizer; + +// There is no corresponding destructor, as a tokenizer does not own any resources. +SandTokenizer sand_create_tokenizer(const char *source, size_t source_length, const char *filename); + +SandToken sand_get_next_token(SandTokenizer *); + +// Returns the string representation of the token kind. +const char *sand_token_kind_to_string(SandTokenKind); + +#endif |