#ifndef SAND_TOKENIZER_H #define SAND_TOKENIZER_H // This module defines the tokenizer. It takes in the raw source text and splits // it into tokens. Tokens (a.k.a. lexemes) are the smallest meaningful units of // the Sand language. // // The tokenizer should not need to do any allocation. The source and filename // just have to remain valid for as long as the tokens are in use. #include "location.h" #include #define SAND_EACH_TOKEN(DO) \ /* Single-character tokens */ \ DO(SAND_TOKEN_LEFT_PAREN) \ DO(SAND_TOKEN_RIGHT_PAREN) \ DO(SAND_TOKEN_LEFT_BRACE) \ DO(SAND_TOKEN_RIGHT_BRACE) \ DO(SAND_TOKEN_COMMA) \ DO(SAND_TOKEN_DOT) \ DO(SAND_TOKEN_MINUS) \ DO(SAND_TOKEN_PLUS) \ DO(SAND_TOKEN_SEMICOLON) \ DO(SAND_TOKEN_SLASH) \ DO(SAND_TOKEN_STAR) \ /* One or two character tokens. */ \ DO(SAND_TOKEN_BANG) \ DO(SAND_TOKEN_BANG_EQUAL) \ DO(SAND_TOKEN_EQUAL) \ DO(SAND_TOKEN_EQUAL_EQUAL) \ DO(SAND_TOKEN_GREATER) \ DO(SAND_TOKEN_GREATER_EQUAL) \ DO(SAND_TOKEN_LESS) \ DO(SAND_TOKEN_LESS_EQUAL) \ /* Literals */ \ DO(SAND_TOKEN_IDENTIFIER) \ DO(SAND_TOKEN_STRING) \ DO(SAND_TOKEN_NUMBER) \ /* Keywords */ \ DO(SAND_TOKEN_AND) \ DO(SAND_TOKEN_ELSE) \ DO(SAND_TOKEN_FALSE) \ DO(SAND_TOKEN_FOR) \ DO(SAND_TOKEN_FUN) \ DO(SAND_TOKEN_IF) \ DO(SAND_TOKEN_NIL) \ DO(SAND_TOKEN_OR) \ DO(SAND_TOKEN_PRINT) \ DO(SAND_TOKEN_RETURN) \ DO(SAND_TOKEN_TRUE) \ DO(SAND_TOKEN_VAR) \ DO(SAND_TOKEN_WHILE) \ /* Special tokens */ \ DO(SAND_TOKEN_ERROR) \ DO(SAND_TOKEN_EOF) typedef enum { #define X(n) n, SAND_EACH_TOKEN(X) #undef X } SandTokenKind; typedef struct { SandTokenKind kind; const char *content; size_t content_length; SandLocation location; } SandToken; typedef struct { const char *const filename; const char *start; unsigned start_line; unsigned start_column; const char *current; unsigned current_line; unsigned current_column; } SandTokenizer; // There is no corresponding destructor, as a tokenizer does not own any resources. SandTokenizer sand_create_tokenizer(const char *source, size_t source_length, const char *filename); SandToken sand_get_next_token(SandTokenizer *); // Returns the string representation of the token kind. const char *sand_token_kind_to_string(SandTokenKind); #endif