Diffstat (limited to 'src/unit/test_tokenizer.c')
-rw-r--r-- | src/unit/test_tokenizer.c | 233
1 files changed, 233 insertions, 0 deletions
diff --git a/src/unit/test_tokenizer.c b/src/unit/test_tokenizer.c
new file mode 100644
index 0000000..e3861b8
--- /dev/null
+++ b/src/unit/test_tokenizer.c
@@ -0,0 +1,233 @@
+#include "../core/tokenizer.h"
+
+#include "greatest.h"
+#include <string.h>
+
+#define STR(x) _STR(x)
+#define _STR(x) #x
+#define CREATE_TOKENIZER(src) sand_create_tokenizer(src, strlen(src), "<dummy file at " __FILE__ ":" STR(__LINE__) ">")
+
+TEST empty_source_gives_eof(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+
+    // It should still be the case for subsequent calls.
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+
+    PASS();
+}
+
+TEST single_char_tokens(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("( ) { } , . - + ; / *");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LEFT_PAREN);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RIGHT_PAREN);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LEFT_BRACE);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RIGHT_BRACE);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_COMMA);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_MINUS);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_PLUS);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_SEMICOLON);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_SLASH);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_STAR);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+
+    PASS();
+}
+
+TEST one_or_two_char_tokens(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("! != = == > >= < <=");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG_EQUAL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EQUAL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EQUAL_EQUAL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_GREATER);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_GREATER_EQUAL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LESS);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LESS_EQUAL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+
+    PASS();
+}
+
+TEST comments_are_ignored(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER(".// This is a comment\n.");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
+    // No comment node here!
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+
+    PASS();
+}
+
+TEST literal_string(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc\\\"def\\nghi\"");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_STRING);
+    ASSERT_EQ(token.content_length, 15);
+    ASSERT_STRN_EQ(token.content, "\"abc\\\"def\\nghi\"", token.content_length);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST unfinished_literal_string_eof(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
+    ASSERT_STR_EQ(token.content, "Unexpected end-of-file inside string literal");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST unfinished_literal_string_eol(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc\n!");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
+    ASSERT_STR_EQ(token.content, "Unexpected end-of-line inside string literal");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST unfinished_literal_string_escape_eol(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("\"\\\n!");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
+    ASSERT_STR_EQ(token.content, "Unfinished escape inside string literal");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST unfinished_literal_string_escape_eof(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("\"\\");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
+    ASSERT_STR_EQ(token.content, "Unfinished escape inside string literal");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST literal_number(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("123");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
+    ASSERT_EQ(token.content_length, 3);
+    ASSERT_STRN_EQ(token.content, "123", token.content_length);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST fractional_literal_number(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("123.00000001");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
+    ASSERT_EQ(token.content_length, 12);
+    ASSERT_STRN_EQ(token.content, "123.00000001", token.content_length);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST literal_number_followed_by_dot(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("123.");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
+    ASSERT_EQ(token.content_length, 3);
+    ASSERT_STRN_EQ(token.content, "123", token.content_length);
+
+    ASSERT_EQm("Dot should not have been consumed", sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST identifiers(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ012456789");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_IDENTIFIER);
+    ASSERT_EQ(token.content_length, 61);
+    ASSERT_MEM_EQ(token.content, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ012456789", token.content_length);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST identifiers_cannot_start_with_number(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("123abc");
+
+    SandToken token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
+    ASSERT_EQ(token.content_length, 3);
+    ASSERT_MEM_EQ(token.content, "123", token.content_length);
+
+    token = sand_get_next_token(&tokenizer);
+    ASSERT_EQ(token.kind, SAND_TOKEN_IDENTIFIER);
+    ASSERT_EQ(token.content_length, 3);
+    ASSERT_MEM_EQ(token.content, "abc", token.content_length);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+TEST keywords(void) {
+    SandTokenizer tokenizer = CREATE_TOKENIZER("and else false for fun "
+                                               "if nil or print return "
+                                               "true var while ");
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_AND);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_ELSE);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FALSE);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FOR);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FUN);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_IF);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_NIL);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_OR);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_PRINT);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RETURN);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_TRUE);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_VAR);
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_WHILE);
+
+    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
+    PASS();
+}
+
+SUITE(tokenizer) {
+    RUN_TEST(empty_source_gives_eof);
+    RUN_TEST(single_char_tokens);
+    RUN_TEST(one_or_two_char_tokens);
+    RUN_TEST(comments_are_ignored);
+    RUN_TEST(literal_string);
+    RUN_TEST(unfinished_literal_string_eof);
+    RUN_TEST(unfinished_literal_string_eol);
+    RUN_TEST(unfinished_literal_string_escape_eol);
+    RUN_TEST(unfinished_literal_string_escape_eof);
+    RUN_TEST(literal_number);
+    RUN_TEST(fractional_literal_number);
+    RUN_TEST(literal_number_followed_by_dot);
+    RUN_TEST(identifiers);
+    RUN_TEST(identifiers_cannot_start_with_number);
+    RUN_TEST(keywords);
+}
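Note that this file only defines SUITE(tokenizer); executing it still requires a test runner that calls the suite. The sketch below shows the usual way a greatest suite is wired into a main() using the library's standard macros (SUITE_EXTERN, GREATEST_MAIN_DEFS, GREATEST_MAIN_BEGIN, RUN_SUITE, GREATEST_MAIN_END). The file name test_main.c is only an assumption for illustration; the runner actually used by this repository is not part of this diff.

/* test_main.c -- hypothetical runner, not part of this commit. */
#include "greatest.h"

/* Declare the suite defined in src/unit/test_tokenizer.c. */
SUITE_EXTERN(tokenizer);

/* greatest needs these definitions in exactly one translation unit. */
GREATEST_MAIN_DEFS();

int main(int argc, char **argv) {
    GREATEST_MAIN_BEGIN();  /* parse command-line options such as the -s/-t filters */
    RUN_SUITE(tokenizer);
    GREATEST_MAIN_END();    /* print the summary and return the exit status */
}

Compiling this runner together with the test file and the tokenizer implementation would then give a standalone binary that reports per-test results; the exact build invocation depends on the project's build setup and is not shown in this diff.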