#include "../core/tokenizer.h" #include "greatest.h" #include #define STR(x) _STR(x) #define _STR(x) #x #define CREATE_TOKENIZER(src) sand_create_tokenizer(src, strlen(src), "") TEST empty_source_gives_eof(void) { SandTokenizer tokenizer = CREATE_TOKENIZER(""); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); // It should still be the case for subsequent calls. ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } TEST single_char_tokens(void) { SandTokenizer tokenizer = CREATE_TOKENIZER("( ) { } , . - + ; / *"); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LEFT_PAREN); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RIGHT_PAREN); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LEFT_BRACE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RIGHT_BRACE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_COMMA); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_MINUS); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_PLUS); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_SEMICOLON); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_SLASH); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_STAR); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } TEST one_or_two_char_tokens(void) { SandTokenizer tokenizer = CREATE_TOKENIZER("! != = == > >= < <="); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG_EQUAL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EQUAL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EQUAL_EQUAL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_GREATER); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_GREATER_EQUAL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LESS); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_LESS_EQUAL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } TEST comments_are_ignored(void) { SandTokenizer tokenizer = CREATE_TOKENIZER(".// This is a comment\n."); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT); // No comment node here! 
TEST comments_are_ignored(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER(".// This is a comment\n.");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
    // No comment node here!
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST literal_string(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc\\\"def\\nghi\"");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_STRING);
    ASSERT_EQ(token.content_length, 15);
    ASSERT_STRN_EQ(token.content, "\"abc\\\"def\\nghi\"", token.content_length);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST unfinished_literal_string_eof(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
    ASSERT_STR_EQ(token.content, "Unexpected end-of-file inside string literal");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST unfinished_literal_string_eol(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("\"abc\n!");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
    ASSERT_STR_EQ(token.content, "Unexpected end-of-line inside string literal");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST unfinished_literal_string_escape_eol(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("\"\\\n!");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
    ASSERT_STR_EQ(token.content, "Unfinished escape inside string literal");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_BANG);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST unfinished_literal_string_escape_eof(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("\"\\");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_ERROR);
    ASSERT_STR_EQ(token.content, "Unfinished escape inside string literal");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST literal_number(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("123");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
    ASSERT_EQ(token.content_length, 3);
    ASSERT_STRN_EQ(token.content, "123", token.content_length);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST fractional_literal_number(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("123.00000001");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
    ASSERT_EQ(token.content_length, 12);
    ASSERT_STRN_EQ(token.content, "123.00000001", token.content_length);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}

TEST literal_number_followed_by_dot(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER("123.");
    SandToken token = sand_get_next_token(&tokenizer);
    ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER);
    ASSERT_EQ(token.content_length, 3);
    ASSERT_STRN_EQ(token.content, "123", token.content_length);
    ASSERT_EQm("Dot should not have been consumed",
               sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}
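// Another hedged sketch: assuming line comments terminate at end-of-file as
// well as at '\n', a comment with no trailing newline should leave nothing
// behind but EOF. Needs RUN_TEST(comment_at_eof) in SUITE(tokenizer) to run.
TEST comment_at_eof(void) {
    SandTokenizer tokenizer = CREATE_TOKENIZER(". // no trailing newline");
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_DOT);
    ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF);
    PASS();
}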
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ012456789", token.content_length); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } TEST identifiers_cannot_start_with_number(void) { SandTokenizer tokenizer = CREATE_TOKENIZER("123abc"); SandToken token = sand_get_next_token(&tokenizer); ASSERT_EQ(token.kind, SAND_TOKEN_NUMBER); ASSERT_EQ(token.content_length, 3); ASSERT_MEM_EQ(token.content, "123", token.content_length); token = sand_get_next_token(&tokenizer); ASSERT_EQ(token.kind, SAND_TOKEN_IDENTIFIER); ASSERT_EQ(token.content_length, 3); ASSERT_MEM_EQ(token.content, "abc", token.content_length); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } TEST keywords(void) { SandTokenizer tokenizer = CREATE_TOKENIZER("and else false for fun " "if nil or print return " "true var while "); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_AND); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_ELSE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FALSE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FOR); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_FUN); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_IF); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_NIL); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_OR); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_PRINT); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_RETURN); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_TRUE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_VAR); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_WHILE); ASSERT_EQ(sand_get_next_token(&tokenizer).kind, SAND_TOKEN_EOF); PASS(); } SUITE(tokenizer) { RUN_TEST(empty_source_gives_eof); RUN_TEST(single_char_tokens); RUN_TEST(one_or_two_char_tokens); RUN_TEST(comments_are_ignored); RUN_TEST(literal_string); RUN_TEST(unfinished_literal_string_eof); RUN_TEST(unfinished_literal_string_eol); RUN_TEST(unfinished_literal_string_escape_eol); RUN_TEST(unfinished_literal_string_escape_eof); RUN_TEST(literal_number); RUN_TEST(fractional_literal_number); RUN_TEST(literal_number_followed_by_dot); RUN_TEST(identifiers); RUN_TEST(identifiers_cannot_start_with_number); RUN_TEST(keywords); }