diff --git a/starlark/ast.h b/starlark/ast.h index fc8b546..03cdd83 100644 --- a/starlark/ast.h +++ b/starlark/ast.h @@ -5,6 +5,7 @@ #ifndef STARLARK_AST_H_ #define STARLARK_AST_H_ +#include #include #include #include @@ -19,6 +20,11 @@ struct StringLiteral { constexpr bool operator==(StringLiteral const &) const = default; }; +struct IntLiteral { + std::int64_t value{}; + constexpr bool operator==(IntLiteral const &) const = default; +}; + struct Identifier { std::string name; constexpr bool operator==(Identifier const &) const = default; @@ -28,7 +34,8 @@ struct CallExpr; struct DictExpr; struct ListExpr; struct ListComp; -using Expression = std::variant; +using Expression = + std::variant; struct Argument; diff --git a/starlark/parser.h b/starlark/parser.h index e82e106..24c89b5 100644 --- a/starlark/parser.h +++ b/starlark/parser.h @@ -108,6 +108,10 @@ class Parser { return StringLiteral{.value = std::move(sl->value)}; } + if (auto *il = std::get_if(&token)) { + return IntLiteral{.value = il->value}; + } + if (std::holds_alternative(token)) { std::vector elements; while (true) { @@ -190,8 +194,8 @@ class Parser { continue; } - std::cerr << "Expected ',' or ']' in list expression, got " << to_string(next_token) - << ".\n"; + std::cerr << "Expected ',' or ']' in list expression, got '" + << to_string(next_token) << "'.\n"; return std::nullopt; } @@ -255,8 +259,8 @@ class Parser { continue; } - std::cerr << "Expected ',' or '}' in dict expression, got " << to_string(next_token) - << ".\n"; + std::cerr << "Expected ',' or '}' in dict expression, got '" + << to_string(next_token) << "'.\n"; return std::nullopt; } @@ -311,8 +315,8 @@ class Parser { auto const &token = *maybe_token; if (!std::holds_alternative(token) && !std::holds_alternative(token)) { - std::cerr << "Expected ',' or ')' in argument list, got " << to_string(token) - << ".\n"; + std::cerr << "Expected ',' or ')' in argument list, got '" << to_string(token) + << "'.\n"; return std::nullopt; } @@ -408,8 +412,8 @@ class Parser { } if (!std::holds_alternative(*maybe_comma_or_rparen)) { - std::cerr << "Expected ',' or ')' in load statement, got " - << to_string(*maybe_comma_or_rparen) << ".\n"; + std::cerr << "Expected ',' or ')' in load statement, got '" + << to_string(*maybe_comma_or_rparen) << "'.\n"; return std::nullopt; } @@ -459,8 +463,8 @@ class Parser { } if (next != expected) { - std::cerr << "Expected " << to_string(expected) << ", got " << to_string(*next) - << ".\n"; + std::cerr << "Expected " << to_string(expected) << ", got '" << to_string(*next) + << "'.\n"; return false; } @@ -479,8 +483,8 @@ class Parser { return std::move(*t); } - std::cerr << "Expected token of type " << typeid(T).name() << ", got " << to_string(*next) - << ".\n"; + std::cerr << "Expected token of type " << typeid(T).name() << ", got '" << to_string(*next) + << "'.\n"; return std::nullopt; } }; diff --git a/starlark/parser_test.cc b/starlark/parser_test.cc index f91359a..0db7a51 100644 --- a/starlark/parser_test.cc +++ b/starlark/parser_test.cc @@ -249,6 +249,16 @@ int main() { }, }, }, + { + "42", + starlark::Program{ + .statements{ + starlark::ExpressionStmt{ + .expr{starlark::IntLiteral{42}}, + }, + }, + }, + }, }); // TODO(robinlinden): Return error codes from parser and use that here. diff --git a/starlark/token.h b/starlark/token.h index bf92a43..6b1c0a9 100644 --- a/starlark/token.h +++ b/starlark/token.h @@ -5,6 +5,7 @@ #ifndef STARLARK_TOKEN_H_ #define STARLARK_TOKEN_H_ +#include #include #include #include @@ -311,6 +312,13 @@ struct Identifier { constexpr std::string_view to_string(Identifier const &id) { return id.name; } +struct IntLiteral { + std::int64_t value{}; + constexpr bool operator==(IntLiteral const &) const = default; +}; + +inline std::string to_string(IntLiteral const &i) { return std::to_string(i.value); } + struct StringLiteral { std::string value; constexpr bool operator==(StringLiteral const &) const = default; @@ -384,6 +392,7 @@ using Token = std::variant< token::Lambda, token::Return, token::Identifier, + token::IntLiteral, token::StringLiteral, token::Eof>; diff --git a/starlark/tokenizer.h b/starlark/tokenizer.h index 7d0150b..519069a 100644 --- a/starlark/tokenizer.h +++ b/starlark/tokenizer.h @@ -10,10 +10,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -43,11 +45,14 @@ class Tokenizer { return tokenize_identifier(); } + if (is_digit(input_[pos_]) || + (input_[pos_] == '-' && pos_ + 1 < input_.size() && is_digit(input_[pos_ + 1]))) { + return tokenize_number(); + } + return tokenize_punctuator(); } - std::string_view remaining_input() const { return input_.substr(pos_); } - private: bool is_whitespace(char c) const { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } @@ -73,6 +78,30 @@ class Tokenizer { } } + std::optional tokenize_number() { + assert(is_digit(input_[pos_]) || input_[pos_] == '-'); + + std::size_t start = pos_++; + while (pos_ < input_.size() && is_digit(input_[pos_])) { + ++pos_; + } + + // TODO(robinlinden): Support floats. + if (pos_ < input_.size() && (is_alpha(input_[pos_]) || input_[pos_] == '.')) { + return std::nullopt; + } + + auto numstr = input_.substr(start, pos_ - start); + + std::int64_t value{}; + auto [ptr, ec] = std::from_chars(numstr.data(), numstr.data() + numstr.size(), value); + if (ec != std::errc{} || ptr != numstr.data() + numstr.size()) { + return std::nullopt; + } + + return token::IntLiteral{value}; + } + // TODO(robinlinden): Support escapes. std::optional tokenize_multiline_string() { pos_ += 3; // Move past the opening triple quotes diff --git a/starlark/tokenizer_test.cc b/starlark/tokenizer_test.cc index d176992..9a7f1a7 100644 --- a/starlark/tokenizer_test.cc +++ b/starlark/tokenizer_test.cc @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include #include @@ -18,6 +20,7 @@ int main() { namespace t = starlark::token; + using Tokens = std::vector; auto const test_cases = std::to_array>>>({ @@ -36,6 +39,21 @@ int main() { }, {"global", std::nullopt}, // Reserved identifier. {"globalist", std::vector{t::Identifier{"globalist"}}}, + {"1234", Tokens{t::IntLiteral{1234}}}, + {"00001234", Tokens{t::IntLiteral{1234}}}, + {"123abc", std::nullopt}, + {"123.123", std::nullopt}, // TODO(robinlinden): Floats. + {"-123", Tokens{t::IntLiteral{-123}}}, + { + "9223372036854775807", + Tokens{t::IntLiteral{std::numeric_limits::max()}}, + }, + {"9223372036854775808", std::nullopt}, // Out of range. :( + { + "-9223372036854775808", + Tokens{t::IntLiteral{std::numeric_limits::min()}}, + }, + {"-9223372036854775809", std::nullopt}, // Out of range. :( }); etest::Suite s{};