Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion starlark/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#ifndef STARLARK_AST_H_
#define STARLARK_AST_H_

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
Expand All @@ -19,6 +20,11 @@ struct StringLiteral {
constexpr bool operator==(StringLiteral const &) const = default;
};

// AST node for an integer literal expression. `value` is a signed 64-bit
// integer that defaults to 0; equality is the compiler-generated member-wise
// comparison.
struct IntLiteral {
std::int64_t value{};
constexpr bool operator==(IntLiteral const &) const = default;
};

struct Identifier {
std::string name;
constexpr bool operator==(Identifier const &) const = default;
Expand All @@ -28,7 +34,8 @@ struct CallExpr;
struct DictExpr;
struct ListExpr;
struct ListComp;
using Expression = std::variant<CallExpr, StringLiteral, Identifier, ListComp, ListExpr, DictExpr>;
using Expression =
std::variant<CallExpr, StringLiteral, IntLiteral, Identifier, ListComp, ListExpr, DictExpr>;

struct Argument;

Expand Down
28 changes: 16 additions & 12 deletions starlark/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ class Parser {
return StringLiteral{.value = std::move(sl->value)};
}

if (auto *il = std::get_if<token::IntLiteral>(&token)) {
return IntLiteral{.value = il->value};
}

if (std::holds_alternative<token::LBracket>(token)) {
std::vector<Expression> elements;
while (true) {
Expand Down Expand Up @@ -190,8 +194,8 @@ class Parser {
continue;
}

std::cerr << "Expected ',' or ']' in list expression, got " << to_string(next_token)
<< ".\n";
std::cerr << "Expected ',' or ']' in list expression, got '"
<< to_string(next_token) << "'.\n";
return std::nullopt;
}

Expand Down Expand Up @@ -255,8 +259,8 @@ class Parser {
continue;
}

std::cerr << "Expected ',' or '}' in dict expression, got " << to_string(next_token)
<< ".\n";
std::cerr << "Expected ',' or '}' in dict expression, got '"
<< to_string(next_token) << "'.\n";
return std::nullopt;
}

Expand Down Expand Up @@ -311,8 +315,8 @@ class Parser {
auto const &token = *maybe_token;
if (!std::holds_alternative<token::Comma>(token) &&
!std::holds_alternative<token::RParen>(token)) {
std::cerr << "Expected ',' or ')' in argument list, got " << to_string(token)
<< ".\n";
std::cerr << "Expected ',' or ')' in argument list, got '" << to_string(token)
<< "'.\n";
return std::nullopt;
}

Expand Down Expand Up @@ -408,8 +412,8 @@ class Parser {
}

if (!std::holds_alternative<token::Comma>(*maybe_comma_or_rparen)) {
std::cerr << "Expected ',' or ')' in load statement, got "
<< to_string(*maybe_comma_or_rparen) << ".\n";
std::cerr << "Expected ',' or ')' in load statement, got '"
<< to_string(*maybe_comma_or_rparen) << "'.\n";
return std::nullopt;
}

Expand Down Expand Up @@ -459,8 +463,8 @@ class Parser {
}

if (next != expected) {
std::cerr << "Expected " << to_string(expected) << ", got " << to_string(*next)
<< ".\n";
std::cerr << "Expected " << to_string(expected) << ", got '" << to_string(*next)
<< "'.\n";
return false;
}

Expand All @@ -479,8 +483,8 @@ class Parser {
return std::move(*t);
}

std::cerr << "Expected token of type " << typeid(T).name() << ", got " << to_string(*next)
<< ".\n";
std::cerr << "Expected token of type " << typeid(T).name() << ", got '" << to_string(*next)
<< "'.\n";
return std::nullopt;
}
};
Expand Down
10 changes: 10 additions & 0 deletions starlark/parser_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,16 @@ int main() {
},
},
},
{
"42",
starlark::Program{
.statements{
starlark::ExpressionStmt{
.expr{starlark::IntLiteral{42}},
},
},
},
},
});

// TODO(robinlinden): Return error codes from parser and use that here.
Expand Down
9 changes: 9 additions & 0 deletions starlark/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#ifndef STARLARK_TOKEN_H_
#define STARLARK_TOKEN_H_

#include <cstdint>
#include <format>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -311,6 +312,13 @@ struct Identifier {

// Returns the identifier's name as a string view. No copy is made here, so the
// result should not be used after the underlying name storage goes away.
constexpr std::string_view to_string(Identifier const &id) {
    return id.name;
}

// Token for an integer literal. `value` holds the number as a signed 64-bit
// integer and defaults to 0; equality is member-wise.
struct IntLiteral {
    std::int64_t value{};
    constexpr bool operator==(IntLiteral const &) const = default;
};

// Formats the literal's value as text using std::to_string.
inline std::string to_string(IntLiteral const &i) {
    std::int64_t const number = i.value;
    return std::to_string(number);
}

struct StringLiteral {
std::string value;
constexpr bool operator==(StringLiteral const &) const = default;
Expand Down Expand Up @@ -384,6 +392,7 @@ using Token = std::variant<
token::Lambda,
token::Return,
token::Identifier,
token::IntLiteral,
token::StringLiteral,
token::Eof>;

Expand Down
33 changes: 31 additions & 2 deletions starlark/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <charconv>
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <system_error>
#include <utility>
#include <variant>
#include <vector>
Expand Down Expand Up @@ -43,11 +45,14 @@ class Tokenizer {
return tokenize_identifier();
}

if (is_digit(input_[pos_]) ||
(input_[pos_] == '-' && pos_ + 1 < input_.size() && is_digit(input_[pos_ + 1]))) {
return tokenize_number();
}

return tokenize_punctuator();
}

// Returns the part of the input from the current position onwards.
std::string_view remaining_input() const {
    auto const rest = input_.substr(pos_);
    return rest;
}

private:
// Reports whether c is a space, tab, line feed, or carriage return.
bool is_whitespace(char c) const {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}

Expand All @@ -73,6 +78,30 @@ class Tokenizer {
}
}

// Lexes a base-10 integer literal starting at pos_, where the first character
// is either a digit or a leading '-'. On success pos_ is left just past the
// last digit and an IntLiteral token is returned. Returns std::nullopt when the
// digits are immediately followed by a letter or '.', or when the text does not
// convert to a std::int64_t (e.g. the value is out of range).
std::optional<Token> tokenize_number() {
    assert(is_digit(input_[pos_]) || input_[pos_] == '-');

    std::size_t const begin = pos_;

    // Step over the first character (sign or digit), then any digits after it.
    do {
        ++pos_;
    } while (pos_ < input_.size() && is_digit(input_[pos_]));

    // TODO(robinlinden): Support floats.
    if (pos_ < input_.size()) {
        char const trailing = input_[pos_];
        if (is_alpha(trailing) || trailing == '.') {
            return std::nullopt;
        }
    }

    auto const digits = input_.substr(begin, pos_ - begin);
    char const *const first = digits.data();
    char const *const last = first + digits.size();

    std::int64_t parsed{};
    auto const result = std::from_chars(first, last, parsed);

    // Both a conversion error and unconsumed characters count as failure.
    bool const converted = result.ec == std::errc{} && result.ptr == last;
    if (!converted) {
        return std::nullopt;
    }

    return token::IntLiteral{parsed};
}

// TODO(robinlinden): Support escapes.
std::optional<Token> tokenize_multiline_string() {
pos_ += 3; // Move past the opening triple quotes
Expand Down
18 changes: 18 additions & 0 deletions starlark/tokenizer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include <array>
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <string_view>
Expand All @@ -18,6 +20,7 @@

int main() {
namespace t = starlark::token;
using Tokens = std::vector<starlark::Token>;

auto const test_cases =
std::to_array<std::pair<std::string_view, std::optional<std::vector<starlark::Token>>>>({
Expand All @@ -36,6 +39,21 @@ int main() {
},
{"global", std::nullopt}, // Reserved identifier.
{"globalist", std::vector<starlark::Token>{t::Identifier{"globalist"}}},
{"1234", Tokens{t::IntLiteral{1234}}},
{"00001234", Tokens{t::IntLiteral{1234}}},
{"123abc", std::nullopt},
{"123.123", std::nullopt}, // TODO(robinlinden): Floats.
{"-123", Tokens{t::IntLiteral{-123}}},
{
"9223372036854775807",
Tokens{t::IntLiteral{std::numeric_limits<std::int64_t>::max()}},
},
{"9223372036854775808", std::nullopt}, // Out of range. :(
{
"-9223372036854775808",
Tokens{t::IntLiteral{std::numeric_limits<std::int64_t>::min()}},
},
{"-9223372036854775809", std::nullopt}, // Out of range. :(
});

etest::Suite s{};
Expand Down