diff --git a/starlark/tokenizer.h b/starlark/tokenizer.h
index 519069a..e47e5f1 100644
--- a/starlark/tokenizer.h
+++ b/starlark/tokenizer.h
@@ -34,11 +34,19 @@ class Tokenizer {
         }
 
         if (input_.substr(pos_, 3) == R"(""")") {
-            return tokenize_multiline_string();
+            return tokenize_multiline_string('"');
         }
 
         if (input_[pos_] == '"') {
-            return tokenize_string();
+            return tokenize_string('"');
+        }
+
+        if (input_.substr(pos_, 3) == R"(''')") {
+            return tokenize_multiline_string('\'');
+        }
+
+        if (input_[pos_] == '\'') {
+            return tokenize_string('\'');
         }
 
         if (is_alpha(input_[pos_])) {
@@ -103,11 +111,13 @@ class Tokenizer {
     }
 
     // TODO(robinlinden): Support escapes.
-    std::optional<Token> tokenize_multiline_string() {
+    std::optional<Token> tokenize_multiline_string(char quote_char) {
+        assert(quote_char == '"' || quote_char == '\'');
         pos_ += 3; // Move past the opening triple quotes
         std::size_t start = pos_; // Skip the opening triple quotes
-        while (pos_ + 2 < input_.size() && input_.substr(pos_, 3) != R"(""")") {
+        std::string_view closer = quote_char == '"' ? R"(""")" : R"(''')";
+        while (pos_ + 2 < input_.size() && input_.substr(pos_, 3) != closer) {
             pos_++;
         }
 
@@ -121,10 +131,11 @@ class Tokenizer {
     }
 
     // TODO(robinlinden): Support escapes.
-    std::optional<Token> tokenize_string() {
-        assert(input_[pos_] == '"');
+    std::optional<Token> tokenize_string(char quote_char) {
+        assert(quote_char == '"' || quote_char == '\'');
+        assert(input_[pos_] == '"' || input_[pos_] == '\'');
         std::size_t start = ++pos_;
-        while (pos_ < input_.size() && input_[pos_] != '"') {
+        while (pos_ < input_.size() && input_[pos_] != quote_char) {
             ++pos_;
         }
 
diff --git a/starlark/tokenizer_test.cc b/starlark/tokenizer_test.cc
index 9a7f1a7..0dca164 100644
--- a/starlark/tokenizer_test.cc
+++ b/starlark/tokenizer_test.cc
@@ -54,6 +54,20 @@ int main() {
             Tokens{t::IntLiteral{std::numeric_limits<std::int64_t>::min()}},
         },
         {"-9223372036854775809", std::nullopt}, // Out of range. :(
+        {
+            R"("hello world" 'hello world')",
+            Tokens{t::StringLiteral{"hello world"}, t::StringLiteral{"hello world"}},
+        },
+        {
+            R"("""hello
+world""")",
+            Tokens{t::StringLiteral{"hello\nworld"}},
+        },
+        {
+            R"('''hello
+world''')",
+            Tokens{t::StringLiteral{"hello\nworld"}},
+        },
     });
 
     etest::Suite s{};