| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349 |
- // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- // Exceptions. See /LICENSE for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #include "toolchain/lex/string_literal.h"
- #include <gmock/gmock.h>
- #include <gtest/gtest.h>
- #include "common/check.h"
- #include "toolchain/diagnostics/diagnostic_emitter.h"
- #include "toolchain/lex/test_helpers.h"
- namespace Carbon::Lex {
- namespace {
- class StringLiteralTest : public ::testing::Test {
- protected:
- StringLiteralTest() : error_tracker(ConsoleDiagnosticConsumer()) {}
- auto Lex(llvm::StringRef text) -> StringLiteral {
- std::optional<StringLiteral> result = StringLiteral::Lex(text);
- CARBON_CHECK(result);
- EXPECT_EQ(result->text(), text);
- return *result;
- }
- auto Parse(llvm::StringRef text) -> llvm::StringRef {
- StringLiteral token = Lex(text);
- Testing::SingleTokenDiagnosticTranslator translator(text);
- DiagnosticEmitter<const char*> emitter(translator, error_tracker);
- return token.ComputeValue(allocator, emitter);
- }
- llvm::BumpPtrAllocator allocator;
- ErrorTrackingDiagnosticConsumer error_tracker;
- };
// Checks where `StringLiteral::Lex` decides a literal ends.
//
// For every entry in `valid`, lexing must produce a literal whose `text()` is
// the entire input. For every entry in `invalid`, lexing must still produce a
// literal, but `is_terminated()` must be false.
//
// NOTE(review): several entries are multi-line raw string literals, so the
// line breaks inside them are part of the test input -- confirm they are
// intact before relying on this table.
- TEST_F(StringLiteralTest, StringLiteralBounds) {
- llvm::StringLiteral valid[] = {
- R"("")",
- R"('''
- ''')",
- R"('''
- "foo"
- ''')",
- // Lex """-delimited block string literals for error recovery.
- R"("""
- """)",
- R"("""
- "foo"
- """)",
- // Escaped terminators don't end the string.
- R"("\"")",
- R"("\\")",
- R"("\\\"")",
- R"('''
- \'''
- ''')",
- R"('''
- '\''
- ''')",
- R"('''
- ''\'
- ''')",
- R"('''
- ''\
- ''')",
- R"(#'''
- '''\#n
- '''#)",
- // Only a matching number of '#'s terminates the string.
- R"(#""#)",
- R"(#"xyz"foo"#)",
- R"(##"xyz"#foo"##)",
- R"(#"\""#)",
- // Escape sequences likewise require a matching number of '#'s.
- R"(#"\#"#"#)",
- R"(#"\"#)",
- R"(#'''
- \#'''#
- '''#)",
- // #"""# does not start a multiline string literal.
- R"(#"""#)",
- R"(##"""##)",
- };
- for (llvm::StringLiteral test : valid) {
- SCOPED_TRACE(test);
- std::optional<StringLiteral> result = StringLiteral::Lex(test);
- EXPECT_TRUE(result.has_value());
- if (result) {
- EXPECT_EQ(result->text(), test);
- }
- }
- llvm::StringLiteral invalid[] = {
- // clang-format off
- R"(")",
- R"("\)",
- R"("\")",
- R"("\\)",
- R"("\\\")",
- "'''\n",
- "'''\n'",
- "'''\n''",
- "#'''\n'''",
- R"(" \
- ")",
- // clang-format on
- };
- for (llvm::StringLiteral test : invalid) {
- SCOPED_TRACE(test);
- std::optional<StringLiteral> result = StringLiteral::Lex(test);
- EXPECT_TRUE(result.has_value());
- if (result) {
- EXPECT_FALSE(result->is_terminated());
- }
- }
- }
// Checks the value computed for well-formed literals.
//
// Each case pairs a source literal with the value `Parse` is expected to
// return for it. The source is passed through `trim()` first, so whitespace
// before and after the literal in the raw strings below is not part of the
// input. Every case must compute its value without any error being recorded by
// `error_tracker`, which is reset before each case.
//
// Expected values built with `withInnerNUL` contain embedded NUL bytes.
//
// NOTE(review): the block-string cases exercise indent and trailing-whitespace
// removal, so the exact spaces, tabs and line breaks inside the literals are
// significant -- confirm they are intact before relying on this table.
- TEST_F(StringLiteralTest, StringLiteralContents) {
- std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
- // Empty strings.
- {R"("")", ""},
- {R"(
- '''
- '''
- )",
- ""},
- // Nearly-empty strings.
- {R"(
- '''
- '''
- )",
- "\n"},
- // Lines containing only whitespace are treated as empty even if they
- // contain tabs.
- {"'''\n\t \t\n'''", "\n"},
- // Indent removal.
- {R"(
- '''file type indicator
- indented contents \
- '''
- )",
- " indented contents "},
- // Removal of tabs in indent and suffix.
- {"'''\n \t hello \t \n \t '''", " hello\n"},
- {R"(
- '''
- hello
- world
- end of test
- '''
- )",
- " hello\nworld\n\n end of test\n"},
- // Escape sequences.
- {R"(
- "\x14,\u{1234},\u{00000010},\n,\r,\t,\0,\",\',\\"
- )",
- llvm::StringLiteral::withInnerNUL(
- "\x14,\xE1\x88\xB4,\x10,\x0A,\x0D,\x09,\x00,\x22,\x27,\x5C")},
- {R"(
- "\0A\x1234"
- )",
- llvm::StringLiteral::withInnerNUL("\0A\x12"
- "34")},
- {R"(
- "\u{D7FF},\u{E000},\u{10FFFF}"
- )",
- "\xED\x9F\xBF,\xEE\x80\x80,\xF4\x8F\xBF\xBF"},
- // Escape sequences in 'raw' strings.
- {R"(
- #"\#x00,\#xFF,\#u{56789},\#u{ABCD},\#u{00000000000000000EF}"#
- )",
- llvm::StringLiteral::withInnerNUL(
- "\x00,\xFF,\xF1\x96\x9E\x89,\xEA\xAF\x8D,\xC3\xAF")},
- {R"(
- ##"\n,\#n,\##n,\##\##n,\##\###n"##
- )",
- "\\n,\\#n,\n,\\##n,\\###n"},
- // Trailing whitespace handling.
- {"'''\n Hello \\\n World \t \n Bye! \\\n '''",
- "Hello World\nBye! "},
- {"'''\n\\t\n'''", "\t\n"},
- {"'''\n\\t \n'''", "\t\n"},
- };
- for (auto [test, expected] : testcases) {
- error_tracker.Reset();
- auto value = Parse(test.trim());
- EXPECT_FALSE(error_tracker.seen_error()) << "`" << test << "`";
- EXPECT_EQ(value, expected);
- }
- }
// Checks error recovery for block literals delimited by """ instead of '''.
//
// Each case pairs a source literal with the value `Parse` is expected to
// return despite the error. The source is passed through `trim()` first, so
// whitespace around the literal in the raw strings is not part of the input.
// Every case must record an error in `error_tracker`, which is reset before
// each case.
//
// NOTE(review): the line breaks and indentation inside the raw strings are
// part of the test input -- confirm they are intact before relying on this
// table.
- TEST_F(StringLiteralTest, DoubleQuotedMultiLineLiteral) {
- // For error recovery, """-delimited literals are lexed, but rejected.
- std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
- {R"(
- """
- '''
- """
- )",
- "'''\n"},
- {R"(
- #"""
- \#tx
- """#
- )",
- "\tx\n"},
- {R"(
- """abcxyz
- hello\
- """
- )",
- "hello"},
- };
- for (auto [test, contents] : testcases) {
- error_tracker.Reset();
- auto value = Parse(test.trim());
- EXPECT_TRUE(error_tracker.seen_error()) << "`" << test << "`";
- EXPECT_EQ(value, contents);
- }
- }
- TEST_F(StringLiteralTest, StringLiteralBadIndent) {
- std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
- // Indent doesn't match the last line.
- {"'''\n \tx\n '''", "x\n"},
- {"'''\n x\n '''", "x\n"},
- {"'''\n x\n\t'''", "x\n"},
- {"'''\n ok\n bad\n '''", "ok\nbad\n"},
- {"'''\n bad\n ok\n '''", "bad\nok\n"},
- {"'''\n escaped,\\\n bad\n '''", "escaped,bad\n"},
- // Indent on last line is followed by text.
- {"'''\n x\n x'''", "x\nx"},
- {"'''\n x\n x'''", " x\nx"},
- {"'''\n x\n x'''", "x\nx"},
- };
- for (auto [test, contents] : testcases) {
- error_tracker.Reset();
- auto value = Parse(test);
- EXPECT_TRUE(error_tracker.seen_error()) << "`" << test << "`";
- EXPECT_EQ(value, contents);
- }
- }
- TEST_F(StringLiteralTest, StringLiteralBadEscapeSequence) {
- llvm::StringLiteral testcases[] = {
- R"("\a")",
- R"("\b")",
- R"("\e")",
- R"("\f")",
- R"("\v")",
- R"("\?")",
- R"("\1")",
- R"("\9")",
- // \0 can't be followed by a decimal digit.
- R"("\01")",
- R"("\09")",
- // \x requires two (uppercase) hexadecimal digits.
- R"("\x")",
- R"("\x0")",
- R"("\x0G")",
- R"("\xab")",
- R"("\x\n")",
- R"("\x\"")",
- // \u requires a braced list of one or more hexadecimal digits.
- R"("\u")",
- R"("\u?")",
- R"("\u\"")",
- R"("\u{")",
- R"("\u{}")",
- R"("\u{A")",
- R"("\u{G}")",
- R"("\u{0000012323127z}")",
- R"("\u{-3}")",
- // \u must specify a non-surrogate code point.
- R"("\u{110000}")",
- R"("\u{000000000000000000000000000000000110000}")",
- R"("\u{D800}")",
- R"("\u{DFFF}")",
- };
- for (llvm::StringLiteral test : testcases) {
- error_tracker.Reset();
- Parse(test);
- EXPECT_TRUE(error_tracker.seen_error()) << "`" << test << "`";
- // TODO: Test value produced by error recovery.
- }
- }
- TEST_F(StringLiteralTest, TabInString) {
- auto value = Parse("\"x\ty\"");
- EXPECT_TRUE(error_tracker.seen_error());
- EXPECT_EQ(value, "x\ty");
- }
- TEST_F(StringLiteralTest, TabAtEndOfString) {
- auto value = Parse("\"\t\t\t\"");
- EXPECT_TRUE(error_tracker.seen_error());
- EXPECT_EQ(value, "\t\t\t");
- }
- TEST_F(StringLiteralTest, TabInBlockString) {
- auto value = Parse("'''\nx\ty\n'''");
- EXPECT_TRUE(error_tracker.seen_error());
- EXPECT_EQ(value, "x\ty\n");
- }
- TEST_F(StringLiteralTest, UnicodeTooManyDigits) {
- std::string text = "u{";
- text.append(10000, '9');
- text.append("}");
- auto value = Parse("\"\\" + text + "\"");
- EXPECT_TRUE(error_tracker.seen_error());
- EXPECT_EQ(value, text);
- }
- } // namespace
- } // namespace Carbon::Lex
|