| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294 |
- // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- // Exceptions. See /LICENSE for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #include "toolchain/lexer/string_literal.h"
- #include "gmock/gmock.h"
- #include "gtest/gtest.h"
- #include "toolchain/diagnostics/diagnostic_emitter.h"
- #include "toolchain/lexer/test_helpers.h"
- namespace Carbon {
- namespace {
- struct StringLiteralTest : ::testing::Test {
- StringLiteralTest() : error_tracker(ConsoleDiagnosticConsumer()) {}
- ErrorTrackingDiagnosticConsumer error_tracker;
- auto Lex(llvm::StringRef text) -> LexedStringLiteral {
- llvm::Optional<LexedStringLiteral> result = LexedStringLiteral::Lex(text);
- assert(result);
- EXPECT_EQ(result->Text(), text);
- return *result;
- }
- auto Parse(llvm::StringRef text) -> std::string {
- LexedStringLiteral token = Lex(text);
- Testing::SingleTokenDiagnosticTranslator translator(text);
- DiagnosticEmitter<const char*> emitter(translator, error_tracker);
- return token.ComputeValue(emitter);
- }
- };
// Checks where LexedStringLiteral::Lex considers a literal to end. Every entry
// of `valid` must lex to a literal whose Text() equals the whole input; every
// entry of `invalid` must produce no literal.
// NOTE(review): this copy of the file has no indentation, so the leading
// whitespace inside the multi-line raw strings below may have been lost.
// Whitespace inside a raw string is part of the tested input -- confirm the
// literals against the upstream file before relying on them.
- TEST_F(StringLiteralTest, StringLiteralBounds) {
- llvm::StringLiteral valid[] = {
- R"("")",
- R"("""
- """)",
- R"("""
- "foo"
- """)",
- // Escaped terminators don't end the string.
- R"("\"")",
- R"("\\")",
- R"("\\\"")",
- R"("""
- \"""
- """)",
- R"("""
- "\""
- """)",
- R"("""
- ""\"
- """)",
- R"("""
- ""\
- """)",
- R"(#"""
- """\#n
- """#)",
- // Only a matching number of '#'s terminates the string.
- R"(#""#)",
- R"(#"xyz"foo"#)",
- R"(##"xyz"#foo"##)",
- R"(#"\""#)",
- // Escape sequences likewise require a matching number of '#'s.
- R"(#"\#"#"#)",
- R"(#"\"#)",
- R"(#"""
- \#"""#
- """#)",
- // #"""# does not start a multiline string literal.
- R"(#"""#)",
- R"(##"""##)",
- };
// Lex directly (not via the fixture's Lex helper) so that a failure is a
// non-fatal expectation tagged with the input. Text() is only compared when a
// literal was actually produced.
- for (llvm::StringLiteral test : valid) {
- llvm::Optional<LexedStringLiteral> result = LexedStringLiteral::Lex(test);
- EXPECT_TRUE(result.hasValue()) << test;
- if (result) {
- EXPECT_EQ(result->Text(), test);
- }
- }
// Inputs for which Lex is expected to return an empty optional.
- llvm::StringLiteral invalid[] = {
- R"(")",
- R"("""
- "")",
- R"("\)", //
- R"("\")",
- R"("\\)", //
- R"("\\\")",
- R"("""
- )",
- R"(#"""
- """)",
- R"(" \
- ")",
- };
// The input is wrapped in backticks in the failure message.
- for (llvm::StringLiteral test : invalid) {
- EXPECT_FALSE(LexedStringLiteral::Lex(test).hasValue())
- << "`" << test << "`";
- }
- }
// Checks the value computed for well-formed literals. Each table entry pairs
// a source literal with the value Parse is expected to return, and no error
// may be reported while computing it.
// NOTE(review): whitespace in this copy looks damaged. For example, the entry
// under "Empty strings" and the one under "Nearly-empty strings" read
// identically here yet expect "" and "\n" respectively, and the indent-removal
// cases show no indentation. Confirm the literals against the upstream file.
- TEST_F(StringLiteralTest, StringLiteralContents) {
- // We use ""s strings to handle embedded nul characters below.
- using std::operator""s;
- std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
- // Empty strings.
- {R"("")", ""},
- {R"(
- """
- """
- )",
- ""},
- // Nearly-empty strings.
- {R"(
- """
- """
- )",
- "\n"},
- // Lines containing only whitespace are treated as empty even if they
- // contain tabs.
- {"\"\"\"\n\t \t\n\"\"\"", "\n"},
- // Indent removal.
- {R"(
- """file type indicator
- indented contents \
- """
- )",
- " indented contents "},
- // Removal of tabs in indent and suffix.
- {"\"\"\"\n \t hello \t \n \t \"\"\"", " hello\n"},
- {R"(
- """
- hello
- world
- end of test
- """
- )",
- " hello\nworld\n\n end of test\n"},
- // Escape sequences.
- {R"(
- "\x14,\u{1234},\u{00000010},\n,\r,\t,\0,\",\',\\"
- )",
- llvm::StringLiteral::withInnerNUL(
- "\x14,\xE1\x88\xB4,\x10,\x0A,\x0D,\x09,\x00,\x22,\x27,\x5C")},
- {R"(
- "\0A\x1234"
- )",
- llvm::StringLiteral::withInnerNUL("\0A\x12"
- "34")},
- {R"(
- "\u{D7FF},\u{E000},\u{10FFFF}"
- )",
- "\xED\x9F\xBF,\xEE\x80\x80,\xF4\x8F\xBF\xBF"},
- // Escape sequences in 'raw' strings.
- {R"(
- #"\#x00,\#xFF,\#u{56789},\#u{ABCD},\#u{00000000000000000EF}"#
- )",
- llvm::StringLiteral::withInnerNUL(
- "\x00,\xFF,\xF1\x96\x9E\x89,\xEA\xAF\x8D,\xC3\xAF")},
- {R"(
- ##"\n,\#n,\##n,\##\##n,\##\###n"##
- )",
- "\\n,\\#n,\n,\\##n,\\###n"},
- // Trailing whitespace handling.
- {"\"\"\"\n Hello \\\n World \t \n Bye! \\\n \"\"\"",
- "Hello World\nBye! "},
- };
// The tracker is reset for every case so SeenError() reflects only the current
// input. `trim()` removes the whitespace that surrounds the literal in the
// raw-string entries above before the text is handed to Parse.
- for (auto [test, contents] : testcases) {
- error_tracker.Reset();
- auto value = Parse(test.trim());
- EXPECT_FALSE(error_tracker.SeenError()) << "`" << test << "`";
- EXPECT_EQ(value, contents);
- }
- }
- TEST_F(StringLiteralTest, StringLiteralBadIndent) {
- std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
- // Indent doesn't match the last line.
- {"\"\"\"\n \tx\n \"\"\"", "x\n"},
- {"\"\"\"\n x\n \"\"\"", "x\n"},
- {"\"\"\"\n x\n\t\"\"\"", "x\n"},
- {"\"\"\"\n ok\n bad\n \"\"\"", "ok\nbad\n"},
- {"\"\"\"\n bad\n ok\n \"\"\"", "bad\nok\n"},
- {"\"\"\"\n escaped,\\\n bad\n \"\"\"", "escaped,bad\n"},
- // Indent on last line is followed by text.
- {"\"\"\"\n x\n x\"\"\"", "x\nx"},
- {"\"\"\"\n x\n x\"\"\"", " x\nx"},
- {"\"\"\"\n x\n x\"\"\"", "x\nx"},
- };
- for (auto [test, contents] : testcases) {
- error_tracker.Reset();
- auto value = Parse(test);
- EXPECT_TRUE(error_tracker.SeenError()) << "`" << test << "`";
- EXPECT_EQ(value, contents);
- }
- }
- TEST_F(StringLiteralTest, StringLiteralBadEscapeSequence) {
- llvm::StringLiteral testcases[] = {
- R"("\a")",
- R"("\b")",
- R"("\e")",
- R"("\f")",
- R"("\v")",
- R"("\?")",
- R"("\1")",
- R"("\9")",
- // \0 can't be followed by a decimal digit.
- R"("\01")",
- R"("\09")",
- // \x requires two (uppercase) hexadecimal digits.
- R"("\x")",
- R"("\x0")",
- R"("\x0G")",
- R"("\xab")",
- R"("\x\n")",
- R"("\x\"")",
- // \u requires a braced list of one or more hexadecimal digits.
- R"("\u")",
- R"("\u?")",
- R"("\u\"")",
- R"("\u{")",
- R"("\u{}")",
- R"("\u{A")",
- R"("\u{G}")",
- R"("\u{0000012323127z}")",
- R"("\u{-3}")",
- // \u must specify a non-surrogate code point.
- R"("\u{110000}")",
- R"("\u{000000000000000000000000000000000110000}")",
- R"("\u{D800}")",
- R"("\u{DFFF}")",
- };
- for (llvm::StringLiteral test : testcases) {
- error_tracker.Reset();
- auto value = Parse(test);
- EXPECT_TRUE(error_tracker.SeenError()) << "`" << test << "`";
- // TODO: Test value produced by error recovery.
- }
- }
- TEST_F(StringLiteralTest, TabInString) {
- auto value = Parse("\"x\ty\"");
- EXPECT_TRUE(error_tracker.SeenError());
- EXPECT_EQ(value, "x\ty");
- }
- TEST_F(StringLiteralTest, TabAtEndOfString) {
- auto value = Parse("\"\t\t\t\"");
- EXPECT_TRUE(error_tracker.SeenError());
- EXPECT_EQ(value, "\t\t\t");
- }
- TEST_F(StringLiteralTest, TabInBlockString) {
- auto value = Parse("\"\"\"\nx\ty\n\"\"\"");
- EXPECT_TRUE(error_tracker.SeenError());
- EXPECT_EQ(value, "x\ty\n");
- }
- } // namespace
- } // namespace Carbon
|