string_literal_test.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lexer/string_literal.h"
  5. #include "gmock/gmock.h"
  6. #include "gtest/gtest.h"
  7. #include "toolchain/diagnostics/diagnostic_emitter.h"
  8. #include "toolchain/lexer/test_helpers.h"
  9. namespace Carbon {
  10. namespace {
  11. struct StringLiteralTest : ::testing::Test {
  12. StringLiteralTest() : error_tracker(ConsoleDiagnosticConsumer()) {}
  13. ErrorTrackingDiagnosticConsumer error_tracker;
  14. auto Lex(llvm::StringRef text) -> LexedStringLiteral {
  15. llvm::Optional<LexedStringLiteral> result = LexedStringLiteral::Lex(text);
  16. assert(result);
  17. EXPECT_EQ(result->Text(), text);
  18. return *result;
  19. }
  20. auto Parse(llvm::StringRef text) -> std::string {
  21. LexedStringLiteral token = Lex(text);
  22. Testing::SingleTokenDiagnosticTranslator translator(text);
  23. DiagnosticEmitter<const char*> emitter(translator, error_tracker);
  24. return token.ComputeValue(emitter);
  25. }
  26. };
  // Checks where LexedStringLiteral::Lex considers a string literal to start
  // and end. Every entry of `valid` must lex successfully and the lexed text
  // must equal the entire input; every entry of `invalid` must fail to lex,
  // i.e. Lex must return an optional without a value.
  //
  // NOTE(review): the multi-line raw literals in this copy carry no leading
  // indentation on their continuation lines. Their meaning depends on exact
  // whitespace, so confirm the inputs against the upstream file before editing
  // or relying on them.
  27. TEST_F(StringLiteralTest, StringLiteralBounds) {
  28. llvm::StringLiteral valid[] = {
  29. R"("")",
  30. R"("""
  31. """)",
  32. R"("""
  33. "foo"
  34. """)",
  35. // Escaped terminators don't end the string.
  36. R"("\"")",
  37. R"("\\")",
  38. R"("\\\"")",
  39. R"("""
  40. \"""
  41. """)",
  42. R"("""
  43. "\""
  44. """)",
  45. R"("""
  46. ""\"
  47. """)",
  48. R"("""
  49. ""\
  50. """)",
  51. R"(#"""
  52. """\#n
  53. """#)",
  54. // Only a matching number of '#'s terminates the string.
  55. R"(#""#)",
  56. R"(#"xyz"foo"#)",
  57. R"(##"xyz"#foo"##)",
  58. R"(#"\""#)",
  59. // Escape sequences likewise require a matching number of '#'s.
  60. R"(#"\#"#"#)",
  61. R"(#"\"#)",
  62. R"(#"""
  63. \#"""#
  64. """#)",
  65. // #"""# does not start a multiline string literal.
  66. R"(#"""#)",
  67. R"(##"""##)",
  68. };
  69. for (llvm::StringLiteral test : valid) {
  70. llvm::Optional<LexedStringLiteral> result = LexedStringLiteral::Lex(test);
  71. EXPECT_TRUE(result.hasValue()) << test;
  72. if (result) {
  73. EXPECT_EQ(result->Text(), test);
  74. }
  75. }
  76. llvm::StringLiteral invalid[] = {
  77. R"(")",
  78. R"("""
  79. "")",
  80. R"("\)", //
  81. R"("\")",
  82. R"("\\)", //
  83. R"("\\\")",
  84. R"("""
  85. )",
  86. R"(#"""
  87. """)",
  88. R"(" \
  89. ")",
  90. };
  91. for (llvm::StringLiteral test : invalid) {
  92. EXPECT_FALSE(LexedStringLiteral::Lex(test).hasValue())
  93. << "`" << test << "`";
  94. }
  95. }
  // Checks the value computed for well-formed string literals. Each test case
  // is a pair of (literal source text, expected value). For every case the
  // error tracker is reset, the source text is trimmed with `trim()` and passed
  // to `Parse`, and the test then expects that no error was reported and that
  // the computed value equals the expected one.
  //
  // NOTE(review): in this copy the multi-line inputs under "Empty strings" and
  // "Nearly-empty strings" are textually identical yet expect different values
  // ("" vs "\n"), and the "Indent removal" inputs show no indentation. This
  // suggests whitespace inside the raw literals was lost; confirm the exact
  // inputs against the upstream file before editing or relying on them.
  96. TEST_F(StringLiteralTest, StringLiteralContents) {
  97. // We use ""s strings to handle embedded nul characters below.
  // NOTE(review): no ""s literal is visible in this test; the cases with
  // embedded NUL bytes use llvm::StringLiteral::withInnerNUL instead. Confirm
  // whether the using-declaration below is still needed.
  98. using std::operator""s;
  99. std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
  100. // Empty strings.
  101. {R"("")", ""},
  102. {R"(
  103. """
  104. """
  105. )",
  106. ""},
  107. // Nearly-empty strings.
  108. {R"(
  109. """
  110. """
  111. )",
  112. "\n"},
  113. // Indent removal.
  114. {R"(
  115. """file type indicator
  116. indented contents \
  117. """
  118. )",
  119. " indented contents "},
  120. {R"(
  121. """
  122. hello
  123. world
  124. end of test
  125. """
  126. )",
  127. " hello\nworld\n\n end of test\n"},
  128. // Escape sequences.
  129. {R"(
  130. "\x14,\u{1234},\u{00000010},\n,\r,\t,\0,\",\',\\"
  131. )",
  132. llvm::StringLiteral::withInnerNUL(
  133. "\x14,\xE1\x88\xB4,\x10,\x0A,\x0D,\x09,\x00,\x22,\x27,\x5C")},
  134. {R"(
  135. "\0A\x1234"
  136. )",
  137. llvm::StringLiteral::withInnerNUL("\0A\x12"
  138. "34")},
  139. {R"(
  140. "\u{D7FF},\u{E000},\u{10FFFF}"
  141. )",
  142. "\xED\x9F\xBF,\xEE\x80\x80,\xF4\x8F\xBF\xBF"},
  143. // Escape sequences in 'raw' strings.
  144. {R"(
  145. #"\#x00,\#xFF,\#u{56789},\#u{ABCD},\#u{00000000000000000EF}"#
  146. )",
  147. llvm::StringLiteral::withInnerNUL(
  148. "\x00,\xFF,\xF1\x96\x9E\x89,\xEA\xAF\x8D,\xC3\xAF")},
  149. {R"(
  150. ##"\n,\#n,\##n,\##\##n,\##\###n"##
  151. )",
  152. "\\n,\\#n,\n,\\##n,\\###n"},
  153. // Trailing whitespace handling.
  154. {"\"\"\"\n Hello \\\n World \t \n Bye! \\\n \"\"\"",
  155. "Hello World\nBye! "},
  156. };
  157. for (auto [test, contents] : testcases) {
  158. error_tracker.Reset();
  159. auto value = Parse(test.trim());
  160. EXPECT_FALSE(error_tracker.SeenError()) << "`" << test << "`";
  161. EXPECT_EQ(value, contents);
  162. }
  163. }
  164. TEST_F(StringLiteralTest, StringLiteralBadIndent) {
  165. std::pair<llvm::StringLiteral, llvm::StringLiteral> testcases[] = {
  166. // Indent doesn't match the last line.
  167. {"\"\"\"\n \tx\n \"\"\"", "x\n"},
  168. {"\"\"\"\n x\n \"\"\"", "x\n"},
  169. {"\"\"\"\n x\n\t\"\"\"", "x\n"},
  170. {"\"\"\"\n ok\n bad\n \"\"\"", "ok\nbad\n"},
  171. {"\"\"\"\n bad\n ok\n \"\"\"", "bad\nok\n"},
  172. {"\"\"\"\n escaped,\\\n bad\n \"\"\"", "escaped,bad\n"},
  173. // Indent on last line is followed by text.
  174. {"\"\"\"\n x\n x\"\"\"", "x\nx"},
  175. {"\"\"\"\n x\n x\"\"\"", " x\nx"},
  176. {"\"\"\"\n x\n x\"\"\"", "x\nx"},
  177. };
  178. for (auto [test, contents] : testcases) {
  179. error_tracker.Reset();
  180. auto value = Parse(test);
  181. EXPECT_TRUE(error_tracker.SeenError()) << "`" << test << "`";
  182. EXPECT_EQ(value, contents);
  183. }
  184. }
  185. TEST_F(StringLiteralTest, StringLiteralBadEscapeSequence) {
  186. llvm::StringLiteral testcases[] = {
  187. R"("\a")",
  188. R"("\b")",
  189. R"("\e")",
  190. R"("\f")",
  191. R"("\v")",
  192. R"("\?")",
  193. R"("\1")",
  194. R"("\9")",
  195. // \0 can't be followed by a decimal digit.
  196. R"("\01")",
  197. R"("\09")",
  198. // \x requires two (uppercase) hexadecimal digits.
  199. R"("\x")",
  200. R"("\x0")",
  201. R"("\x0G")",
  202. R"("\xab")",
  203. R"("\x\n")",
  204. R"("\x\"")",
  205. // \u requires a braced list of one or more hexadecimal digits.
  206. R"("\u")",
  207. R"("\u?")",
  208. R"("\u\"")",
  209. R"("\u{")",
  210. R"("\u{}")",
  211. R"("\u{A")",
  212. R"("\u{G}")",
  213. R"("\u{0000012323127z}")",
  214. R"("\u{-3}")",
  215. // \u must specify a non-surrogate code point.
  216. R"("\u{110000}")",
  217. R"("\u{000000000000000000000000000000000110000}")",
  218. R"("\u{D800}")",
  219. R"("\u{DFFF}")",
  220. };
  221. for (llvm::StringLiteral test : testcases) {
  222. error_tracker.Reset();
  223. auto value = Parse(test);
  224. EXPECT_TRUE(error_tracker.SeenError()) << "`" << test << "`";
  225. // TODO: Test value produced by error recovery.
  226. }
  227. }
  228. } // namespace
  229. } // namespace Carbon