// tokenized_buffer_test.cpp
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lex/tokenized_buffer.h"
  5. #include <gmock/gmock.h>
  6. #include <gtest/gtest.h>
  7. #include <forward_list>
  8. #include <iterator>
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "testing/base/test_raw_ostream.h"
  11. #include "toolchain/base/value_store.h"
  12. #include "toolchain/diagnostics/diagnostic_emitter.h"
  13. #include "toolchain/diagnostics/mocks.h"
  14. #include "toolchain/lex/lex.h"
  15. #include "toolchain/lex/tokenized_buffer_test_helpers.h"
  16. #include "toolchain/testing/yaml_test_helpers.h"
  17. namespace Carbon::Lex {
  18. namespace {
  19. using ::Carbon::Testing::ExpectedToken;
  20. using ::Carbon::Testing::IsSingleDiagnostic;
  21. using ::Carbon::Testing::TestRawOstream;
  22. using ::testing::_;
  23. using ::testing::ElementsAre;
  24. using ::testing::Eq;
  25. using ::testing::HasSubstr;
  26. using ::testing::Pair;
  27. namespace Yaml = ::Carbon::Testing::Yaml;
// Test fixture providing lexing helpers backed by an in-memory filesystem,
// so each test can lex arbitrary text without touching the real filesystem.
class LexerTest : public ::testing::Test {
 protected:
  // Wraps `text` in a uniquely named in-memory file and returns a
  // SourceBuffer reading it. Buffers are kept in `source_storage_` (a
  // forward_list, whose elements have stable addresses) so the returned
  // reference stays valid for the rest of the test.
  auto GetSourceBuffer(llvm::StringRef text) -> SourceBuffer& {
    std::string filename = llvm::formatv("test{0}.carbon", ++file_index_);
    CARBON_CHECK(fs_.addFile(filename, /*ModificationTime=*/0,
                             llvm::MemoryBuffer::getMemBuffer(text)));
    source_storage_.push_front(std::move(*SourceBuffer::MakeFromFile(
        fs_, filename, ConsoleDiagnosticConsumer())));
    return source_storage_.front();
  }

  // Lexes `text` into a TokenizedBuffer, reporting diagnostics to `consumer`
  // (the console by default).
  auto Lex(llvm::StringRef text,
           DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
      -> TokenizedBuffer {
    // Qualified call so it resolves to the free function in namespace `Lex`
    // rather than recursing into this member.
    return Lex::Lex(value_stores_, GetSourceBuffer(text), consumer);
  }

  SharedValueStores value_stores_;
  llvm::vfs::InMemoryFileSystem fs_;
  // Monotonic counter giving each in-memory file a distinct name.
  int file_index_ = 0;
  std::forward_list<SourceBuffer> source_storage_;
};
  48. TEST_F(LexerTest, HandlesEmptyBuffer) {
  49. auto buffer = Lex("");
  50. EXPECT_FALSE(buffer.has_errors());
  51. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  52. {.kind = TokenKind::FileStart},
  53. {.kind = TokenKind::FileEnd}}));
  54. }
  55. TEST_F(LexerTest, TracksLinesAndColumns) {
  56. auto buffer = Lex("\n ;;\n ;;;\n x\"foo\" '''baz\n a\n ''' y");
  57. EXPECT_FALSE(buffer.has_errors());
  58. EXPECT_THAT(
  59. buffer,
  60. HasTokens(llvm::ArrayRef<ExpectedToken>{
  61. {.kind = TokenKind::FileStart,
  62. .line = 1,
  63. .column = 1,
  64. .indent_column = 1},
  65. {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 3},
  66. {.kind = TokenKind::Semi, .line = 2, .column = 4, .indent_column = 3},
  67. {.kind = TokenKind::Semi, .line = 3, .column = 4, .indent_column = 4},
  68. {.kind = TokenKind::Semi, .line = 3, .column = 5, .indent_column = 4},
  69. {.kind = TokenKind::Semi, .line = 3, .column = 6, .indent_column = 4},
  70. {.kind = TokenKind::Identifier,
  71. .line = 4,
  72. .column = 4,
  73. .indent_column = 4,
  74. .text = "x"},
  75. {.kind = TokenKind::StringLiteral,
  76. .line = 4,
  77. .column = 5,
  78. .indent_column = 4},
  79. {.kind = TokenKind::StringLiteral,
  80. .line = 4,
  81. .column = 11,
  82. .indent_column = 4},
  83. {.kind = TokenKind::Identifier,
  84. .line = 6,
  85. .column = 6,
  86. .indent_column = 11,
  87. .text = "y"},
  88. {.kind = TokenKind::FileEnd, .line = 6, .column = 7},
  89. }));
  90. }
  91. TEST_F(LexerTest, TracksLinesAndColumnsCRLF) {
  92. auto buffer =
  93. Lex("\r\n ;;\r\n ;;;\r\n x\"foo\" '''baz\r\n a\r\n ''' y");
  94. EXPECT_FALSE(buffer.has_errors());
  95. EXPECT_THAT(
  96. buffer,
  97. HasTokens(llvm::ArrayRef<ExpectedToken>{
  98. {.kind = TokenKind::FileStart,
  99. .line = 1,
  100. .column = 1,
  101. .indent_column = 1},
  102. {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 3},
  103. {.kind = TokenKind::Semi, .line = 2, .column = 4, .indent_column = 3},
  104. {.kind = TokenKind::Semi, .line = 3, .column = 4, .indent_column = 4},
  105. {.kind = TokenKind::Semi, .line = 3, .column = 5, .indent_column = 4},
  106. {.kind = TokenKind::Semi, .line = 3, .column = 6, .indent_column = 4},
  107. {.kind = TokenKind::Identifier,
  108. .line = 4,
  109. .column = 4,
  110. .indent_column = 4,
  111. .text = "x"},
  112. {.kind = TokenKind::StringLiteral,
  113. .line = 4,
  114. .column = 5,
  115. .indent_column = 4},
  116. {.kind = TokenKind::StringLiteral,
  117. .line = 4,
  118. .column = 11,
  119. .indent_column = 4},
  120. {.kind = TokenKind::Identifier,
  121. .line = 6,
  122. .column = 6,
  123. .indent_column = 11,
  124. .text = "y"},
  125. {.kind = TokenKind::FileEnd, .line = 6, .column = 7},
  126. }));
  127. }
  128. TEST_F(LexerTest, InvalidCR) {
  129. auto buffer = Lex("\n ;;\r ;\n x");
  130. EXPECT_TRUE(buffer.has_errors());
  131. EXPECT_THAT(
  132. buffer,
  133. HasTokens(llvm::ArrayRef<ExpectedToken>{
  134. {.kind = TokenKind::FileStart,
  135. .line = 1,
  136. .column = 1,
  137. .indent_column = 1},
  138. {.kind = TokenKind::Semi, .line = 2, .column = 2, .indent_column = 2},
  139. {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 2},
  140. {.kind = TokenKind::Semi, .line = 2, .column = 6, .indent_column = 2},
  141. {.kind = TokenKind::Identifier,
  142. .line = 3,
  143. .column = 4,
  144. .indent_column = 4,
  145. .text = "x"},
  146. {.kind = TokenKind::FileEnd, .line = 3, .column = 5},
  147. }));
  148. }
  149. TEST_F(LexerTest, InvalidLFCR) {
  150. auto buffer = Lex("\n ;;\n\r ;\n x");
  151. EXPECT_TRUE(buffer.has_errors());
  152. EXPECT_THAT(
  153. buffer,
  154. HasTokens(llvm::ArrayRef<ExpectedToken>{
  155. {.kind = TokenKind::FileStart,
  156. .line = 1,
  157. .column = 1,
  158. .indent_column = 1},
  159. {.kind = TokenKind::Semi, .line = 2, .column = 2, .indent_column = 2},
  160. {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 2},
  161. {.kind = TokenKind::Semi, .line = 3, .column = 3, .indent_column = 1},
  162. {.kind = TokenKind::Identifier,
  163. .line = 4,
  164. .column = 4,
  165. .indent_column = 4,
  166. .text = "x"},
  167. {.kind = TokenKind::FileEnd, .line = 4, .column = 5},
  168. }));
  169. }
  170. TEST_F(LexerTest, HandlesNumericLiteral) {
  171. auto buffer = Lex("12-578\n 1 2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  172. EXPECT_FALSE(buffer.has_errors());
  173. ASSERT_THAT(buffer,
  174. HasTokens(llvm::ArrayRef<ExpectedToken>{
  175. {.kind = TokenKind::FileStart, .line = 1, .column = 1},
  176. {.kind = TokenKind::IntLiteral,
  177. .line = 1,
  178. .column = 1,
  179. .indent_column = 1,
  180. .text = "12"},
  181. {.kind = TokenKind::Minus,
  182. .line = 1,
  183. .column = 3,
  184. .indent_column = 1},
  185. {.kind = TokenKind::IntLiteral,
  186. .line = 1,
  187. .column = 4,
  188. .indent_column = 1,
  189. .text = "578"},
  190. {.kind = TokenKind::IntLiteral,
  191. .line = 2,
  192. .column = 3,
  193. .indent_column = 3,
  194. .text = "1"},
  195. {.kind = TokenKind::IntLiteral,
  196. .line = 2,
  197. .column = 6,
  198. .indent_column = 3,
  199. .text = "2"},
  200. {.kind = TokenKind::IntLiteral,
  201. .line = 3,
  202. .column = 1,
  203. .indent_column = 1,
  204. .text = "0x12_3ABC"},
  205. {.kind = TokenKind::IntLiteral,
  206. .line = 4,
  207. .column = 1,
  208. .indent_column = 1,
  209. .text = "0b10_10_11"},
  210. {.kind = TokenKind::IntLiteral,
  211. .line = 5,
  212. .column = 1,
  213. .indent_column = 1,
  214. .text = "1_234_567"},
  215. {.kind = TokenKind::RealLiteral,
  216. .line = 6,
  217. .column = 1,
  218. .indent_column = 1,
  219. .text = "1.5e9"},
  220. {.kind = TokenKind::FileEnd, .line = 6, .column = 6},
  221. }));
  222. auto token_start = buffer.tokens().begin();
  223. auto token_12 = token_start + 1;
  224. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_12)), 12);
  225. auto token_578 = token_12 + 2;
  226. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_578)), 578);
  227. auto token_1 = token_578 + 1;
  228. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_1)), 1);
  229. auto token_2 = token_1 + 1;
  230. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_2)), 2);
  231. auto token_0x12_3abc = token_2 + 1;
  232. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_0x12_3abc)),
  233. 0x12'3abc);
  234. auto token_0b10_10_11 = token_0x12_3abc + 1;
  235. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_0b10_10_11)),
  236. 0b10'10'11);
  237. auto token_1_234_567 = token_0b10_10_11 + 1;
  238. EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_1_234_567)),
  239. 1'234'567);
  240. auto token_1_5e9 = token_1_234_567 + 1;
  241. auto value_1_5e9 =
  242. value_stores_.reals().Get(buffer.GetRealLiteral(*token_1_5e9));
  243. EXPECT_EQ(value_1_5e9.mantissa.getZExtValue(), 15);
  244. EXPECT_EQ(value_1_5e9.exponent.getSExtValue(), 8);
  245. EXPECT_EQ(value_1_5e9.is_decimal, true);
  246. }
// Malformed numeric literals become Error tokens spanning the whole literal,
// while adjacent valid literals are still lexed normally.
TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  EXPECT_TRUE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // Trailing alphabetic junk after digits.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "14x"},
                  // Digit separators in odd positions still lex as an int.
                  {.kind = TokenKind::IntLiteral,
                   .line = 1,
                   .column = 5,
                   .indent_column = 1,
                   .text = "15_49"},
                  // Junk suffix on a hex real literal.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 11,
                   .indent_column = 1,
                   .text = "0x3.5q"},
                  {.kind = TokenKind::RealLiteral,
                   .line = 1,
                   .column = 18,
                   .indent_column = 1,
                   .text = "0x3_4.5_6"},
                  // Invalid radix prefix.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 28,
                   .indent_column = 1,
                   .text = "0ops"},
                  {.kind = TokenKind::FileEnd, .line = 1, .column = 32},
              }));
}
// Checks where numeric literal lexing stops and falls back to punctuation:
// each source line exercises a different boundary (trailing period, leading
// period, operators after exponents, chained periods, etc.).
TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  llvm::StringLiteral source_text = R"(
1.
.2
3.+foo
4.0-bar
5.0e+123+456
6.0e+1e+2
1e7
8..10
9.0.9.5
10.foo
11.0.foo
12e+1
13._
)";
  auto buffer = Lex(source_text);
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          // "1." splits into int plus trailing period.
                          {.kind = TokenKind::IntLiteral, .text = "1"},
                          {.kind = TokenKind::Period},
                          // newline
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::IntLiteral, .text = "3"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "4.0"},
                          {.kind = TokenKind::Minus},
                          {.kind = TokenKind::Identifier, .text = "bar"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "5.0e+123"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "456"},
                          // newline
                          {.kind = TokenKind::Error, .text = "6.0e+1e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::Error, .text = "1e7"},
                          // newline
                          {.kind = TokenKind::IntLiteral, .text = "8"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntLiteral, .text = "10"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "9.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::RealLiteral, .text = "9.5"},
                          // newline
                          {.kind = TokenKind::Error, .text = "10.foo"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "11.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          {.kind = TokenKind::Error, .text = "12e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "1"},
                          // newline
                          {.kind = TokenKind::IntLiteral, .text = "13"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Underscore},
                          // newline
                          {.kind = TokenKind::FileEnd},
                      }));
}
// Unrecognized characters (including multi-byte UTF-8 and an embedded NUL)
// are grouped into Error tokens; valid tokens in between still lex normally.
TEST_F(LexerTest, HandlesGarbageCharacters) {
  // The explicit length below preserves the embedded NUL on line 2.
  constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart, .line = 1, .column = 1},
          {.kind = TokenKind::Error,
           .line = 1,
           .column = 1,
           // 💩 takes 4 bytes, and we count column as bytes offset.
           .text = llvm::StringRef("$$💩", 6)},
          {.kind = TokenKind::Minus, .line = 1, .column = 7},
          {.kind = TokenKind::Error, .line = 1, .column = 8, .text = "$"},
          // newline
          {.kind = TokenKind::Error,
           .line = 2,
           .column = 1,
           // Explicit length keeps the NUL inside the expected text too.
           .text = llvm::StringRef("$\0$", 3)},
          {.kind = TokenKind::IntLiteral, .line = 2, .column = 4, .text = "12"},
          {.kind = TokenKind::Error, .line = 2, .column = 6, .text = "$"},
          // newline
          {.kind = TokenKind::Backslash, .line = 3, .column = 1, .text = "\\"},
          {.kind = TokenKind::Error, .line = 3, .column = 2, .text = "\"\\"},
          // newline
          {.kind = TokenKind::Error, .line = 4, .column = 1, .text = "\"x"},
          {.kind = TokenKind::FileEnd, .line = 4, .column = 3},
      }));
}
  382. TEST_F(LexerTest, Symbols) {
  383. // We don't need to exhaustively test symbols here as they're handled with
  384. // common code, but we want to check specific patterns to verify things like
  385. // max-munch rule and handling of interesting symbols.
  386. auto buffer = Lex("<<<");
  387. EXPECT_FALSE(buffer.has_errors());
  388. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  389. {.kind = TokenKind::FileStart},
  390. {.kind = TokenKind::LessLess},
  391. {.kind = TokenKind::Less},
  392. {.kind = TokenKind::FileEnd},
  393. }));
  394. buffer = Lex("<<=>>");
  395. EXPECT_FALSE(buffer.has_errors());
  396. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  397. {.kind = TokenKind::FileStart},
  398. {.kind = TokenKind::LessLessEqual},
  399. {.kind = TokenKind::GreaterGreater},
  400. {.kind = TokenKind::FileEnd},
  401. }));
  402. buffer = Lex("< <=> >");
  403. EXPECT_FALSE(buffer.has_errors());
  404. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  405. {.kind = TokenKind::FileStart},
  406. {.kind = TokenKind::Less},
  407. {.kind = TokenKind::LessEqualGreater},
  408. {.kind = TokenKind::Greater},
  409. {.kind = TokenKind::FileEnd},
  410. }));
  411. buffer = Lex("\\/?@&^!");
  412. EXPECT_FALSE(buffer.has_errors());
  413. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  414. {.kind = TokenKind::FileStart},
  415. {.kind = TokenKind::Backslash},
  416. {.kind = TokenKind::Slash},
  417. {.kind = TokenKind::Question},
  418. {.kind = TokenKind::At},
  419. {.kind = TokenKind::Amp},
  420. {.kind = TokenKind::Caret},
  421. {.kind = TokenKind::Exclaim},
  422. {.kind = TokenKind::FileEnd},
  423. }));
  424. }
  425. TEST_F(LexerTest, Parens) {
  426. auto buffer = Lex("()");
  427. EXPECT_FALSE(buffer.has_errors());
  428. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  429. {.kind = TokenKind::FileStart},
  430. {.kind = TokenKind::OpenParen},
  431. {.kind = TokenKind::CloseParen},
  432. {.kind = TokenKind::FileEnd},
  433. }));
  434. buffer = Lex("((()()))");
  435. EXPECT_FALSE(buffer.has_errors());
  436. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  437. {.kind = TokenKind::FileStart},
  438. {.kind = TokenKind::OpenParen},
  439. {.kind = TokenKind::OpenParen},
  440. {.kind = TokenKind::OpenParen},
  441. {.kind = TokenKind::CloseParen},
  442. {.kind = TokenKind::OpenParen},
  443. {.kind = TokenKind::CloseParen},
  444. {.kind = TokenKind::CloseParen},
  445. {.kind = TokenKind::CloseParen},
  446. {.kind = TokenKind::FileEnd},
  447. }));
  448. }
  449. TEST_F(LexerTest, CurlyBraces) {
  450. auto buffer = Lex("{}");
  451. EXPECT_FALSE(buffer.has_errors());
  452. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  453. {.kind = TokenKind::FileStart},
  454. {.kind = TokenKind::OpenCurlyBrace},
  455. {.kind = TokenKind::CloseCurlyBrace},
  456. {.kind = TokenKind::FileEnd},
  457. }));
  458. buffer = Lex("{{{}{}}}");
  459. EXPECT_FALSE(buffer.has_errors());
  460. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  461. {.kind = TokenKind::FileStart},
  462. {.kind = TokenKind::OpenCurlyBrace},
  463. {.kind = TokenKind::OpenCurlyBrace},
  464. {.kind = TokenKind::OpenCurlyBrace},
  465. {.kind = TokenKind::CloseCurlyBrace},
  466. {.kind = TokenKind::OpenCurlyBrace},
  467. {.kind = TokenKind::CloseCurlyBrace},
  468. {.kind = TokenKind::CloseCurlyBrace},
  469. {.kind = TokenKind::CloseCurlyBrace},
  470. {.kind = TokenKind::FileEnd},
  471. }));
  472. }
// Opening and closing grouping tokens must be cross-linked: each opener
// reports its matching closer and vice versa, including across nesting.
TEST_F(LexerTest, MatchingGroups) {
  {
    // Two adjacent, non-nested groups: "()" then "{}".
    TokenizedBuffer buffer = Lex("(){}");
    ASSERT_FALSE(buffer.has_errors());
    // Pre-increment skips the FileStart token.
    auto it = ++buffer.tokens().begin();
    auto open_paren_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    auto open_curly_token = *it++;
    auto close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::FileEnd);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
  {
    TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = ++buffer.tokens().begin();
    // "({x})": a curly group nested inside a paren group.
    auto open_paren_token = *it++;
    auto open_curly_token = *it++;
    ASSERT_EQ("x",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto close_curly_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    // "{(y)}": a paren group nested inside a curly group.
    open_curly_token = *it++;
    open_paren_token = *it++;
    ASSERT_EQ("y",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    close_paren_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    // "{{((z))}}": two levels of nesting for each bracket kind.
    open_curly_token = *it++;
    auto inner_open_curly_token = *it++;
    open_paren_token = *it++;
    auto inner_open_paren_token = *it++;
    ASSERT_EQ("z",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto inner_close_paren_token = *it++;
    close_paren_token = *it++;
    auto inner_close_curly_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(inner_close_curly_token,
              buffer.GetMatchedClosingToken(inner_open_curly_token));
    EXPECT_EQ(inner_open_curly_token,
              buffer.GetMatchedOpeningToken(inner_close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(inner_close_paren_token,
              buffer.GetMatchedClosingToken(inner_open_paren_token));
    EXPECT_EQ(inner_open_paren_token,
              buffer.GetMatchedOpeningToken(inner_close_paren_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::FileEnd);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
}
// Unbalanced grouping tokens: unmatched brackets become Error tokens, and
// recovery tokens are synthesized to rebalance mixed-up nesting.
TEST_F(LexerTest, MismatchedGroups) {
  // An unmatched opener becomes an Error token.
  auto buffer = Lex("{");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Error, .text = "{"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // So does an unmatched closer.
  buffer = Lex("}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Error, .text = "}"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // "{(}" recovers by synthesizing a ")" before the "}".
  buffer = Lex("{(}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart},
          {.kind = TokenKind::OpenCurlyBrace, .column = 1},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::CloseParen, .column = 3, .recovery = true},
          {.kind = TokenKind::CloseCurlyBrace, .column = 3},
          {.kind = TokenKind::FileEnd},
      }));
  // ")({)": the stray leading ")" is an error, and a "}" is synthesized to
  // close the "{" before the final ")".
  buffer = Lex(")({)");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart},
          {.kind = TokenKind::Error, .column = 1, .text = ")"},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::OpenCurlyBrace, .column = 3},
          {.kind = TokenKind::CloseCurlyBrace, .column = 4, .recovery = true},
          {.kind = TokenKind::CloseParen, .column = 4},
          {.kind = TokenKind::FileEnd},
      }));
}
  598. TEST_F(LexerTest, Whitespace) {
  599. auto buffer = Lex("{( } {(");
  600. // Whether there should be whitespace before/after each token.
  601. bool space[] = {true,
  602. // start-of-file
  603. true,
  604. // {
  605. false,
  606. // (
  607. true,
  608. // inserted )
  609. true,
  610. // }
  611. true,
  612. // error {
  613. false,
  614. // error (
  615. true,
  616. // EOF
  617. false};
  618. int pos = 0;
  619. for (TokenIndex token : buffer.tokens()) {
  620. SCOPED_TRACE(
  621. llvm::formatv("Token #{0}: '{1}'", token, buffer.GetTokenText(token)));
  622. ASSERT_LT(pos, std::size(space));
  623. EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
  624. ++pos;
  625. ASSERT_LT(pos, std::size(space));
  626. EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  627. }
  628. ASSERT_EQ(pos + 1, std::size(space));
  629. }
  630. TEST_F(LexerTest, Keywords) {
  631. TokenKind keywords[] = {
  632. #define CARBON_TOKEN(TokenName)
  633. #define CARBON_KEYWORD_TOKEN(TokenName, ...) TokenKind::TokenName,
  634. #include "toolchain/lex/token_kind.def"
  635. };
  636. for (const auto& keyword : keywords) {
  637. auto buffer = Lex(keyword.fixed_spelling());
  638. EXPECT_FALSE(buffer.has_errors());
  639. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  640. {.kind = TokenKind::FileStart},
  641. {.kind = keyword, .column = 1, .indent_column = 1},
  642. {.kind = TokenKind::FileEnd},
  643. }));
  644. }
  645. }
  646. TEST_F(LexerTest, Comments) {
  647. auto buffer = Lex(" ;\n // foo\n ;\n");
  648. EXPECT_FALSE(buffer.has_errors());
  649. EXPECT_THAT(
  650. buffer,
  651. HasTokens(llvm::ArrayRef<ExpectedToken>{
  652. {.kind = TokenKind::FileStart, .line = 1, .column = 1},
  653. {.kind = TokenKind::Semi, .line = 1, .column = 2, .indent_column = 2},
  654. {.kind = TokenKind::Semi, .line = 3, .column = 3, .indent_column = 3},
  655. {.kind = TokenKind::FileEnd, .line = 3, .column = 4},
  656. }));
  657. buffer = Lex("// foo\n//\n// bar");
  658. EXPECT_FALSE(buffer.has_errors());
  659. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  660. {.kind = TokenKind::FileStart},
  661. {.kind = TokenKind::FileEnd}}));
  662. // Make sure weird characters aren't a problem.
  663. buffer = Lex(" // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  664. EXPECT_FALSE(buffer.has_errors());
  665. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  666. {.kind = TokenKind::FileStart},
  667. {.kind = TokenKind::FileEnd}}));
  668. // Make sure we can lex a comment at the end of the input.
  669. buffer = Lex("//");
  670. EXPECT_FALSE(buffer.has_errors());
  671. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  672. {.kind = TokenKind::FileStart},
  673. {.kind = TokenKind::FileEnd}}));
  674. }
  675. TEST_F(LexerTest, InvalidComments) {
  676. llvm::StringLiteral testcases[] = {
  677. " /// foo\n",
  678. "foo // bar\n",
  679. "//! hello",
  680. " //world",
  681. };
  682. for (llvm::StringLiteral testcase : testcases) {
  683. auto buffer = Lex(testcase);
  684. EXPECT_TRUE(buffer.has_errors());
  685. }
  686. }
  687. TEST_F(LexerTest, Identifiers) {
  688. auto buffer = Lex(" foobar");
  689. EXPECT_FALSE(buffer.has_errors());
  690. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  691. {.kind = TokenKind::FileStart},
  692. {.kind = TokenKind::Identifier,
  693. .column = 4,
  694. .indent_column = 4,
  695. .text = "foobar"},
  696. {.kind = TokenKind::FileEnd},
  697. }));
  698. // Check different kinds of identifier character sequences.
  699. buffer = Lex("_foo_bar");
  700. EXPECT_FALSE(buffer.has_errors());
  701. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  702. {.kind = TokenKind::FileStart},
  703. {.kind = TokenKind::Identifier, .text = "_foo_bar"},
  704. {.kind = TokenKind::FileEnd},
  705. }));
  706. buffer = Lex("foo2bar00");
  707. EXPECT_FALSE(buffer.has_errors());
  708. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  709. {.kind = TokenKind::FileStart},
  710. {.kind = TokenKind::Identifier, .text = "foo2bar00"},
  711. {.kind = TokenKind::FileEnd},
  712. }));
  713. // Check that we can parse identifiers that start with a keyword.
  714. buffer = Lex("fnord");
  715. EXPECT_FALSE(buffer.has_errors());
  716. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  717. {.kind = TokenKind::FileStart},
  718. {.kind = TokenKind::Identifier, .text = "fnord"},
  719. {.kind = TokenKind::FileEnd},
  720. }));
  721. // Check multiple identifiers with indent and interning.
  722. buffer = Lex(" foo;bar\nbar \n foo\tfoo");
  723. EXPECT_FALSE(buffer.has_errors());
  724. EXPECT_THAT(buffer,
  725. HasTokens(llvm::ArrayRef<ExpectedToken>{
  726. {.kind = TokenKind::FileStart, .line = 1, .column = 1},
  727. {.kind = TokenKind::Identifier,
  728. .line = 1,
  729. .column = 4,
  730. .indent_column = 4,
  731. .text = "foo"},
  732. {.kind = TokenKind::Semi},
  733. {.kind = TokenKind::Identifier,
  734. .line = 1,
  735. .column = 8,
  736. .indent_column = 4,
  737. .text = "bar"},
  738. {.kind = TokenKind::Identifier,
  739. .line = 2,
  740. .column = 1,
  741. .indent_column = 1,
  742. .text = "bar"},
  743. {.kind = TokenKind::Identifier,
  744. .line = 3,
  745. .column = 3,
  746. .indent_column = 3,
  747. .text = "foo"},
  748. {.kind = TokenKind::Identifier,
  749. .line = 3,
  750. .column = 7,
  751. .indent_column = 3,
  752. .text = "foo"},
  753. {.kind = TokenKind::FileEnd, .line = 3, .column = 10},
  754. }));
  755. }
TEST_F(LexerTest, StringLiterals) {
  // Covers: a simple literal with a decoded escape, a block (`'''`) literal
  // with a line continuation and a hex escape, raw (`#`-delimited) literals,
  // a literal containing an inner NUL, and three adjacent literals that must
  // not be merged into a single block literal.
  llvm::StringLiteral testcase = R"(
    "hello world\n"

    '''foo
      test \
     \xAB
     ''' trailing

      #"""#

    "\0"

    #"\0"foo"\1"#

    """x"""
  )";
  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // `"hello world\n"` — the `\n` escape is decoded.
                  {.kind = TokenKind::StringLiteral,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"hello world\n"}},
                  // The block literal: `foo` on the opening line is not part
                  // of the contents, the trailing `\` joins two source lines,
                  // and `\xAB` decodes to the single byte 0xAB (spelled with
                  // a C++ hex escape in the expectation below).
                  {.kind = TokenKind::StringLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {" test \xAB\n"}},
                  // `trailing` after the closing `'''` lexes as a separate
                  // identifier on the same line.
                  {.kind = TokenKind::Identifier,
                   .line = 7,
                   .column = 10,
                   .indent_column = 5,
                   .text = "trailing"},
                  // `#"""#` — a raw literal whose contents are one `"`.
                  {.kind = TokenKind::StringLiteral,
                   .line = 9,
                   .column = 7,
                   .indent_column = 7,
                   .value_stores = &value_stores_,
                   .string_contents = {"\""}},
                  // `"\0"` — contents are a single NUL byte, so the expected
                  // value must be built with `withInnerNUL`.
                  {.kind = TokenKind::StringLiteral,
                   .line = 11,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
                  // In the raw literal `#"\0"foo"\1"#`, the `\0` and `\1`
                  // escapes stay uninterpreted and interior `"` are literal.
                  {.kind = TokenKind::StringLiteral,
                   .line = 13,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"\\0\"foo\"\\1"}},
                  // """x""" is three string literals, not one invalid
                  // attempt at a block string literal.
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 7,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"x"}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 10,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::FileEnd, .line = 16, .column = 3},
              }));
}
  831. TEST_F(LexerTest, InvalidStringLiterals) {
  832. llvm::StringLiteral invalid[] = {
  833. // clang-format off
  834. R"(")",
  835. R"('''
  836. '')",
  837. R"("\)",
  838. R"("\")",
  839. R"("\\)",
  840. R"("\\\")",
  841. R"(''')",
  842. R"('''
  843. )",
  844. R"('''\)",
  845. R"(#'''
  846. ''')",
  847. // clang-format on
  848. };
  849. for (llvm::StringLiteral test : invalid) {
  850. SCOPED_TRACE(test);
  851. auto buffer = Lex(test);
  852. EXPECT_TRUE(buffer.has_errors());
  853. // We should have formed at least one error token.
  854. bool found_error = false;
  855. for (TokenIndex token : buffer.tokens()) {
  856. if (buffer.GetKind(token) == TokenKind::Error) {
  857. found_error = true;
  858. break;
  859. }
  860. }
  861. EXPECT_TRUE(found_error);
  862. }
  863. }
TEST_F(LexerTest, TypeLiterals) {
  // `iN`, `uN`, and `fN` lex as type literals for a plain decimal N; the
  // other spellings here (`i0`, `i0x1`, `u0`, `u64b`, `fi`, `s1`) lex as
  // ordinary identifiers instead.
  llvm::StringLiteral testcase = R"(
    i0 i1 i20 i999999999999 i0x1
    u0 u1 u64 u64b
    f32 f80 f1 fi
    s1
  )";
  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // `i0` is an identifier, not a type literal.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"i0"}},
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"i1"}},
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"i20"}},
                  // A very large width still lexes as one type literal.
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"i999999999999"}},
                  // A hex suffix makes this an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 29,
                   .indent_column = 5,
                   .text = {"i0x1"}},
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"u0"}},
                  {.kind = TokenKind::UnsignedIntTypeLiteral,
                   .line = 3,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"u1"}},
                  {.kind = TokenKind::UnsignedIntTypeLiteral,
                   .line = 3,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"u64"}},
                  // Trailing non-digit characters make this an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"u64b"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"f32"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 9,
                   .indent_column = 5,
                   .text = {"f80"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 13,
                   .indent_column = 5,
                   .text = {"f1"}},
                  {.kind = TokenKind::Identifier,
                   .line = 4,
                   .column = 16,
                   .indent_column = 5,
                   .text = {"fi"}},
                  // `s` is not a type-literal prefix.
                  {.kind = TokenKind::Identifier,
                   .line = 5,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"s1"}},
                  {.kind = TokenKind::FileEnd, .line = 6, .column = 3},
              }));
  // Reads back the lexed bit-width for the type literal at `token_index`
  // (indices refer to the token table above, including FileStart at 0).
  auto type_size = [&](int token_index) {
    auto token = buffer.tokens().begin()[token_index];
    return value_stores_.ints().Get(buffer.GetTypeLiteralSize(token));
  };
  EXPECT_EQ(type_size(2), 1);                // i1
  EXPECT_EQ(type_size(3), 20);               // i20
  EXPECT_EQ(type_size(4), 999999999999ULL);  // i999999999999
  EXPECT_EQ(type_size(7), 1);                // u1
  EXPECT_EQ(type_size(8), 64);               // u64
  EXPECT_EQ(type_size(10), 32);              // f32
  EXPECT_EQ(type_size(11), 80);              // f80
  EXPECT_EQ(type_size(12), 1);               // f1
}
  961. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  962. std::string code = "i";
  963. constexpr int Count = 10000;
  964. code.append(Count, '9');
  965. Testing::MockDiagnosticConsumer consumer;
  966. EXPECT_CALL(consumer,
  967. HandleDiagnostic(IsSingleDiagnostic(
  968. DiagnosticKind::TooManyDigits, DiagnosticLevel::Error, 1, 2,
  969. HasSubstr(llvm::formatv(" {0} ", Count)))));
  970. auto buffer = Lex(code, consumer);
  971. EXPECT_TRUE(buffer.has_errors());
  972. ASSERT_THAT(buffer,
  973. HasTokens(llvm::ArrayRef<ExpectedToken>{
  974. {.kind = TokenKind::FileStart, .line = 1, .column = 1},
  975. {.kind = TokenKind::Error,
  976. .line = 1,
  977. .column = 1,
  978. .indent_column = 1,
  979. .text = code},
  980. {.kind = TokenKind::FileEnd, .line = 1, .column = Count + 2},
  981. }));
  982. }
TEST_F(LexerTest, DiagnosticTrailingComment) {
  // A comment that follows code on the same line is diagnosed at the
  // comment's own position (line 3, column 19); a comment on its own line
  // (line 2) is fine.
  llvm::StringLiteral testcase = R"(
    // Hello!
    var String x; // trailing comment
  )";
  Testing::MockDiagnosticConsumer consumer;
  EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
                            DiagnosticKind::TrailingComment,
                            DiagnosticLevel::Error, 3, 19, _)));
  Lex(testcase, consumer);
}
  994. TEST_F(LexerTest, DiagnosticWhitespace) {
  995. Testing::MockDiagnosticConsumer consumer;
  996. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  997. DiagnosticKind::NoWhitespaceAfterCommentIntroducer,
  998. DiagnosticLevel::Error, 1, 3, _)));
  999. Lex("//no space after comment", consumer);
  1000. }
  1001. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  1002. Testing::MockDiagnosticConsumer consumer;
  1003. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1004. DiagnosticKind::UnknownEscapeSequence,
  1005. DiagnosticLevel::Error, 1, 8, HasSubstr("`b`"))));
  1006. Lex(R"("hello\bworld")", consumer);
  1007. }
  1008. TEST_F(LexerTest, DiagnosticBadHex) {
  1009. Testing::MockDiagnosticConsumer consumer;
  1010. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1011. DiagnosticKind::HexadecimalEscapeMissingDigits,
  1012. DiagnosticLevel::Error, 1, 9, _)));
  1013. Lex(R"("hello\xabworld")", consumer);
  1014. }
  1015. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  1016. Testing::MockDiagnosticConsumer consumer;
  1017. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1018. DiagnosticKind::InvalidDigit,
  1019. DiagnosticLevel::Error, 1, 6, HasSubstr("'a'"))));
  1020. Lex("0x123abc", consumer);
  1021. }
  1022. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  1023. Testing::MockDiagnosticConsumer consumer;
  1024. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1025. DiagnosticKind::UnterminatedString,
  1026. DiagnosticLevel::Error, 1, 1, _)));
  1027. Lex(R"(#" ")", consumer);
  1028. }
  1029. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  1030. Testing::MockDiagnosticConsumer consumer;
  1031. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1032. DiagnosticKind::UnrecognizedCharacters,
  1033. DiagnosticLevel::Error, 1, 1, _)));
  1034. Lex("\b", consumer);
  1035. }
TEST_F(LexerTest, PrintingOutputYaml) {
  // Test that we can parse this into YAML and verify line and indent data.
  auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  ASSERT_FALSE(buffer.has_errors());
  TestRawOstream print_stream;
  buffer.Print(print_stream);
  // The printed dump is a single YAML document: a sequence holding one
  // mapping with the filename and the token list. Each token records its
  // index, kind, line/column/indent, and spelling; every token except the
  // final FileEnd also records `has_trailing_space`.
  EXPECT_THAT(
      Yaml::Value::FromText(print_stream.TakeStr()),
      IsYaml(ElementsAre(Yaml::Sequence(ElementsAre(Yaml::Mapping(ElementsAre(
          Pair("filename", source_storage_.front().filename().str()),
          Pair("tokens",
               Yaml::Sequence(ElementsAre(
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "0"), Pair("kind", "FileStart"),
                       Pair("line", "1"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", ""),
                       Pair("has_trailing_space", "true"))),
                   // The `;` on line 2 sits after one space of indent.
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "1"), Pair("kind", "Semi"),
                                   Pair("line", "2"), Pair("column", "2"),
                                   Pair("indent", "2"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   // Two `;` tokens on line 5 share the line's indent of 1.
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "2"), Pair("kind", "Semi"),
                                   Pair("line", "5"), Pair("column", "1"),
                                   Pair("indent", "1"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "3"), Pair("kind", "Semi"),
                                   Pair("line", "5"), Pair("column", "3"),
                                   Pair("indent", "1"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "4"), Pair("kind", "FileEnd"),
                       Pair("line", "15"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", "")))))))))))));
}
  1073. } // namespace
  1074. } // namespace Carbon::Lex