tokenized_buffer_test.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "toolchain/lex/tokenized_buffer.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <algorithm>
#include <forward_list>
#include <iterator>

#include "llvm/ADT/ArrayRef.h"
#include "testing/base/test_raw_ostream.h"
#include "toolchain/base/value_store.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/diagnostics/mocks.h"
#include "toolchain/lex/lex.h"
#include "toolchain/lex/tokenized_buffer_test_helpers.h"
#include "toolchain/testing/yaml_test_helpers.h"
  17. namespace Carbon::Lex {
  18. namespace {
  19. using ::Carbon::Testing::ExpectedToken;
  20. using ::Carbon::Testing::IsDiagnostic;
  21. using ::Carbon::Testing::TestRawOstream;
  22. using ::testing::_;
  23. using ::testing::ElementsAre;
  24. using ::testing::Eq;
  25. using ::testing::HasSubstr;
  26. using ::testing::Pair;
  27. namespace Yaml = ::Carbon::Testing::Yaml;
// Test fixture owning the state needed to lex source text in isolation: a
// shared value store, an in-memory filesystem, and storage that keeps every
// SourceBuffer alive for the duration of the test.
class LexerTest : public ::testing::Test {
 protected:
  // Materializes `text` as a uniquely named in-memory file and returns a
  // SourceBuffer backed by it. Buffers live in `source_storage_`; a
  // forward_list never relocates elements, so the returned reference stays
  // valid as more buffers are added.
  auto GetSourceBuffer(llvm::StringRef text) -> SourceBuffer& {
    std::string filename = llvm::formatv("test{0}.carbon", ++file_index_);
    CARBON_CHECK(fs_.addFile(filename, /*ModificationTime=*/0,
                             llvm::MemoryBuffer::getMemBuffer(text)));
    source_storage_.push_front(std::move(*SourceBuffer::CreateFromFile(
        fs_, filename, ConsoleDiagnosticConsumer())));
    return source_storage_.front();
  }

  // Lexes `text` into a TokenizedBuffer, routing diagnostics to `consumer`
  // (the console consumer by default).
  auto Lex(llvm::StringRef text,
           DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
      -> TokenizedBuffer {
    return Lex::Lex(value_stores_, GetSourceBuffer(text), consumer);
  }

  SharedValueStores value_stores_;
  llvm::vfs::InMemoryFileSystem fs_;
  // Counter used to give each lexed buffer a distinct filename.
  int file_index_ = 0;
  std::forward_list<SourceBuffer> source_storage_;
};
// An empty input lexes with no errors into just the start/end sentinel tokens.
TEST_F(LexerTest, HandlesEmptyBuffer) {
  auto buffer = Lex("");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile}, {TokenKind::EndOfFile}}));
}
  54. TEST_F(LexerTest, TracksLinesAndColumns) {
  55. auto buffer = Lex("\n ;;\n ;;;\n x\"foo\" '''baz\n a\n ''' y");
  56. EXPECT_FALSE(buffer.has_errors());
  57. EXPECT_THAT(
  58. buffer,
  59. HasTokens(llvm::ArrayRef<ExpectedToken>{
  60. {.kind = TokenKind::StartOfFile,
  61. .line = 1,
  62. .column = 1,
  63. .indent_column = 1},
  64. {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 3},
  65. {.kind = TokenKind::Semi, .line = 2, .column = 4, .indent_column = 3},
  66. {.kind = TokenKind::Semi, .line = 3, .column = 4, .indent_column = 4},
  67. {.kind = TokenKind::Semi, .line = 3, .column = 5, .indent_column = 4},
  68. {.kind = TokenKind::Semi, .line = 3, .column = 6, .indent_column = 4},
  69. {.kind = TokenKind::Identifier,
  70. .line = 4,
  71. .column = 4,
  72. .indent_column = 4,
  73. .text = "x"},
  74. {.kind = TokenKind::StringLiteral,
  75. .line = 4,
  76. .column = 5,
  77. .indent_column = 4},
  78. {.kind = TokenKind::StringLiteral,
  79. .line = 4,
  80. .column = 11,
  81. .indent_column = 4},
  82. {.kind = TokenKind::Identifier,
  83. .line = 6,
  84. .column = 6,
  85. .indent_column = 11,
  86. .text = "y"},
  87. {.kind = TokenKind::EndOfFile, .line = 6, .column = 7},
  88. }));
  89. }
  90. TEST_F(LexerTest, HandlesNumericLiteral) {
  91. auto buffer = Lex("12-578\n 1 2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  92. EXPECT_FALSE(buffer.has_errors());
  93. ASSERT_THAT(buffer,
  94. HasTokens(llvm::ArrayRef<ExpectedToken>{
  95. {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
  96. {.kind = TokenKind::IntegerLiteral,
  97. .line = 1,
  98. .column = 1,
  99. .indent_column = 1,
  100. .text = "12"},
  101. {.kind = TokenKind::Minus,
  102. .line = 1,
  103. .column = 3,
  104. .indent_column = 1},
  105. {.kind = TokenKind::IntegerLiteral,
  106. .line = 1,
  107. .column = 4,
  108. .indent_column = 1,
  109. .text = "578"},
  110. {.kind = TokenKind::IntegerLiteral,
  111. .line = 2,
  112. .column = 3,
  113. .indent_column = 3,
  114. .text = "1"},
  115. {.kind = TokenKind::IntegerLiteral,
  116. .line = 2,
  117. .column = 6,
  118. .indent_column = 3,
  119. .text = "2"},
  120. {.kind = TokenKind::IntegerLiteral,
  121. .line = 3,
  122. .column = 1,
  123. .indent_column = 1,
  124. .text = "0x12_3ABC"},
  125. {.kind = TokenKind::IntegerLiteral,
  126. .line = 4,
  127. .column = 1,
  128. .indent_column = 1,
  129. .text = "0b10_10_11"},
  130. {.kind = TokenKind::IntegerLiteral,
  131. .line = 5,
  132. .column = 1,
  133. .indent_column = 1,
  134. .text = "1_234_567"},
  135. {.kind = TokenKind::RealLiteral,
  136. .line = 6,
  137. .column = 1,
  138. .indent_column = 1,
  139. .text = "1.5e9"},
  140. {.kind = TokenKind::EndOfFile, .line = 6, .column = 6},
  141. }));
  142. auto token_start = buffer.tokens().begin();
  143. auto token_12 = token_start + 1;
  144. EXPECT_EQ(value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_12)),
  145. 12);
  146. auto token_578 = token_12 + 2;
  147. EXPECT_EQ(value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_578)),
  148. 578);
  149. auto token_1 = token_578 + 1;
  150. EXPECT_EQ(value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_1)),
  151. 1);
  152. auto token_2 = token_1 + 1;
  153. EXPECT_EQ(value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_2)),
  154. 2);
  155. auto token_0x12_3abc = token_2 + 1;
  156. EXPECT_EQ(
  157. value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_0x12_3abc)),
  158. 0x12'3abc);
  159. auto token_0b10_10_11 = token_0x12_3abc + 1;
  160. EXPECT_EQ(
  161. value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_0b10_10_11)),
  162. 0b10'10'11);
  163. auto token_1_234_567 = token_0b10_10_11 + 1;
  164. EXPECT_EQ(
  165. value_stores_.integers().Get(buffer.GetIntegerLiteral(*token_1_234_567)),
  166. 1'234'567);
  167. auto token_1_5e9 = token_1_234_567 + 1;
  168. auto value_1_5e9 =
  169. value_stores_.reals().Get(buffer.GetRealLiteral(*token_1_5e9));
  170. EXPECT_EQ(value_1_5e9.mantissa.getZExtValue(), 15);
  171. EXPECT_EQ(value_1_5e9.exponent.getSExtValue(), 8);
  172. EXPECT_EQ(value_1_5e9.is_decimal, true);
  173. }
// Malformed numeric literals lex as Error tokens, while valid neighbors on the
// same line still lex normally (the buffer as a whole reports errors).
TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  EXPECT_TRUE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
                  // Invalid suffix on a decimal literal.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "14x"},
                  {.kind = TokenKind::IntegerLiteral,
                   .line = 1,
                   .column = 5,
                   .indent_column = 1,
                   .text = "15_49"},
                  // Invalid suffix on a hex real literal.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 11,
                   .indent_column = 1,
                   .text = "0x3.5q"},
                  {.kind = TokenKind::RealLiteral,
                   .line = 1,
                   .column = 18,
                   .indent_column = 1,
                   .text = "0x3_4.5_6"},
                  // Invalid radix/suffix combination.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 28,
                   .indent_column = 1,
                   .text = "0ops"},
                  {.kind = TokenKind::EndOfFile, .line = 1, .column = 32},
              }));
}
// Checks where the lexer decides a numeric literal ends: trailing `.`, `e`,
// and sign characters are only absorbed into the literal when they form a
// valid real-literal shape; otherwise the literal is split into separate
// tokens (or becomes an Error token).
TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  llvm::StringLiteral source_text = R"(
1.
.2
3.+foo
4.0-bar
5.0e+123+456
6.0e+1e+2
1e7
8..10
9.0.9.5
10.foo
11.0.foo
12e+1
13._
)";
  auto buffer = Lex(source_text);
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::StartOfFile},
                          {.kind = TokenKind::IntegerLiteral, .text = "1"},
                          {.kind = TokenKind::Period},
                          // newline
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntegerLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::IntegerLiteral, .text = "3"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "4.0"},
                          {.kind = TokenKind::Minus},
                          {.kind = TokenKind::Identifier, .text = "bar"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "5.0e+123"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntegerLiteral, .text = "456"},
                          // newline
                          {.kind = TokenKind::Error, .text = "6.0e+1e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntegerLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::Error, .text = "1e7"},
                          // newline
                          {.kind = TokenKind::IntegerLiteral, .text = "8"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntegerLiteral, .text = "10"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "9.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::RealLiteral, .text = "9.5"},
                          // newline
                          {.kind = TokenKind::Error, .text = "10.foo"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "11.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          {.kind = TokenKind::Error, .text = "12e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntegerLiteral, .text = "1"},
                          // newline
                          {.kind = TokenKind::IntegerLiteral, .text = "13"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Underscore},
                          // newline
                          {.kind = TokenKind::EndOfFile},
                      }));
}
// Characters with no token meaning (including multi-byte UTF-8 and an
// embedded NUL) are grouped into Error tokens without derailing surrounding
// valid tokens. The explicit StringRef length keeps the embedded NUL.
TEST_F(LexerTest, HandlesGarbageCharacters) {
  constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
          {.kind = TokenKind::Error,
           .line = 1,
           .column = 1,
           // 💩 takes 4 bytes, and we count column as bytes offset.
           .text = llvm::StringRef("$$💩", 6)},
          {.kind = TokenKind::Minus, .line = 1, .column = 7},
          {.kind = TokenKind::Error, .line = 1, .column = 8, .text = "$"},
          // newline
          {.kind = TokenKind::Error,
           .line = 2,
           .column = 1,
           .text = llvm::StringRef("$\0$", 3)},
          {.kind = TokenKind::IntegerLiteral,
           .line = 2,
           .column = 4,
           .text = "12"},
          {.kind = TokenKind::Error, .line = 2, .column = 6, .text = "$"},
          // newline
          {.kind = TokenKind::Backslash, .line = 3, .column = 1, .text = "\\"},
          {.kind = TokenKind::Error, .line = 3, .column = 2, .text = "\"\\"},
          // newline
          {.kind = TokenKind::Error, .line = 4, .column = 1, .text = "\"x"},
          {.kind = TokenKind::EndOfFile, .line = 4, .column = 3},
      }));
}
TEST_F(LexerTest, Symbols) {
  // We don't need to exhaustively test symbols here as they're handled with
  // common code, but we want to check specific patterns to verify things like
  // max-munch rule and handling of interesting symbols.

  // Max-munch: `<<<` is `<<` followed by `<`, not three `<`.
  auto buffer = Lex("<<<");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::LessLess},
                          {TokenKind::Less},
                          {TokenKind::EndOfFile},
                      }));

  // `<<=` wins over `<<` + `=`; `>>` is a single token.
  buffer = Lex("<<=>>");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::LessLessEqual},
                          {TokenKind::GreaterGreater},
                          {TokenKind::EndOfFile},
                      }));

  // Whitespace separates what would otherwise max-munch; `<=>` is one token.
  buffer = Lex("< <=> >");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::Less},
                          {TokenKind::LessEqualGreater},
                          {TokenKind::Greater},
                          {TokenKind::EndOfFile},
                      }));

  // A run of distinct single-character symbols.
  buffer = Lex("\\/?@&^!");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::Backslash},
                          {TokenKind::Slash},
                          {TokenKind::Question},
                          {TokenKind::At},
                          {TokenKind::Amp},
                          {TokenKind::Caret},
                          {TokenKind::Exclaim},
                          {TokenKind::EndOfFile},
                      }));
}
// Simple and nested parentheses lex without errors.
TEST_F(LexerTest, Parens) {
  auto buffer = Lex("()");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::OpenParen},
                          {TokenKind::CloseParen},
                          {TokenKind::EndOfFile},
                      }));

  buffer = Lex("((()()))");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::OpenParen},
                          {TokenKind::OpenParen},
                          {TokenKind::OpenParen},
                          {TokenKind::CloseParen},
                          {TokenKind::OpenParen},
                          {TokenKind::CloseParen},
                          {TokenKind::CloseParen},
                          {TokenKind::CloseParen},
                          {TokenKind::EndOfFile},
                      }));
}
// Simple and nested curly braces lex without errors.
TEST_F(LexerTest, CurlyBraces) {
  auto buffer = Lex("{}");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::OpenCurlyBrace},
                          {TokenKind::CloseCurlyBrace},
                          {TokenKind::EndOfFile},
                      }));

  buffer = Lex("{{{}{}}}");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {TokenKind::OpenCurlyBrace},
                          {TokenKind::OpenCurlyBrace},
                          {TokenKind::OpenCurlyBrace},
                          {TokenKind::CloseCurlyBrace},
                          {TokenKind::OpenCurlyBrace},
                          {TokenKind::CloseCurlyBrace},
                          {TokenKind::CloseCurlyBrace},
                          {TokenKind::CloseCurlyBrace},
                          {TokenKind::EndOfFile},
                      }));
}
// GetMatchedClosingToken / GetMatchedOpeningToken must pair up opening and
// closing grouping tokens correctly, including when groups nest.
TEST_F(LexerTest, MatchingGroups) {
  {
    TokenizedBuffer buffer = Lex("(){}");
    ASSERT_FALSE(buffer.has_errors());
    // Advance past the start-of-file token.
    auto it = ++buffer.tokens().begin();
    auto open_paren_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    auto open_curly_token = *it++;
    auto close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
  {
    TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = ++buffer.tokens().begin();

    // First group: `({x})`.
    auto open_paren_token = *it++;
    auto open_curly_token = *it++;
    ASSERT_EQ("x",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto close_curly_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));

    // Second group: `{(y)}`.
    open_curly_token = *it++;
    open_paren_token = *it++;
    ASSERT_EQ("y",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    close_paren_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));

    // Third group: doubly nested `{{((z))}}`.
    open_curly_token = *it++;
    auto inner_open_curly_token = *it++;
    open_paren_token = *it++;
    auto inner_open_paren_token = *it++;
    ASSERT_EQ("z",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto inner_close_paren_token = *it++;
    close_paren_token = *it++;
    auto inner_close_curly_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(inner_close_curly_token,
              buffer.GetMatchedClosingToken(inner_open_curly_token));
    EXPECT_EQ(inner_open_curly_token,
              buffer.GetMatchedOpeningToken(inner_close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(inner_close_paren_token,
              buffer.GetMatchedClosingToken(inner_open_paren_token));
    EXPECT_EQ(inner_open_paren_token,
              buffer.GetMatchedOpeningToken(inner_close_paren_token));

    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
}
// Unbalanced grouping symbols: for an unmatched opener the lexer inserts a
// recovery closing token (`.recovery = true`); an unmatched closer becomes an
// Error token.
TEST_F(LexerTest, MismatchedGroups) {
  auto buffer = Lex("{");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {TokenKind::StartOfFile},
                  {TokenKind::OpenCurlyBrace},
                  {.kind = TokenKind::CloseCurlyBrace, .recovery = true},
                  {TokenKind::EndOfFile},
              }));

  buffer = Lex("}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::StartOfFile},
                          {.kind = TokenKind::Error, .text = "}"},
                          {TokenKind::EndOfFile},
                      }));

  // The `(` is closed by a recovery `)` before the real `}`.
  buffer = Lex("{(}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {TokenKind::StartOfFile},
          {.kind = TokenKind::OpenCurlyBrace, .column = 1},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::CloseParen, .column = 3, .recovery = true},
          {.kind = TokenKind::CloseCurlyBrace, .column = 3},
          {TokenKind::EndOfFile},
      }));

  // Leading `)` is an error; the `{` gets a recovery `}` before the real `)`.
  buffer = Lex(")({)");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {TokenKind::StartOfFile},
          {.kind = TokenKind::Error, .column = 1, .text = ")"},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::OpenCurlyBrace, .column = 3},
          {.kind = TokenKind::CloseCurlyBrace, .column = 4, .recovery = true},
          {.kind = TokenKind::CloseParen, .column = 4},
          {TokenKind::EndOfFile},
      }));
}
// Checks HasLeadingWhitespace/HasTrailingWhitespace for every token, including
// the recovery tokens inserted for the unclosed groups.
TEST_F(LexerTest, Whitespace) {
  auto buffer = Lex("{( } {(");
  // Whether there should be whitespace before/after each token. Entry i is
  // the leading whitespace of token i and simultaneously the trailing
  // whitespace of token i-1, so there is one more entry than there are
  // tokens (checked by the final ASSERT_EQ below).
  bool space[] = {true,   // start-of-file
                  true,   // {
                  false,  // (
                  true,   // inserted )
                  true,   // }
                  true,   // {
                  false,  // (
                  true,   // inserted )
                  true,   // inserted }
                  true,   // EOF
                  false};
  int pos = 0;
  for (Token token : buffer.tokens()) {
    SCOPED_TRACE(
        llvm::formatv("Token #{0}: '{1}'", token, buffer.GetTokenText(token)));
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
    ++pos;
    // Note: pos is NOT incremented again here; the trailing check of this
    // token shares an entry with the leading check of the next token.
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  }
  ASSERT_EQ(pos + 1, std::size(space));
}
  566. TEST_F(LexerTest, Keywords) {
  567. auto buffer = Lex(" fn");
  568. EXPECT_FALSE(buffer.has_errors());
  569. EXPECT_THAT(buffer,
  570. HasTokens(llvm::ArrayRef<ExpectedToken>{
  571. {TokenKind::StartOfFile},
  572. {.kind = TokenKind::Fn, .column = 4, .indent_column = 4},
  573. {TokenKind::EndOfFile},
  574. }));
  575. buffer = Lex("and or not if else for return var break continue _");
  576. EXPECT_FALSE(buffer.has_errors());
  577. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  578. {TokenKind::StartOfFile},
  579. {TokenKind::And},
  580. {TokenKind::Or},
  581. {TokenKind::Not},
  582. {TokenKind::If},
  583. {TokenKind::Else},
  584. {TokenKind::For},
  585. {TokenKind::Return},
  586. {TokenKind::Var},
  587. {TokenKind::Break},
  588. {TokenKind::Continue},
  589. {TokenKind::Underscore},
  590. {TokenKind::EndOfFile},
  591. }));
  592. }
  593. TEST_F(LexerTest, Comments) {
  594. auto buffer = Lex(" ;\n // foo\n ;\n");
  595. EXPECT_FALSE(buffer.has_errors());
  596. EXPECT_THAT(
  597. buffer,
  598. HasTokens(llvm::ArrayRef<ExpectedToken>{
  599. {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
  600. {.kind = TokenKind::Semi, .line = 1, .column = 2, .indent_column = 2},
  601. {.kind = TokenKind::Semi, .line = 3, .column = 3, .indent_column = 3},
  602. {.kind = TokenKind::EndOfFile, .line = 3, .column = 4},
  603. }));
  604. buffer = Lex("// foo\n//\n// bar");
  605. EXPECT_FALSE(buffer.has_errors());
  606. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  607. {TokenKind::StartOfFile}, {TokenKind::EndOfFile}}));
  608. // Make sure weird characters aren't a problem.
  609. buffer = Lex(" // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  610. EXPECT_FALSE(buffer.has_errors());
  611. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  612. {TokenKind::StartOfFile}, {TokenKind::EndOfFile}}));
  613. // Make sure we can lex a comment at the end of the input.
  614. buffer = Lex("//");
  615. EXPECT_FALSE(buffer.has_errors());
  616. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  617. {TokenKind::StartOfFile}, {TokenKind::EndOfFile}}));
  618. }
  619. TEST_F(LexerTest, InvalidComments) {
  620. llvm::StringLiteral testcases[] = {
  621. " /// foo\n",
  622. "foo // bar\n",
  623. "//! hello",
  624. " //world",
  625. };
  626. for (llvm::StringLiteral testcase : testcases) {
  627. auto buffer = Lex(testcase);
  628. EXPECT_TRUE(buffer.has_errors());
  629. }
  630. }
  631. TEST_F(LexerTest, Identifiers) {
  632. auto buffer = Lex(" foobar");
  633. EXPECT_FALSE(buffer.has_errors());
  634. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  635. {TokenKind::StartOfFile},
  636. {.kind = TokenKind::Identifier,
  637. .column = 4,
  638. .indent_column = 4,
  639. .text = "foobar"},
  640. {TokenKind::EndOfFile},
  641. }));
  642. // Check different kinds of identifier character sequences.
  643. buffer = Lex("_foo_bar");
  644. EXPECT_FALSE(buffer.has_errors());
  645. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  646. {TokenKind::StartOfFile},
  647. {.kind = TokenKind::Identifier, .text = "_foo_bar"},
  648. {TokenKind::EndOfFile},
  649. }));
  650. buffer = Lex("foo2bar00");
  651. EXPECT_FALSE(buffer.has_errors());
  652. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  653. {TokenKind::StartOfFile},
  654. {.kind = TokenKind::Identifier, .text = "foo2bar00"},
  655. {TokenKind::EndOfFile},
  656. }));
  657. // Check that we can parse identifiers that start with a keyword.
  658. buffer = Lex("fnord");
  659. EXPECT_FALSE(buffer.has_errors());
  660. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  661. {TokenKind::StartOfFile},
  662. {.kind = TokenKind::Identifier, .text = "fnord"},
  663. {TokenKind::EndOfFile},
  664. }));
  665. // Check multiple identifiers with indent and interning.
  666. buffer = Lex(" foo;bar\nbar \n foo\tfoo");
  667. EXPECT_FALSE(buffer.has_errors());
  668. EXPECT_THAT(buffer,
  669. HasTokens(llvm::ArrayRef<ExpectedToken>{
  670. {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
  671. {.kind = TokenKind::Identifier,
  672. .line = 1,
  673. .column = 4,
  674. .indent_column = 4,
  675. .text = "foo"},
  676. {.kind = TokenKind::Semi},
  677. {.kind = TokenKind::Identifier,
  678. .line = 1,
  679. .column = 8,
  680. .indent_column = 4,
  681. .text = "bar"},
  682. {.kind = TokenKind::Identifier,
  683. .line = 2,
  684. .column = 1,
  685. .indent_column = 1,
  686. .text = "bar"},
  687. {.kind = TokenKind::Identifier,
  688. .line = 3,
  689. .column = 3,
  690. .indent_column = 3,
  691. .text = "foo"},
  692. {.kind = TokenKind::Identifier,
  693. .line = 3,
  694. .column = 7,
  695. .indent_column = 3,
  696. .text = "foo"},
  697. {.kind = TokenKind::EndOfFile, .line = 3, .column = 10},
  698. }));
  699. }
// Lexes the various string literal forms: simple quoted strings, block
// (''' ... ''') strings with escape handling and line continuation, raw
// (#"..."#) strings, and adjacent literals.
// NOTE(review): the line/column expectations below imply the testcase lines
// were indented (tokens at column 5/7) and separated by blank lines (string
// tokens on lines 2, 4, 9, 11, 13, 15); that whitespace appears collapsed in
// this copy of the file — confirm the literal text against upstream.
TEST_F(LexerTest, StringLiterals) {
  llvm::StringLiteral testcase = R"(
"hello world\n"
'''foo
test \
\xAB
''' trailing
#"""#
"\0"
#"\0"foo"\1"#
"""x"""
)";

  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
                  {.kind = TokenKind::StringLiteral,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"hello world\n"}},
                  // Block string: escapes processed, lines joined by the
                  // trailing `\` continuation.
                  {.kind = TokenKind::StringLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {" test \xAB\n"}},
                  {.kind = TokenKind::Identifier,
                   .line = 7,
                   .column = 10,
                   .indent_column = 5,
                   .text = "trailing"},
                  // Raw string containing a single `"`.
                  {.kind = TokenKind::StringLiteral,
                   .line = 9,
                   .column = 7,
                   .indent_column = 7,
                   .value_stores = &value_stores_,
                   .string_contents = {"\""}},
                  // `\0` escape produces an embedded NUL.
                  {.kind = TokenKind::StringLiteral,
                   .line = 11,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
                  // Raw string: `\0` and `\1` are NOT escapes here.
                  {.kind = TokenKind::StringLiteral,
                   .line = 13,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"\\0\"foo\"\\1"}},
                  // """x""" is three string literals, not one invalid
                  // attempt at a block string literal.
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 7,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"x"}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 10,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::EndOfFile, .line = 16, .column = 3},
              }));
}
  775. TEST_F(LexerTest, InvalidStringLiterals) {
  776. llvm::StringLiteral invalid[] = {
  777. // clang-format off
  778. R"(")",
  779. R"('''
  780. '')",
  781. R"("\)",
  782. R"("\")",
  783. R"("\\)",
  784. R"("\\\")",
  785. R"(''')",
  786. R"('''
  787. )",
  788. R"('''\)",
  789. R"(#'''
  790. ''')",
  791. // clang-format on
  792. };
  793. for (llvm::StringLiteral test : invalid) {
  794. SCOPED_TRACE(test);
  795. auto buffer = Lex(test);
  796. EXPECT_TRUE(buffer.has_errors());
  797. // We should have formed at least one error token.
  798. bool found_error = false;
  799. for (Token token : buffer.tokens()) {
  800. if (buffer.GetKind(token) == TokenKind::Error) {
  801. found_error = true;
  802. break;
  803. }
  804. }
  805. EXPECT_TRUE(found_error);
  806. }
  807. }
TEST_F(LexerTest, TypeLiterals) {
  // Each line exercises one family of type literals alongside near-miss
  // spellings that must lex as plain identifiers. The 4-space indent inside
  // the raw string is significant: the expected `column`/`indent_column`
  // values below are 5.
  llvm::StringLiteral testcase = R"(
    i0 i1 i20 i999999999999 i0x1
    u0 u1 u64 u64b
    f32 f80 f1 fi
    s1
  )";
  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
                  // `i0` is not a type literal; it lexes as an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"i0"}},
                  {.kind = TokenKind::IntegerTypeLiteral,
                   .line = 2,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"i1"}},
                  {.kind = TokenKind::IntegerTypeLiteral,
                   .line = 2,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"i20"}},
                  // Very large widths still lex as a single type literal.
                  {.kind = TokenKind::IntegerTypeLiteral,
                   .line = 2,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"i999999999999"}},
                  // A non-decimal suffix (`0x1`) makes this an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 29,
                   .indent_column = 5,
                   .text = {"i0x1"}},
                  // As with `i0`, `u0` is an identifier rather than a literal.
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"u0"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral,
                   .line = 3,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"u1"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral,
                   .line = 3,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"u64"}},
                  // Trailing non-digit characters demote this to an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"u64b"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"f32"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral,
                   .line = 4,
                   .column = 9,
                   .indent_column = 5,
                   .text = {"f80"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral,
                   .line = 4,
                   .column = 13,
                   .indent_column = 5,
                   .text = {"f1"}},
                  {.kind = TokenKind::Identifier,
                   .line = 4,
                   .column = 16,
                   .indent_column = 5,
                   .text = {"fi"}},
                  // `s` is not a type-literal prefix; `s1` is an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 5,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"s1"}},
                  {.kind = TokenKind::EndOfFile, .line = 6, .column = 3},
              }));
  // Spot-check the recorded bit-width for each type-literal token. Iterator
  // offsets index into the token list above (0 is StartOfFile).
  auto token_i1 = buffer.tokens().begin() + 2;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i1), 1);
  auto token_i20 = buffer.tokens().begin() + 3;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i20), 20);
  auto token_i999999999999 = buffer.tokens().begin() + 4;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i999999999999), 999999999999ULL);
  auto token_u1 = buffer.tokens().begin() + 7;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u1), 1);
  auto token_u64 = buffer.tokens().begin() + 8;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u64), 64);
  auto token_f32 = buffer.tokens().begin() + 10;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f32), 32);
  auto token_f80 = buffer.tokens().begin() + 11;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f80), 80);
  auto token_f1 = buffer.tokens().begin() + 12;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f1), 1);
}
  909. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  910. std::string code = "i";
  911. constexpr int Count = 10000;
  912. code.append(Count, '9');
  913. Testing::MockDiagnosticConsumer consumer;
  914. EXPECT_CALL(consumer,
  915. HandleDiagnostic(IsDiagnostic(
  916. DiagnosticKind::TooManyDigits, DiagnosticLevel::Error, 1, 2,
  917. HasSubstr(llvm::formatv(" {0} ", Count)))));
  918. auto buffer = Lex(code, consumer);
  919. EXPECT_TRUE(buffer.has_errors());
  920. ASSERT_THAT(
  921. buffer,
  922. HasTokens(llvm::ArrayRef<ExpectedToken>{
  923. {.kind = TokenKind::StartOfFile, .line = 1, .column = 1},
  924. {.kind = TokenKind::Error,
  925. .line = 1,
  926. .column = 1,
  927. .indent_column = 1,
  928. .text = {code}},
  929. {.kind = TokenKind::EndOfFile, .line = 1, .column = Count + 2},
  930. }));
  931. }
  932. TEST_F(LexerTest, DiagnosticTrailingComment) {
  933. llvm::StringLiteral testcase = R"(
  934. // Hello!
  935. var String x; // trailing comment
  936. )";
  937. Testing::MockDiagnosticConsumer consumer;
  938. EXPECT_CALL(consumer,
  939. HandleDiagnostic(IsDiagnostic(DiagnosticKind::TrailingComment,
  940. DiagnosticLevel::Error, 3, 19, _)));
  941. Lex(testcase, consumer);
  942. }
  943. TEST_F(LexerTest, DiagnosticWhitespace) {
  944. Testing::MockDiagnosticConsumer consumer;
  945. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  946. DiagnosticKind::NoWhitespaceAfterCommentIntroducer,
  947. DiagnosticLevel::Error, 1, 3, _)));
  948. Lex("//no space after comment", consumer);
  949. }
  950. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  951. Testing::MockDiagnosticConsumer consumer;
  952. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  953. DiagnosticKind::UnknownEscapeSequence,
  954. DiagnosticLevel::Error, 1, 8, HasSubstr("`b`"))));
  955. Lex(R"("hello\bworld")", consumer);
  956. }
  957. TEST_F(LexerTest, DiagnosticBadHex) {
  958. Testing::MockDiagnosticConsumer consumer;
  959. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  960. DiagnosticKind::HexadecimalEscapeMissingDigits,
  961. DiagnosticLevel::Error, 1, 9, _)));
  962. Lex(R"("hello\xabworld")", consumer);
  963. }
  964. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  965. Testing::MockDiagnosticConsumer consumer;
  966. EXPECT_CALL(consumer, HandleDiagnostic(IsDiagnostic(
  967. DiagnosticKind::InvalidDigit,
  968. DiagnosticLevel::Error, 1, 6, HasSubstr("'a'"))));
  969. Lex("0x123abc", consumer);
  970. }
  971. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  972. Testing::MockDiagnosticConsumer consumer;
  973. EXPECT_CALL(consumer,
  974. HandleDiagnostic(IsDiagnostic(DiagnosticKind::UnterminatedString,
  975. DiagnosticLevel::Error, 1, 1, _)));
  976. Lex(R"(#" ")", consumer);
  977. }
  978. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  979. Testing::MockDiagnosticConsumer consumer;
  980. EXPECT_CALL(consumer, HandleDiagnostic(
  981. IsDiagnostic(DiagnosticKind::UnrecognizedCharacters,
  982. DiagnosticLevel::Error, 1, 1, _)));
  983. Lex("\b", consumer);
  984. }
TEST_F(LexerTest, PrintingOutputYaml) {
  // Test that we can parse this into YAML and verify line and indent data.
  // Input is ` ;` on line 2 and `; ;` on line 5, followed by a long run of
  // blank lines; EndOfFile is expected at line 15, column 1.
  auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  ASSERT_FALSE(buffer.has_errors());
  TestRawOstream print_stream;
  buffer.Print(print_stream);
  // The printed output is one YAML document: a mapping with the filename and
  // a sequence of per-token mappings (index/kind/line/column/indent/spelling,
  // plus has_trailing_space where applicable).
  EXPECT_THAT(
      Yaml::Value::FromText(print_stream.TakeStr()),
      IsYaml(ElementsAre(Yaml::Sequence(ElementsAre(Yaml::Mapping(ElementsAre(
          Pair("filename", source_storage_.front().filename().str()),
          Pair("tokens",
               Yaml::Sequence(ElementsAre(
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "0"), Pair("kind", "StartOfFile"),
                       Pair("line", "1"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", ""),
                       Pair("has_trailing_space", "true"))),
                   // ` ;` on line 2: column and indent are both 2.
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "1"), Pair("kind", "Semi"),
                                   Pair("line", "2"), Pair("column", "2"),
                                   Pair("indent", "2"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   // `; ;` on line 5: two semis sharing indent 1.
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "2"), Pair("kind", "Semi"),
                                   Pair("line", "5"), Pair("column", "1"),
                                   Pair("indent", "1"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   Yaml::Mapping(
                       ElementsAre(Pair("index", "3"), Pair("kind", "Semi"),
                                   Pair("line", "5"), Pair("column", "3"),
                                   Pair("indent", "1"), Pair("spelling", ";"),
                                   Pair("has_trailing_space", "true"))),
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "4"), Pair("kind", "EndOfFile"),
                       Pair("line", "15"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", "")))))))))))));
}
  1022. } // namespace
  1023. } // namespace Carbon::Lex