// tokenized_buffer_test.cpp
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "toolchain/lexer/tokenized_buffer.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <iterator>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "toolchain/common/yaml_test_helpers.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/diagnostics/mocks.h"
#include "toolchain/lexer/tokenized_buffer_test_helpers.h"

namespace Carbon::Testing {
namespace {

using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::StrEq;
  25. class LexerTest : public ::testing::Test {
  26. protected:
  27. auto GetSourceBuffer(llvm::Twine text) -> SourceBuffer& {
  28. source_storage.push_back(
  29. std::move(*SourceBuffer::CreateFromText(text.str())));
  30. return source_storage.back();
  31. }
  32. auto Lex(llvm::Twine text,
  33. DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
  34. -> TokenizedBuffer {
  35. return TokenizedBuffer::Lex(GetSourceBuffer(text), consumer);
  36. }
  37. llvm::SmallVector<SourceBuffer, 16> source_storage;
  38. };
  39. TEST_F(LexerTest, HandlesEmptyBuffer) {
  40. auto buffer = Lex("");
  41. EXPECT_FALSE(buffer.has_errors());
  42. EXPECT_THAT(
  43. buffer,
  44. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  45. }
  46. TEST_F(LexerTest, TracksLinesAndColumns) {
  47. auto buffer = Lex("\n ;;\n ;;;\n x\"foo\" \"\"\"baz\n a\n \"\"\" y");
  48. EXPECT_FALSE(buffer.has_errors());
  49. EXPECT_THAT(buffer,
  50. HasTokens(llvm::ArrayRef<ExpectedToken>{
  51. {.kind = TokenKind::Semi(),
  52. .line = 2,
  53. .column = 3,
  54. .indent_column = 3},
  55. {.kind = TokenKind::Semi(),
  56. .line = 2,
  57. .column = 4,
  58. .indent_column = 3},
  59. {.kind = TokenKind::Semi(),
  60. .line = 3,
  61. .column = 4,
  62. .indent_column = 4},
  63. {.kind = TokenKind::Semi(),
  64. .line = 3,
  65. .column = 5,
  66. .indent_column = 4},
  67. {.kind = TokenKind::Semi(),
  68. .line = 3,
  69. .column = 6,
  70. .indent_column = 4},
  71. {.kind = TokenKind::Identifier(),
  72. .line = 4,
  73. .column = 4,
  74. .indent_column = 4,
  75. .text = "x"},
  76. {.kind = TokenKind::StringLiteral(),
  77. .line = 4,
  78. .column = 5,
  79. .indent_column = 4},
  80. {.kind = TokenKind::StringLiteral(),
  81. .line = 4,
  82. .column = 11,
  83. .indent_column = 4},
  84. {.kind = TokenKind::Identifier(),
  85. .line = 6,
  86. .column = 6,
  87. .indent_column = 11,
  88. .text = "y"},
  89. {.kind = TokenKind::EndOfFile(), .line = 6, .column = 7},
  90. }));
  91. }
  92. TEST_F(LexerTest, HandlesNumericLiteral) {
  93. auto buffer = Lex("12-578\n 1 2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  94. EXPECT_FALSE(buffer.has_errors());
  95. ASSERT_THAT(buffer,
  96. HasTokens(llvm::ArrayRef<ExpectedToken>{
  97. {.kind = TokenKind::IntegerLiteral(),
  98. .line = 1,
  99. .column = 1,
  100. .indent_column = 1,
  101. .text = "12"},
  102. {.kind = TokenKind::Minus(),
  103. .line = 1,
  104. .column = 3,
  105. .indent_column = 1},
  106. {.kind = TokenKind::IntegerLiteral(),
  107. .line = 1,
  108. .column = 4,
  109. .indent_column = 1,
  110. .text = "578"},
  111. {.kind = TokenKind::IntegerLiteral(),
  112. .line = 2,
  113. .column = 3,
  114. .indent_column = 3,
  115. .text = "1"},
  116. {.kind = TokenKind::IntegerLiteral(),
  117. .line = 2,
  118. .column = 6,
  119. .indent_column = 3,
  120. .text = "2"},
  121. {.kind = TokenKind::IntegerLiteral(),
  122. .line = 3,
  123. .column = 1,
  124. .indent_column = 1,
  125. .text = "0x12_3ABC"},
  126. {.kind = TokenKind::IntegerLiteral(),
  127. .line = 4,
  128. .column = 1,
  129. .indent_column = 1,
  130. .text = "0b10_10_11"},
  131. {.kind = TokenKind::IntegerLiteral(),
  132. .line = 5,
  133. .column = 1,
  134. .indent_column = 1,
  135. .text = "1_234_567"},
  136. {.kind = TokenKind::RealLiteral(),
  137. .line = 6,
  138. .column = 1,
  139. .indent_column = 1,
  140. .text = "1.5e9"},
  141. {.kind = TokenKind::EndOfFile(), .line = 6, .column = 6},
  142. }));
  143. auto token_12 = buffer.tokens().begin();
  144. EXPECT_EQ(buffer.GetIntegerLiteral(*token_12), 12);
  145. auto token_578 = buffer.tokens().begin() + 2;
  146. EXPECT_EQ(buffer.GetIntegerLiteral(*token_578), 578);
  147. auto token_1 = buffer.tokens().begin() + 3;
  148. EXPECT_EQ(buffer.GetIntegerLiteral(*token_1), 1);
  149. auto token_2 = buffer.tokens().begin() + 4;
  150. EXPECT_EQ(buffer.GetIntegerLiteral(*token_2), 2);
  151. auto token_0x12_3abc = buffer.tokens().begin() + 5;
  152. EXPECT_EQ(buffer.GetIntegerLiteral(*token_0x12_3abc), 0x12'3abc);
  153. auto token_0b10_10_11 = buffer.tokens().begin() + 6;
  154. EXPECT_EQ(buffer.GetIntegerLiteral(*token_0b10_10_11), 0b10'10'11);
  155. auto token_1_234_567 = buffer.tokens().begin() + 7;
  156. EXPECT_EQ(buffer.GetIntegerLiteral(*token_1_234_567), 1'234'567);
  157. auto token_1_5e9 = buffer.tokens().begin() + 8;
  158. auto value_1_5e9 = buffer.GetRealLiteral(*token_1_5e9);
  159. EXPECT_EQ(value_1_5e9.Mantissa().getZExtValue(), 15);
  160. EXPECT_EQ(value_1_5e9.Exponent().getSExtValue(), 8);
  161. EXPECT_EQ(value_1_5e9.IsDecimal(), true);
  162. }
// Malformed numeric literals lex as a single Error token covering the whole
// "word", while unusual-but-valid spellings still lex as literals.
TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  EXPECT_TRUE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // Invalid: alphabetic suffix on a decimal integer.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "14x"},
                  // Valid: digit separators in a decimal integer.
                  {.kind = TokenKind::IntegerLiteral(),
                   .line = 1,
                   .column = 5,
                   .indent_column = 1,
                   .text = "15_49"},
                  // Invalid: trailing letter on a hex real literal.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 11,
                   .indent_column = 1,
                   .text = "0x3.5q"},
                  // Valid: separators in both parts of a hex real literal.
                  {.kind = TokenKind::RealLiteral(),
                   .line = 1,
                   .column = 18,
                   .indent_column = 1,
                   .text = "0x3_4.5_6"},
                  // Invalid: unknown radix / letters after a leading zero.
                  {.kind = TokenKind::Error(),
                   .line = 1,
                   .column = 28,
                   .indent_column = 1,
                   .text = "0ops"},
                  {.kind = TokenKind::EndOfFile(), .line = 1, .column = 32},
              }));
}
// Checks where the lexer ends a numeric literal and starts the next token:
// trailing periods, `.` vs. member-access-like sequences, exponent signs, and
// adjacent operators. Each input line is annotated by the token group below.
TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  llvm::StringLiteral source_text = R"(
1.
.2
3.+foo
4.0-bar
5.0e+123+456
6.0e+1e+2
1e7
8..10
9.0.9.5
10.foo
11.0.foo
12e+1
13._
)";
  auto buffer = Lex(source_text);
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // `1.` — a trailing period is not part of the literal.
                  {.kind = TokenKind::IntegerLiteral(), .text = "1"},
                  {.kind = TokenKind::Period()},
                  // newline
                  // `.2` — a leading period is not part of the literal.
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "2"},
                  // newline
                  {.kind = TokenKind::IntegerLiteral(), .text = "3"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::Identifier(), .text = "foo"},
                  // newline
                  {.kind = TokenKind::RealLiteral(), .text = "4.0"},
                  {.kind = TokenKind::Minus()},
                  {.kind = TokenKind::Identifier(), .text = "bar"},
                  // newline
                  // `+` after a complete exponent is a separate operator.
                  {.kind = TokenKind::RealLiteral(), .text = "5.0e+123"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "456"},
                  // newline
                  // A second `e` inside the exponent makes the literal invalid.
                  {.kind = TokenKind::Error(), .text = "6.0e+1e"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "2"},
                  // newline
                  // `1e7` without a `.` or `+` is not a valid literal form.
                  {.kind = TokenKind::Error(), .text = "1e7"},
                  // newline
                  // `8..10` — two periods split the two integers.
                  {.kind = TokenKind::IntegerLiteral(), .text = "8"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "10"},
                  // newline
                  {.kind = TokenKind::RealLiteral(), .text = "9.0"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::RealLiteral(), .text = "9.5"},
                  // newline
                  // A letter directly after `<int>.` poisons the whole word.
                  {.kind = TokenKind::Error(), .text = "10.foo"},
                  // newline
                  // ...but after a complete real literal, `.foo` splits off.
                  {.kind = TokenKind::RealLiteral(), .text = "11.0"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Identifier(), .text = "foo"},
                  // newline
                  // Exponent on an integer with no fraction is invalid.
                  {.kind = TokenKind::Error(), .text = "12e"},
                  {.kind = TokenKind::Plus()},
                  {.kind = TokenKind::IntegerLiteral(), .text = "1"},
                  // newline
                  {.kind = TokenKind::IntegerLiteral(), .text = "13"},
                  {.kind = TokenKind::Period()},
                  {.kind = TokenKind::Underscore()},
                  // newline
                  {.kind = TokenKind::EndOfFile()},
              }));
}
// Unrecognized byte sequences (including multi-byte UTF-8 and an embedded NUL)
// become Error tokens without derailing the surrounding valid tokens.
TEST_F(LexerTest, HandlesGarbageCharacters) {
  // sizeof - 1 keeps the embedded '\0' but drops the terminating one.
  constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::Error(),
           .line = 1,
           .column = 1,
           // 💩 takes 4 bytes, and we count column as bytes offset.
           .text = llvm::StringRef("$$💩", 6)},
          {.kind = TokenKind::Minus(), .line = 1, .column = 7},
          {.kind = TokenKind::Error(), .line = 1, .column = 8, .text = "$"},
          // newline
          // The error token's text spans the embedded NUL byte.
          {.kind = TokenKind::Error(),
           .line = 2,
           .column = 1,
           .text = llvm::StringRef("$\0$", 3)},
          {.kind = TokenKind::IntegerLiteral(),
           .line = 2,
           .column = 4,
           .text = "12"},
          {.kind = TokenKind::Error(), .line = 2, .column = 6, .text = "$"},
          // newline
          // A lone backslash is a real token; the dangling quote is not.
          {.kind = TokenKind::Backslash(),
           .line = 3,
           .column = 1,
           .text = "\\"},
          {.kind = TokenKind::Error(), .line = 3, .column = 2, .text = "\"\\"},
          // newline
          {.kind = TokenKind::Error(), .line = 4, .column = 1, .text = "\"x"},
          {.kind = TokenKind::EndOfFile(), .line = 4, .column = 3},
      }));
}
TEST_F(LexerTest, Symbols) {
  // We don't need to exhaustively test symbols here as they're handled with
  // common code, but we want to check specific patterns to verify things like
  // max-munch rule and handling of interesting symbols.
  //
  // Max-munch: `<<<` is `<<` then `<`, not three `<`.
  auto buffer = Lex("<<<");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::LessLess()},
                          {TokenKind::Less()},
                          {TokenKind::EndOfFile()},
                      }));

  // `<<=` wins over `<<` + `=`, and `>>` over `>` + `>`.
  buffer = Lex("<<=>>");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::LessLessEqual()},
                          {TokenKind::GreaterGreater()},
                          {TokenKind::EndOfFile()},
                      }));

  // Whitespace separates what would otherwise merge; `<=>` is one token.
  buffer = Lex("< <=> >");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::Less()},
                          {TokenKind::LessEqualGreater()},
                          {TokenKind::Greater()},
                          {TokenKind::EndOfFile()},
                      }));

  // A run of distinct single-character symbols lexes one token each.
  buffer = Lex("\\/?@&^!");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::Backslash()},
                          {TokenKind::Slash()},
                          {TokenKind::Question()},
                          {TokenKind::At()},
                          {TokenKind::Amp()},
                          {TokenKind::Caret()},
                          {TokenKind::Exclaim()},
                          {TokenKind::EndOfFile()},
                      }));
}
// Balanced parentheses, flat and nested, lex without errors.
TEST_F(LexerTest, Parens) {
  auto buffer = Lex("()");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::OpenParen()},
                          {TokenKind::CloseParen()},
                          {TokenKind::EndOfFile()},
                      }));

  buffer = Lex("((()()))");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::OpenParen()},
                          {TokenKind::OpenParen()},
                          {TokenKind::OpenParen()},
                          {TokenKind::CloseParen()},
                          {TokenKind::OpenParen()},
                          {TokenKind::CloseParen()},
                          {TokenKind::CloseParen()},
                          {TokenKind::CloseParen()},
                          {TokenKind::EndOfFile()},
                      }));
}
// Balanced curly braces, flat and nested, lex without errors.
TEST_F(LexerTest, CurlyBraces) {
  auto buffer = Lex("{}");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::OpenCurlyBrace()},
                          {TokenKind::CloseCurlyBrace()},
                          {TokenKind::EndOfFile()},
                      }));

  buffer = Lex("{{{}{}}}");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {TokenKind::OpenCurlyBrace()},
                          {TokenKind::OpenCurlyBrace()},
                          {TokenKind::OpenCurlyBrace()},
                          {TokenKind::CloseCurlyBrace()},
                          {TokenKind::OpenCurlyBrace()},
                          {TokenKind::CloseCurlyBrace()},
                          {TokenKind::CloseCurlyBrace()},
                          {TokenKind::CloseCurlyBrace()},
                          {TokenKind::EndOfFile()},
                      }));
}
// Every open bracket token should map to its matching close bracket token (and
// vice versa) via GetMatchedClosingToken / GetMatchedOpeningToken, including
// for interleaved and deeply nested groups.
TEST_F(LexerTest, MatchingGroups) {
  {
    // Two adjacent sibling groups: `()` then `{}`.
    TokenizedBuffer buffer = Lex("(){}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = buffer.tokens().begin();
    auto open_paren_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    auto open_curly_token = *it++;
    auto close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
    EXPECT_EQ(buffer.tokens().end(), it);
  }
  {
    // Mixed nesting of parens inside braces and braces inside parens.
    TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = buffer.tokens().begin();
    // First group: `({x})` — braces nested inside parens.
    auto open_paren_token = *it++;
    auto open_curly_token = *it++;
    ASSERT_EQ("x", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    auto close_curly_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    // Second group: `{(y)}` — parens nested inside braces.
    open_curly_token = *it++;
    open_paren_token = *it++;
    ASSERT_EQ("y", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    close_paren_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    // Third group: `{{((z))}}` — doubled nesting of both bracket kinds.
    open_curly_token = *it++;
    auto inner_open_curly_token = *it++;
    open_paren_token = *it++;
    auto inner_open_paren_token = *it++;
    ASSERT_EQ("z", buffer.GetIdentifierText(buffer.GetIdentifier(*it++)));
    auto inner_close_paren_token = *it++;
    close_paren_token = *it++;
    auto inner_close_curly_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(inner_close_curly_token,
              buffer.GetMatchedClosingToken(inner_open_curly_token));
    EXPECT_EQ(inner_open_curly_token,
              buffer.GetMatchedOpeningToken(inner_close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(inner_close_paren_token,
              buffer.GetMatchedClosingToken(inner_open_paren_token));
    EXPECT_EQ(inner_open_paren_token,
              buffer.GetMatchedOpeningToken(inner_close_paren_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::EndOfFile());
    EXPECT_EQ(buffer.tokens().end(), it);
  }
}
// Unmatched opening brackets cause matching close tokens to be inserted as
// recovery tokens; unmatched closing brackets become Error tokens.
TEST_F(LexerTest, MismatchedGroups) {
  auto buffer = Lex("{");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {TokenKind::OpenCurlyBrace()},
                  {.kind = TokenKind::CloseCurlyBrace(), .recovery = true},
                  {TokenKind::EndOfFile()},
              }));

  buffer = Lex("}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::Error(), .text = "}"},
                          {TokenKind::EndOfFile()},
                      }));

  // `{(}` — the `)` is inserted (recovery) at the point of the `}`.
  buffer = Lex("{(}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::OpenCurlyBrace(), .column = 1},
          {.kind = TokenKind::OpenParen(), .column = 2},
          {.kind = TokenKind::CloseParen(), .column = 3, .recovery = true},
          {.kind = TokenKind::CloseCurlyBrace(), .column = 3},
          {TokenKind::EndOfFile()},
      }));

  // `)({)` — leading `)` is an error, and `}` is inserted before the final `)`.
  buffer = Lex(")({)");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::Error(), .column = 1, .text = ")"},
          {.kind = TokenKind::OpenParen(), .column = 2},
          {.kind = TokenKind::OpenCurlyBrace(), .column = 3},
          {.kind = TokenKind::CloseCurlyBrace(), .column = 4, .recovery = true},
          {.kind = TokenKind::CloseParen(), .column = 4},
          {TokenKind::EndOfFile()},
      }));
}
  505. TEST_F(LexerTest, Whitespace) {
  506. auto buffer = Lex("{( } {(");
  507. // Whether there should be whitespace before/after each token.
  508. bool space[] = {true,
  509. // {
  510. false,
  511. // (
  512. true,
  513. // inserted )
  514. true,
  515. // }
  516. true,
  517. // {
  518. false,
  519. // (
  520. true,
  521. // inserted )
  522. true,
  523. // inserted }
  524. true,
  525. // EOF
  526. false};
  527. int pos = 0;
  528. for (TokenizedBuffer::Token token : buffer.tokens()) {
  529. ASSERT_LT(pos, std::size(space));
  530. EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
  531. ++pos;
  532. ASSERT_LT(pos, std::size(space));
  533. EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  534. }
  535. ASSERT_EQ(pos + 1, std::size(space));
  536. }
  537. TEST_F(LexerTest, Keywords) {
  538. auto buffer = Lex(" fn");
  539. EXPECT_FALSE(buffer.has_errors());
  540. EXPECT_THAT(buffer,
  541. HasTokens(llvm::ArrayRef<ExpectedToken>{
  542. {.kind = TokenKind::Fn(), .column = 4, .indent_column = 4},
  543. {TokenKind::EndOfFile()},
  544. }));
  545. buffer = Lex("and or not if else for return var break continue _");
  546. EXPECT_FALSE(buffer.has_errors());
  547. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  548. {TokenKind::And()},
  549. {TokenKind::Or()},
  550. {TokenKind::Not()},
  551. {TokenKind::If()},
  552. {TokenKind::Else()},
  553. {TokenKind::For()},
  554. {TokenKind::Return()},
  555. {TokenKind::Var()},
  556. {TokenKind::Break()},
  557. {TokenKind::Continue()},
  558. {TokenKind::Underscore()},
  559. {TokenKind::EndOfFile()},
  560. }));
  561. }
  562. TEST_F(LexerTest, Comments) {
  563. auto buffer = Lex(" ;\n // foo\n ;\n");
  564. EXPECT_FALSE(buffer.has_errors());
  565. EXPECT_THAT(buffer,
  566. HasTokens(llvm::ArrayRef<ExpectedToken>{
  567. {.kind = TokenKind::Semi(),
  568. .line = 1,
  569. .column = 2,
  570. .indent_column = 2},
  571. {.kind = TokenKind::Semi(),
  572. .line = 3,
  573. .column = 3,
  574. .indent_column = 3},
  575. {.kind = TokenKind::EndOfFile(), .line = 3, .column = 4},
  576. }));
  577. buffer = Lex("// foo\n//\n// bar");
  578. EXPECT_FALSE(buffer.has_errors());
  579. EXPECT_THAT(
  580. buffer,
  581. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  582. // Make sure weird characters aren't a problem.
  583. buffer = Lex(" // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  584. EXPECT_FALSE(buffer.has_errors());
  585. EXPECT_THAT(
  586. buffer,
  587. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  588. // Make sure we can lex a comment at the end of the input.
  589. buffer = Lex("//");
  590. EXPECT_FALSE(buffer.has_errors());
  591. EXPECT_THAT(
  592. buffer,
  593. HasTokens(llvm::ArrayRef<ExpectedToken>{{TokenKind::EndOfFile()}}));
  594. }
  595. TEST_F(LexerTest, InvalidComments) {
  596. llvm::StringLiteral testcases[] = {
  597. " /// foo\n",
  598. "foo // bar\n",
  599. "//! hello",
  600. " //world",
  601. };
  602. for (llvm::StringLiteral testcase : testcases) {
  603. auto buffer = Lex(testcase);
  604. EXPECT_TRUE(buffer.has_errors());
  605. }
  606. }
  607. TEST_F(LexerTest, Identifiers) {
  608. auto buffer = Lex(" foobar");
  609. EXPECT_FALSE(buffer.has_errors());
  610. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  611. {.kind = TokenKind::Identifier(),
  612. .column = 4,
  613. .indent_column = 4,
  614. .text = "foobar"},
  615. {TokenKind::EndOfFile()},
  616. }));
  617. // Check different kinds of identifier character sequences.
  618. buffer = Lex("_foo_bar");
  619. EXPECT_FALSE(buffer.has_errors());
  620. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  621. {.kind = TokenKind::Identifier(), .text = "_foo_bar"},
  622. {TokenKind::EndOfFile()},
  623. }));
  624. buffer = Lex("foo2bar00");
  625. EXPECT_FALSE(buffer.has_errors());
  626. EXPECT_THAT(buffer,
  627. HasTokens(llvm::ArrayRef<ExpectedToken>{
  628. {.kind = TokenKind::Identifier(), .text = "foo2bar00"},
  629. {TokenKind::EndOfFile()},
  630. }));
  631. // Check that we can parse identifiers that start with a keyword.
  632. buffer = Lex("fnord");
  633. EXPECT_FALSE(buffer.has_errors());
  634. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  635. {.kind = TokenKind::Identifier(), .text = "fnord"},
  636. {TokenKind::EndOfFile()},
  637. }));
  638. // Check multiple identifiers with indent and interning.
  639. buffer = Lex(" foo;bar\nbar \n foo\tfoo");
  640. EXPECT_FALSE(buffer.has_errors());
  641. EXPECT_THAT(buffer,
  642. HasTokens(llvm::ArrayRef<ExpectedToken>{
  643. {.kind = TokenKind::Identifier(),
  644. .line = 1,
  645. .column = 4,
  646. .indent_column = 4,
  647. .text = "foo"},
  648. {.kind = TokenKind::Semi()},
  649. {.kind = TokenKind::Identifier(),
  650. .line = 1,
  651. .column = 8,
  652. .indent_column = 4,
  653. .text = "bar"},
  654. {.kind = TokenKind::Identifier(),
  655. .line = 2,
  656. .column = 1,
  657. .indent_column = 1,
  658. .text = "bar"},
  659. {.kind = TokenKind::Identifier(),
  660. .line = 3,
  661. .column = 3,
  662. .indent_column = 3,
  663. .text = "foo"},
  664. {.kind = TokenKind::Identifier(),
  665. .line = 3,
  666. .column = 7,
  667. .indent_column = 3,
  668. .text = "foo"},
  669. {.kind = TokenKind::EndOfFile(), .line = 3, .column = 10},
  670. }));
  671. }
  672. TEST_F(LexerTest, StringLiterals) {
  673. llvm::StringLiteral testcase = R"(
  674. "hello world\n"
  675. """foo
  676. test \
  677. \xAB
  678. """ trailing
  679. #"""#
  680. "\0"
  681. #"\0"foo"\1"#
  682. """x"""
  683. )";
  684. auto buffer = Lex(testcase);
  685. EXPECT_FALSE(buffer.has_errors());
  686. EXPECT_THAT(buffer,
  687. HasTokens(llvm::ArrayRef<ExpectedToken>{
  688. {.kind = TokenKind::StringLiteral(),
  689. .line = 2,
  690. .column = 5,
  691. .indent_column = 5,
  692. .string_contents = {"hello world\n"}},
  693. {.kind = TokenKind::StringLiteral(),
  694. .line = 4,
  695. .column = 5,
  696. .indent_column = 5,
  697. .string_contents = {" test \xAB\n"}},
  698. {.kind = TokenKind::Identifier(),
  699. .line = 7,
  700. .column = 10,
  701. .indent_column = 5,
  702. .text = "trailing"},
  703. {.kind = TokenKind::StringLiteral(),
  704. .line = 9,
  705. .column = 7,
  706. .indent_column = 7,
  707. .string_contents = {"\""}},
  708. {.kind = TokenKind::StringLiteral(),
  709. .line = 11,
  710. .column = 5,
  711. .indent_column = 5,
  712. .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
  713. {.kind = TokenKind::StringLiteral(),
  714. .line = 13,
  715. .column = 5,
  716. .indent_column = 5,
  717. .string_contents = {"\\0\"foo\"\\1"}},
  718. // """x""" is three string literals, not one.
  719. {.kind = TokenKind::StringLiteral(),
  720. .line = 15,
  721. .column = 5,
  722. .indent_column = 5,
  723. .string_contents = {""}},
  724. {.kind = TokenKind::StringLiteral(),
  725. .line = 15,
  726. .column = 7,
  727. .indent_column = 5,
  728. .string_contents = {"x"}},
  729. {.kind = TokenKind::StringLiteral(),
  730. .line = 15,
  731. .column = 10,
  732. .indent_column = 5,
  733. .string_contents = {""}},
  734. {.kind = TokenKind::EndOfFile(), .line = 16, .column = 3},
  735. }));
  736. }
  737. TEST_F(LexerTest, InvalidStringLiterals) {
  738. llvm::StringLiteral invalid[] = {
  739. // clang-format off
  740. R"(")",
  741. R"("""
  742. "")",
  743. R"("\)",
  744. R"("\")",
  745. R"("\\)",
  746. R"("\\\")",
  747. R"(""")",
  748. R"("""
  749. )",
  750. R"("""\)",
  751. R"(#"""
  752. """)",
  753. // clang-format on
  754. };
  755. for (llvm::StringLiteral test : invalid) {
  756. SCOPED_TRACE(test);
  757. auto buffer = Lex(test);
  758. EXPECT_TRUE(buffer.has_errors());
  759. // We should have formed at least one error token.
  760. bool found_error = false;
  761. for (TokenizedBuffer::Token token : buffer.tokens()) {
  762. if (buffer.GetKind(token) == TokenKind::Error()) {
  763. found_error = true;
  764. break;
  765. }
  766. }
  767. EXPECT_TRUE(found_error);
  768. }
  769. }
// Verifies lexing of sized type literals: `iN` lexes as IntegerTypeLiteral,
// `uN` as UnsignedIntegerTypeLiteral, and `fN` as FloatingPointTypeLiteral,
// while near-miss spellings (`i0`, `i0x1`, `u0`, `u64b`, `fi`, `s1`) lex as
// ordinary identifiers. Also checks GetTypeLiteralSize for each literal.
TEST_F(LexerTest, TypeLiterals) {
  // Each token line is indented four spaces, so every token's indent_column
  // is 5 in the expectations below.
  llvm::StringLiteral testcase = R"(
    i0 i1 i20 i999999999999 i0x1
    u0 u1 u64 u64b
    f32 f80 f1 fi
    s1
  )";

  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  // `i0` is not a type literal: a width of 0 lexes as an
                  // identifier, unlike `i1` below.
                  {.kind = TokenKind::Identifier(),
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"i0"}},
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"i1"}},
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"i20"}},
                  // A very large width still lexes as a single type literal.
                  {.kind = TokenKind::IntegerTypeLiteral(),
                   .line = 2,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"i999999999999"}},
                  // A hex-like suffix prevents type-literal formation.
                  {.kind = TokenKind::Identifier(),
                   .line = 2,
                   .column = 29,
                   .indent_column = 5,
                   .text = {"i0x1"}},
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"u0"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
                   .line = 3,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"u1"}},
                  {.kind = TokenKind::UnsignedIntegerTypeLiteral(),
                   .line = 3,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"u64"}},
                  // A trailing letter breaks the type-literal spelling.
                  {.kind = TokenKind::Identifier(),
                   .line = 3,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"u64b"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"f32"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 9,
                   .indent_column = 5,
                   .text = {"f80"}},
                  {.kind = TokenKind::FloatingPointTypeLiteral(),
                   .line = 4,
                   .column = 13,
                   .indent_column = 5,
                   .text = {"f1"}},
                  {.kind = TokenKind::Identifier(),
                   .line = 4,
                   .column = 16,
                   .indent_column = 5,
                   .text = {"fi"}},
                  // `s` is not one of the recognized type-literal prefixes.
                  {.kind = TokenKind::Identifier(),
                   .line = 5,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"s1"}},
                  {.kind = TokenKind::EndOfFile(), .line = 6, .column = 3},
              }));

  // Spot-check the parsed bit width of each type literal. The `begin() + N`
  // offsets index into the token sequence asserted above.
  auto token_i1 = buffer.tokens().begin() + 1;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i1), 1);
  auto token_i20 = buffer.tokens().begin() + 2;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i20), 20);
  auto token_i999999999999 = buffer.tokens().begin() + 3;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_i999999999999), 999999999999ULL);
  auto token_u1 = buffer.tokens().begin() + 6;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u1), 1);
  auto token_u64 = buffer.tokens().begin() + 7;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_u64), 64);
  auto token_f32 = buffer.tokens().begin() + 9;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f32), 32);
  auto token_f80 = buffer.tokens().begin() + 10;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f80), 80);
  auto token_f1 = buffer.tokens().begin() + 11;
  EXPECT_EQ(buffer.GetTypeLiteralSize(*token_f1), 1);
}
  870. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  871. std::string code = "i";
  872. code.append(10000, '9');
  873. Testing::MockDiagnosticConsumer consumer;
  874. EXPECT_CALL(
  875. consumer,
  876. HandleDiagnostic(AllOf(
  877. DiagnosticAt(1, 2),
  878. DiagnosticMessage(HasSubstr("Found a sequence of 10000 digits")))));
  879. auto buffer = Lex(code, consumer);
  880. EXPECT_TRUE(buffer.has_errors());
  881. ASSERT_THAT(buffer,
  882. HasTokens(llvm::ArrayRef<ExpectedToken>{
  883. {.kind = TokenKind::Error(),
  884. .line = 1,
  885. .column = 1,
  886. .indent_column = 1,
  887. .text = {code}},
  888. {.kind = TokenKind::EndOfFile(), .line = 1, .column = 10002},
  889. }));
  890. }
  891. TEST_F(LexerTest, DiagnosticTrailingComment) {
  892. llvm::StringLiteral testcase = R"(
  893. // Hello!
  894. var String x; // trailing comment
  895. )";
  896. Testing::MockDiagnosticConsumer consumer;
  897. EXPECT_CALL(consumer, HandleDiagnostic(AllOf(
  898. DiagnosticAt(3, 19),
  899. DiagnosticMessage(HasSubstr("Trailing comment")))));
  900. Lex(testcase, consumer);
  901. }
  902. TEST_F(LexerTest, DiagnosticWhitespace) {
  903. Testing::MockDiagnosticConsumer consumer;
  904. EXPECT_CALL(consumer,
  905. HandleDiagnostic(AllOf(
  906. DiagnosticAt(1, 3),
  907. DiagnosticMessage(HasSubstr("Whitespace is required")))));
  908. Lex("//no space after comment", consumer);
  909. }
  910. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  911. Testing::MockDiagnosticConsumer consumer;
  912. EXPECT_CALL(
  913. consumer,
  914. HandleDiagnostic(AllOf(
  915. DiagnosticAt(1, 8),
  916. DiagnosticMessage(HasSubstr("Unrecognized escape sequence `b`")))));
  917. Lex(R"("hello\bworld")", consumer);
  918. }
  919. TEST_F(LexerTest, DiagnosticBadHex) {
  920. Testing::MockDiagnosticConsumer consumer;
  921. EXPECT_CALL(
  922. consumer,
  923. HandleDiagnostic(AllOf(
  924. DiagnosticAt(1, 9),
  925. DiagnosticMessage(HasSubstr("two uppercase hexadecimal digits")))));
  926. Lex(R"("hello\xabworld")", consumer);
  927. }
  928. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  929. Testing::MockDiagnosticConsumer consumer;
  930. EXPECT_CALL(
  931. consumer,
  932. HandleDiagnostic(AllOf(
  933. DiagnosticAt(1, 6),
  934. DiagnosticMessage(HasSubstr("Invalid digit 'a' in hexadecimal")))));
  935. Lex("0x123abc", consumer);
  936. }
  937. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  938. Testing::MockDiagnosticConsumer consumer;
  939. EXPECT_CALL(consumer,
  940. HandleDiagnostic(
  941. AllOf(DiagnosticAt(1, 1),
  942. DiagnosticMessage(HasSubstr("missing a terminator")))));
  943. Lex(R"(#" ")", consumer);
  944. }
  945. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  946. Testing::MockDiagnosticConsumer consumer;
  947. EXPECT_CALL(consumer,
  948. HandleDiagnostic(AllOf(
  949. DiagnosticAt(1, 1),
  950. DiagnosticMessage(HasSubstr("unrecognized character")))));
  951. Lex("\b", consumer);
  952. }
  953. auto GetAndDropLine(llvm::StringRef& text) -> std::string {
  954. auto newline_offset = text.find_first_of('\n');
  955. llvm::StringRef line = text.slice(0, newline_offset);
  956. if (newline_offset != llvm::StringRef::npos) {
  957. text = text.substr(newline_offset + 1);
  958. } else {
  959. text = "";
  960. }
  961. return line.str();
  962. }
  963. TEST_F(LexerTest, PrintingBasic) {
  964. auto buffer = Lex(";");
  965. ASSERT_FALSE(buffer.has_errors());
  966. std::string print_storage;
  967. llvm::raw_string_ostream print_stream(print_storage);
  968. buffer.Print(print_stream);
  969. llvm::StringRef print = print_stream.str();
  970. EXPECT_THAT(GetAndDropLine(print),
  971. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  972. "indent: 1, spelling: ';', has_trailing_space: true }"));
  973. EXPECT_THAT(GetAndDropLine(print),
  974. StrEq("token: { index: 1, kind: 'EndOfFile', line: 1, column: 2, "
  975. "indent: 1, spelling: '' }"));
  976. EXPECT_TRUE(print.empty()) << print;
  977. }
  978. TEST_F(LexerTest, PrintingInteger) {
  979. auto buffer = Lex("123");
  980. ASSERT_FALSE(buffer.has_errors());
  981. std::string print_storage;
  982. llvm::raw_string_ostream print_stream(print_storage);
  983. buffer.Print(print_stream);
  984. llvm::StringRef print = print_stream.str();
  985. EXPECT_THAT(GetAndDropLine(print),
  986. StrEq("token: { index: 0, kind: 'IntegerLiteral', line: 1, "
  987. "column: 1, indent: 1, spelling: '123', value: `123`, "
  988. "has_trailing_space: true }"));
  989. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  990. EXPECT_TRUE(print.empty()) << print;
  991. }
  992. TEST_F(LexerTest, PrintingReal) {
  993. auto buffer = Lex("2.5");
  994. ASSERT_FALSE(buffer.has_errors());
  995. std::string print_storage;
  996. llvm::raw_string_ostream print_stream(print_storage);
  997. buffer.Print(print_stream);
  998. llvm::StringRef print = print_stream.str();
  999. EXPECT_THAT(
  1000. GetAndDropLine(print),
  1001. StrEq(
  1002. "token: { index: 0, kind: 'RealLiteral', line: 1, column: 1, indent: "
  1003. "1, spelling: '2.5', value: `25*10^-1`, has_trailing_space: true }"));
  1004. EXPECT_THAT(GetAndDropLine(print), HasSubstr("'EndOfFile'"));
  1005. EXPECT_TRUE(print.empty()) << print;
  1006. }
// Verifies printed output when token kinds have differing name lengths, and
// that matched grouping tokens print `opening_token`/`closing_token`
// cross-references by token index.
TEST_F(LexerTest, PrintingPadding) {
  // Test kind padding.
  auto buffer = Lex("(;foo;)");
  ASSERT_FALSE(buffer.has_errors());
  std::string print_storage;
  llvm::raw_string_ostream print_stream(print_storage);
  buffer.Print(print_stream);
  llvm::StringRef print = print_stream.str();
  // The open paren at index 0 refers forward to its close paren at index 4.
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 0, kind: 'OpenParen', line: 1, column: "
                    "1, indent: 1, spelling: '(', closing_token: 4 }"));
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 1, kind: 'Semi', line: 1, column: "
                    "2, indent: 1, spelling: ';' }"));
  // Identifier tokens print their identifier table index.
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 2, kind: 'Identifier', line: 1, column: "
                    "3, indent: 1, spelling: 'foo', identifier: 0 }"));
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 3, kind: 'Semi', line: 1, column: "
                    "6, indent: 1, spelling: ';' }"));
  // The close paren refers back to its open paren at index 0.
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 4, kind: 'CloseParen', line: 1, column: "
                    "7, indent: 1, spelling: ')', opening_token: 0, "
                    "has_trailing_space: true }"));
  EXPECT_THAT(GetAndDropLine(print),
              StrEq("token: { index: 5, kind: 'EndOfFile', line: 1, column: "
                    "8, indent: 1, spelling: '' }"));
  // No output should remain after EndOfFile.
  EXPECT_TRUE(print.empty()) << print;
}
  1036. TEST_F(LexerTest, PrintingPaddingDigits) {
  1037. // Test digit padding with max values of 9, 10, and 11.
  1038. auto buffer = Lex(";\n\n\n\n\n\n\n\n\n\n ;;");
  1039. ASSERT_FALSE(buffer.has_errors());
  1040. std::string print_storage;
  1041. llvm::raw_string_ostream print_stream(print_storage);
  1042. buffer.Print(print_stream);
  1043. llvm::StringRef print = print_stream.str();
  1044. EXPECT_THAT(
  1045. GetAndDropLine(print),
  1046. StrEq("token: { index: 0, kind: 'Semi', line: 1, column: 1, "
  1047. "indent: 1, spelling: ';', has_trailing_space: true }"));
  1048. EXPECT_THAT(
  1049. GetAndDropLine(print),
  1050. StrEq("token: { index: 1, kind: 'Semi', line: 11, column: 9, "
  1051. "indent: 9, spelling: ';' }"));
  1052. EXPECT_THAT(
  1053. GetAndDropLine(print),
  1054. StrEq("token: { index: 2, kind: 'Semi', line: 11, column: 10, "
  1055. "indent: 9, spelling: ';', has_trailing_space: true }"));
  1056. EXPECT_THAT(
  1057. GetAndDropLine(print),
  1058. StrEq("token: { index: 3, kind: 'EndOfFile', line: 11, column: 11, "
  1059. "indent: 9, spelling: '' }"));
  1060. EXPECT_TRUE(print.empty()) << print;
  1061. }
// Test that we can parse this into YAML and verify line and indent data.
TEST_F(LexerTest, PrintingAsYaml) {
  // Input layout: line 2 holds `;` at column 2; line 5 holds `; ;` starting
  // at column 1; a long run of trailing newlines pushes EndOfFile down to
  // exercise multi-digit line numbers in the YAML output.
  auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  ASSERT_FALSE(buffer.has_errors());
  std::string print_output;
  llvm::raw_string_ostream print_stream(print_output);
  buffer.Print(print_stream);
  // Flush so `print_output` is fully populated before parsing it as YAML.
  print_stream.flush();
  // Round-trip the printed text through the YAML parser and compare the
  // structured values rather than raw strings.
  EXPECT_THAT(Yaml::Value::FromText(print_output),
              ElementsAre(Yaml::MappingValue{
                  {"token", Yaml::MappingValue{{"index", "0"},
                                               {"kind", "Semi"},
                                               {"line", "2"},
                                               {"column", "2"},
                                               {"indent", "2"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "1"},
                                               {"kind", "Semi"},
                                               {"line", "5"},
                                               {"column", "1"},
                                               {"indent", "1"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "2"},
                                               {"kind", "Semi"},
                                               {"line", "5"},
                                               {"column", "3"},
                                               {"indent", "1"},
                                               {"spelling", ";"},
                                               {"has_trailing_space", "true"}}},
                  {"token", Yaml::MappingValue{{"index", "3"},
                                               {"kind", "EndOfFile"},
                                               {"line", "15"},
                                               {"column", "1"},
                                               {"indent", "1"},
                                               {"spelling", ""}}}}));
}
  1100. } // namespace
  1101. } // namespace Carbon::Testing