tokenized_buffer_test.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lex/tokenized_buffer.h"
  5. #include <gmock/gmock.h>
  6. #include <gtest/gtest.h>
  7. #include <forward_list>
  8. #include <iterator>
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "testing/base/test_raw_ostream.h"
  11. #include "toolchain/base/value_store.h"
  12. #include "toolchain/diagnostics/diagnostic_emitter.h"
  13. #include "toolchain/diagnostics/mocks.h"
  14. #include "toolchain/lex/lex.h"
  15. #include "toolchain/lex/tokenized_buffer_test_helpers.h"
  16. #include "toolchain/testing/yaml_test_helpers.h"
  17. namespace Carbon::Lex {
  18. namespace {
  19. using ::Carbon::Testing::ExpectedToken;
  20. using ::Carbon::Testing::IsSingleDiagnostic;
  21. using ::Carbon::Testing::TestRawOstream;
  22. using ::testing::_;
  23. using ::testing::ElementsAre;
  24. using ::testing::Eq;
  25. using ::testing::HasSubstr;
  26. using ::testing::Pair;
  27. namespace Yaml = ::Carbon::Testing::Yaml;
  28. class LexerTest : public ::testing::Test {
  29. protected:
  30. auto GetSourceBuffer(llvm::StringRef text) -> SourceBuffer& {
  31. std::string filename = llvm::formatv("test{0}.carbon", ++file_index_);
  32. CARBON_CHECK(fs_.addFile(filename, /*ModificationTime=*/0,
  33. llvm::MemoryBuffer::getMemBuffer(text)));
  34. source_storage_.push_front(std::move(*SourceBuffer::MakeFromFile(
  35. fs_, filename, ConsoleDiagnosticConsumer())));
  36. return source_storage_.front();
  37. }
  38. auto Lex(llvm::StringRef text,
  39. DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
  40. -> TokenizedBuffer {
  41. return Lex::Lex(value_stores_, GetSourceBuffer(text), consumer);
  42. }
  43. SharedValueStores value_stores_;
  44. llvm::vfs::InMemoryFileSystem fs_;
  45. int file_index_ = 0;
  46. std::forward_list<SourceBuffer> source_storage_;
  47. };
  48. TEST_F(LexerTest, HandlesEmptyBuffer) {
  49. auto buffer = Lex("");
  50. EXPECT_FALSE(buffer.has_errors());
  51. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  52. {.kind = TokenKind::FileStart},
  53. {.kind = TokenKind::FileEnd}}));
  54. }
TEST_F(LexerTest, TracksLinesAndColumns) {
  // Verifies 1-based line/column bookkeeping and the per-line indent column
  // across plain tokens, a quoted string, and a multi-line block string
  // literal ('''...''') that spans lines 4-6.
  auto buffer = Lex("\n  ;;\n   ;;;\n   x\"foo\" '''baz\n  a\n ''' y");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart,
           .line = 1,
           .column = 1,
           .indent_column = 1},
          {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 3},
          {.kind = TokenKind::Semi, .line = 2, .column = 4, .indent_column = 3},
          {.kind = TokenKind::Semi, .line = 3, .column = 4, .indent_column = 4},
          {.kind = TokenKind::Semi, .line = 3, .column = 5, .indent_column = 4},
          {.kind = TokenKind::Semi, .line = 3, .column = 6, .indent_column = 4},
          {.kind = TokenKind::Identifier,
           .line = 4,
           .column = 4,
           .indent_column = 4,
           .text = "x"},
          {.kind = TokenKind::StringLiteral,
           .line = 4,
           .column = 5,
           .indent_column = 4},
          // The block literal token is positioned at its opening ''' on
          // line 4 even though its content runs through line 6.
          {.kind = TokenKind::StringLiteral,
           .line = 4,
           .column = 11,
           .indent_column = 4},
          {.kind = TokenKind::Identifier,
           .line = 6,
           .column = 6,
           .indent_column = 11,
           .text = "y"},
          {.kind = TokenKind::FileEnd, .line = 6, .column = 7},
      }));
}
TEST_F(LexerTest, TracksLinesAndColumnsCRLF) {
  // Same input as TracksLinesAndColumns but with CRLF line endings: the
  // expected lines/columns are identical, i.e. "\r\n" counts as one newline.
  auto buffer =
      Lex("\r\n  ;;\r\n   ;;;\r\n   x\"foo\" '''baz\r\n  a\r\n ''' y");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart,
           .line = 1,
           .column = 1,
           .indent_column = 1},
          {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 3},
          {.kind = TokenKind::Semi, .line = 2, .column = 4, .indent_column = 3},
          {.kind = TokenKind::Semi, .line = 3, .column = 4, .indent_column = 4},
          {.kind = TokenKind::Semi, .line = 3, .column = 5, .indent_column = 4},
          {.kind = TokenKind::Semi, .line = 3, .column = 6, .indent_column = 4},
          {.kind = TokenKind::Identifier,
           .line = 4,
           .column = 4,
           .indent_column = 4,
           .text = "x"},
          {.kind = TokenKind::StringLiteral,
           .line = 4,
           .column = 5,
           .indent_column = 4},
          {.kind = TokenKind::StringLiteral,
           .line = 4,
           .column = 11,
           .indent_column = 4},
          {.kind = TokenKind::Identifier,
           .line = 6,
           .column = 6,
           .indent_column = 11,
           .text = "y"},
          {.kind = TokenKind::FileEnd, .line = 6, .column = 7},
      }));
}
TEST_F(LexerTest, InvalidCR) {
  // A bare CR (not part of a CRLF pair) is diagnosed as an error, but must
  // not desynchronize line/column tracking: the CR stays on line 2 and the
  // following tokens keep byte-accurate columns.
  auto buffer = Lex("\n ;;\r ;\n   x");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart,
           .line = 1,
           .column = 1,
           .indent_column = 1},
          {.kind = TokenKind::Semi, .line = 2, .column = 2, .indent_column = 2},
          {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 2},
          // Still line 2: the stray \r did not start a new line.
          {.kind = TokenKind::Semi, .line = 2, .column = 6, .indent_column = 2},
          {.kind = TokenKind::Identifier,
           .line = 3,
           .column = 4,
           .indent_column = 4,
           .text = "x"},
          {.kind = TokenKind::FileEnd, .line = 3, .column = 5},
      }));
}
TEST_F(LexerTest, InvalidLFCR) {
  // LF followed by CR: the LF ends line 2 normally, then the stray CR on
  // line 3 is an error. Note the semi on line 3 reports indent_column 1
  // because the CR occupies the line's first column.
  auto buffer = Lex("\n ;;\n\r ;\n   x");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart,
           .line = 1,
           .column = 1,
           .indent_column = 1},
          {.kind = TokenKind::Semi, .line = 2, .column = 2, .indent_column = 2},
          {.kind = TokenKind::Semi, .line = 2, .column = 3, .indent_column = 2},
          {.kind = TokenKind::Semi, .line = 3, .column = 3, .indent_column = 1},
          {.kind = TokenKind::Identifier,
           .line = 4,
           .column = 4,
           .indent_column = 4,
           .text = "x"},
          {.kind = TokenKind::FileEnd, .line = 4, .column = 5},
      }));
}
TEST_F(LexerTest, HandlesNumericLiteral) {
  // Covers int literals in decimal/hex/binary with digit separators, a real
  // literal, and maximal-munch splitting of "12-578" into int/minus/int.
  auto buffer = Lex("12-578\n  1  2\n0x12_3ABC\n0b10_10_11\n1_234_567\n1.5e9");
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  {.kind = TokenKind::IntLiteral,
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "12"},
                  {.kind = TokenKind::Minus,
                   .line = 1,
                   .column = 3,
                   .indent_column = 1},
                  {.kind = TokenKind::IntLiteral,
                   .line = 1,
                   .column = 4,
                   .indent_column = 1,
                   .text = "578"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 2,
                   .column = 3,
                   .indent_column = 3,
                   .text = "1"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 2,
                   .column = 6,
                   .indent_column = 3,
                   .text = "2"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 3,
                   .column = 1,
                   .indent_column = 1,
                   .text = "0x12_3ABC"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 4,
                   .column = 1,
                   .indent_column = 1,
                   .text = "0b10_10_11"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 5,
                   .column = 1,
                   .indent_column = 1,
                   .text = "1_234_567"},
                  {.kind = TokenKind::RealLiteral,
                   .line = 6,
                   .column = 1,
                   .indent_column = 1,
                   .text = "1.5e9"},
                  {.kind = TokenKind::FileEnd, .line = 6, .column = 6},
              }));
  // Walk the token stream (skipping FileStart) and cross-check the stored
  // literal values, including that separators are stripped before parsing.
  auto token_start = buffer.tokens().begin();
  auto token_12 = token_start + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_12)), 12);
  auto token_578 = token_12 + 2;  // +2 skips the Minus token.
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_578)), 578);
  auto token_1 = token_578 + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_1)), 1);
  auto token_2 = token_1 + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_2)), 2);
  auto token_0x12_3abc = token_2 + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_0x12_3abc)),
            0x12'3abc);
  auto token_0b10_10_11 = token_0x12_3abc + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_0b10_10_11)),
            0b10'10'11);
  auto token_1_234_567 = token_0b10_10_11 + 1;
  EXPECT_EQ(value_stores_.ints().Get(buffer.GetIntLiteral(*token_1_234_567)),
            1'234'567);
  // 1.5e9 is stored in decimal scientific form: 15 x 10^8.
  auto token_1_5e9 = token_1_234_567 + 1;
  auto value_1_5e9 =
      value_stores_.reals().Get(buffer.GetRealLiteral(*token_1_5e9));
  EXPECT_EQ(value_1_5e9.mantissa.getZExtValue(), 15);
  EXPECT_EQ(value_1_5e9.exponent.getSExtValue(), 8);
  EXPECT_EQ(value_1_5e9.is_decimal, true);
}
TEST_F(LexerTest, HandlesInvalidNumericLiterals) {
  // Malformed literals become single Error tokens spanning the whole word,
  // while well-formed neighbors still lex normally.
  auto buffer = Lex("14x 15_49 0x3.5q 0x3_4.5_6 0ops");
  EXPECT_TRUE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // Trailing alpha after digits is an error.
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 1,
                   .indent_column = 1,
                   .text = "14x"},
                  {.kind = TokenKind::IntLiteral,
                   .line = 1,
                   .column = 5,
                   .indent_column = 1,
                   .text = "15_49"},
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 11,
                   .indent_column = 1,
                   .text = "0x3.5q"},
                  // Hex real with digit separators is valid.
                  {.kind = TokenKind::RealLiteral,
                   .line = 1,
                   .column = 18,
                   .indent_column = 1,
                   .text = "0x3_4.5_6"},
                  {.kind = TokenKind::Error,
                   .line = 1,
                   .column = 28,
                   .indent_column = 1,
                   .text = "0ops"},
                  {.kind = TokenKind::FileEnd, .line = 1, .column = 32},
              }));
}
TEST_F(LexerTest, SplitsNumericLiteralsProperly) {
  // Checks where a numeric literal ends and adjacent tokens begin: trailing
  // periods, sign characters after exponents, chained periods, and suffixes.
  llvm::StringLiteral source_text = R"(
1.
.2
3.+foo
4.0-bar
5.0e+123+456
6.0e+1e+2
1e7
8..10
9.0.9.5
10.foo
11.0.foo
12e+1
13._
)";
  auto buffer = Lex(source_text);
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          // "1." splits: a trailing '.' is not part of the int.
                          {.kind = TokenKind::IntLiteral, .text = "1"},
                          {.kind = TokenKind::Period},
                          // newline
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::IntLiteral, .text = "3"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          // '-' never joins a literal; '+' only after 'e'.
                          {.kind = TokenKind::RealLiteral, .text = "4.0"},
                          {.kind = TokenKind::Minus},
                          {.kind = TokenKind::Identifier, .text = "bar"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "5.0e+123"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "456"},
                          // newline
                          {.kind = TokenKind::Error, .text = "6.0e+1e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "2"},
                          // newline
                          {.kind = TokenKind::Error, .text = "1e7"},
                          // newline
                          // "8..10" is int, two periods, int.
                          {.kind = TokenKind::IntLiteral, .text = "8"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::IntLiteral, .text = "10"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "9.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::RealLiteral, .text = "9.5"},
                          // newline
                          {.kind = TokenKind::Error, .text = "10.foo"},
                          // newline
                          {.kind = TokenKind::RealLiteral, .text = "11.0"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Identifier, .text = "foo"},
                          // newline
                          {.kind = TokenKind::Error, .text = "12e"},
                          {.kind = TokenKind::Plus},
                          {.kind = TokenKind::IntLiteral, .text = "1"},
                          // newline
                          {.kind = TokenKind::IntLiteral, .text = "13"},
                          {.kind = TokenKind::Period},
                          {.kind = TokenKind::Underscore},
                          // newline
                          {.kind = TokenKind::FileEnd},
                      }));
}
TEST_F(LexerTest, HandlesGarbageCharacters) {
  // Invalid characters (including an embedded NUL and multi-byte UTF-8) are
  // grouped into Error tokens without derailing surrounding valid tokens.
  // sizeof-1 keeps the embedded NUL but drops the terminating one.
  constexpr char GarbageText[] = "$$💩-$\n$\0$12$\n\\\"\\\n\"x";
  auto buffer = Lex(llvm::StringRef(GarbageText, sizeof(GarbageText) - 1));
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart, .line = 1, .column = 1},
          {.kind = TokenKind::Error,
           .line = 1,
           .column = 1,
           // 💩 takes 4 bytes, and we count column as bytes offset.
           .text = llvm::StringRef("$$💩", 6)},
          {.kind = TokenKind::Minus, .line = 1, .column = 7},
          {.kind = TokenKind::Error, .line = 1, .column = 8, .text = "$"},
          // newline
          // Explicit length keeps the NUL byte inside the expected text.
          {.kind = TokenKind::Error,
           .line = 2,
           .column = 1,
           .text = llvm::StringRef("$\0$", 3)},
          {.kind = TokenKind::IntLiteral, .line = 2, .column = 4, .text = "12"},
          {.kind = TokenKind::Error, .line = 2, .column = 6, .text = "$"},
          // newline
          {.kind = TokenKind::Backslash, .line = 3, .column = 1, .text = "\\"},
          {.kind = TokenKind::Error, .line = 3, .column = 2, .text = "\"\\"},
          // newline
          {.kind = TokenKind::Error, .line = 4, .column = 1, .text = "\"x"},
          {.kind = TokenKind::FileEnd, .line = 4, .column = 3},
      }));
}
TEST_F(LexerTest, Symbols) {
  // We don't need to exhaustively test symbols here as they're handled with
  // common code, but we want to check specific patterns to verify things like
  // max-munch rule and handling of interesting symbols.
  // "<<<" max-munches into "<<" then "<".
  auto buffer = Lex("<<<");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::LessLess},
                          {.kind = TokenKind::Less},
                          {.kind = TokenKind::FileEnd},
                      }));
  // "<<=" wins over "<<" + "=", then ">>".
  buffer = Lex("<<=>>");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::LessLessEqual},
                          {.kind = TokenKind::GreaterGreater},
                          {.kind = TokenKind::FileEnd},
                      }));
  // Whitespace separates what would otherwise merge; "<=>" is one token.
  buffer = Lex("< <=> >");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Less},
                          {.kind = TokenKind::LessEqualGreater},
                          {.kind = TokenKind::Greater},
                          {.kind = TokenKind::FileEnd},
                      }));
  // A run of distinct single-character symbols.
  buffer = Lex("\\/?@&^!");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Backslash},
                          {.kind = TokenKind::Slash},
                          {.kind = TokenKind::Question},
                          {.kind = TokenKind::At},
                          {.kind = TokenKind::Amp},
                          {.kind = TokenKind::Caret},
                          {.kind = TokenKind::Exclaim},
                          {.kind = TokenKind::FileEnd},
                      }));
}
  425. TEST_F(LexerTest, Parens) {
  426. auto buffer = Lex("()");
  427. EXPECT_FALSE(buffer.has_errors());
  428. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  429. {.kind = TokenKind::FileStart},
  430. {.kind = TokenKind::OpenParen},
  431. {.kind = TokenKind::CloseParen},
  432. {.kind = TokenKind::FileEnd},
  433. }));
  434. buffer = Lex("((()()))");
  435. EXPECT_FALSE(buffer.has_errors());
  436. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  437. {.kind = TokenKind::FileStart},
  438. {.kind = TokenKind::OpenParen},
  439. {.kind = TokenKind::OpenParen},
  440. {.kind = TokenKind::OpenParen},
  441. {.kind = TokenKind::CloseParen},
  442. {.kind = TokenKind::OpenParen},
  443. {.kind = TokenKind::CloseParen},
  444. {.kind = TokenKind::CloseParen},
  445. {.kind = TokenKind::CloseParen},
  446. {.kind = TokenKind::FileEnd},
  447. }));
  448. }
  449. TEST_F(LexerTest, CurlyBraces) {
  450. auto buffer = Lex("{}");
  451. EXPECT_FALSE(buffer.has_errors());
  452. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  453. {.kind = TokenKind::FileStart},
  454. {.kind = TokenKind::OpenCurlyBrace},
  455. {.kind = TokenKind::CloseCurlyBrace},
  456. {.kind = TokenKind::FileEnd},
  457. }));
  458. buffer = Lex("{{{}{}}}");
  459. EXPECT_FALSE(buffer.has_errors());
  460. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  461. {.kind = TokenKind::FileStart},
  462. {.kind = TokenKind::OpenCurlyBrace},
  463. {.kind = TokenKind::OpenCurlyBrace},
  464. {.kind = TokenKind::OpenCurlyBrace},
  465. {.kind = TokenKind::CloseCurlyBrace},
  466. {.kind = TokenKind::OpenCurlyBrace},
  467. {.kind = TokenKind::CloseCurlyBrace},
  468. {.kind = TokenKind::CloseCurlyBrace},
  469. {.kind = TokenKind::CloseCurlyBrace},
  470. {.kind = TokenKind::FileEnd},
  471. }));
  472. }
TEST_F(LexerTest, MatchingGroups) {
  // Verifies that opening and closing grouping tokens are cross-linked:
  // GetMatchedClosingToken/GetMatchedOpeningToken are inverses of each other.
  {
    // Two sibling groups: () then {}.
    TokenizedBuffer buffer = Lex("(){}");
    ASSERT_FALSE(buffer.has_errors());
    // Pre-increment skips the FileStart token.
    auto it = ++buffer.tokens().begin();
    auto open_paren_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    auto open_curly_token = *it++;
    auto close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::FileEnd);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
  {
    // Mixed nesting across three independent top-level groups.
    TokenizedBuffer buffer = Lex("({x}){(y)} {{((z))}}");
    ASSERT_FALSE(buffer.has_errors());
    auto it = ++buffer.tokens().begin();
    // First group: ( { x } ).
    auto open_paren_token = *it++;
    auto open_curly_token = *it++;
    ASSERT_EQ("x",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto close_curly_token = *it++;
    auto close_paren_token = *it++;
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    // Second group: { ( y ) }.
    open_curly_token = *it++;
    open_paren_token = *it++;
    ASSERT_EQ("y",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    close_paren_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    // Third group: { { ( ( z ) ) } } — doubly nested pairs.
    open_curly_token = *it++;
    auto inner_open_curly_token = *it++;
    open_paren_token = *it++;
    auto inner_open_paren_token = *it++;
    ASSERT_EQ("z",
              value_stores_.identifiers().Get(buffer.GetIdentifier(*it++)));
    auto inner_close_paren_token = *it++;
    close_paren_token = *it++;
    auto inner_close_curly_token = *it++;
    close_curly_token = *it++;
    EXPECT_EQ(close_curly_token,
              buffer.GetMatchedClosingToken(open_curly_token));
    EXPECT_EQ(open_curly_token,
              buffer.GetMatchedOpeningToken(close_curly_token));
    EXPECT_EQ(inner_close_curly_token,
              buffer.GetMatchedClosingToken(inner_open_curly_token));
    EXPECT_EQ(inner_open_curly_token,
              buffer.GetMatchedOpeningToken(inner_close_curly_token));
    EXPECT_EQ(close_paren_token,
              buffer.GetMatchedClosingToken(open_paren_token));
    EXPECT_EQ(open_paren_token,
              buffer.GetMatchedOpeningToken(close_paren_token));
    EXPECT_EQ(inner_close_paren_token,
              buffer.GetMatchedClosingToken(inner_open_paren_token));
    EXPECT_EQ(inner_open_paren_token,
              buffer.GetMatchedOpeningToken(inner_close_paren_token));
    auto eof_token = *it++;
    EXPECT_EQ(buffer.GetKind(eof_token), TokenKind::FileEnd);
    EXPECT_EQ(buffer.tokens().end(), it);
  }
}
TEST_F(LexerTest, MismatchedGroups) {
  // Unmatched grouping tokens: lone brackets become Error tokens, while
  // crossed nesting is repaired by inserting recovery closers.
  auto buffer = Lex("{");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Error, .text = "{"},
                          {.kind = TokenKind::FileEnd},
                      }));
  buffer = Lex("}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Error, .text = "}"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // "{(}" — a recovery ")" is inserted before "}" so groups still nest.
  buffer = Lex("{(}");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart},
          {.kind = TokenKind::OpenCurlyBrace, .column = 1},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::CloseParen, .column = 3, .recovery = true},
          {.kind = TokenKind::CloseCurlyBrace, .column = 3},
          {.kind = TokenKind::FileEnd},
      }));
  // ")({)" — leading ")" is an error; a recovery "}" closes the "{".
  buffer = Lex(")({)");
  EXPECT_TRUE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart},
          {.kind = TokenKind::Error, .column = 1, .text = ")"},
          {.kind = TokenKind::OpenParen, .column = 2},
          {.kind = TokenKind::OpenCurlyBrace, .column = 3},
          {.kind = TokenKind::CloseCurlyBrace, .column = 4, .recovery = true},
          {.kind = TokenKind::CloseParen, .column = 4},
          {.kind = TokenKind::FileEnd},
      }));
}
TEST_F(LexerTest, Whitespace) {
  auto buffer = Lex("{( } {(");
  // Whether there should be whitespace before/after each token. The array
  // holds one entry per token *boundary*: the input produces 8 tokens, so
  // there are 9 boundaries, and token i's trailing flag is the same entry as
  // token i+1's leading flag.
  bool space[] = {false,
                  // start-of-file
                  true,
                  // {
                  false,
                  // (
                  true,
                  // inserted )
                  true,
                  // }
                  true,
                  // error {
                  false,
                  // error (
                  true,
                  // EOF
                  false};
  int pos = 0;
  for (TokenIndex token : buffer.tokens()) {
    SCOPED_TRACE(
        llvm::formatv("Token #{0}: '{1}'", token, buffer.GetTokenText(token)));
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
    // Only one increment per token: the trailing check below reads the entry
    // the next iteration's leading check will reuse (shared boundary).
    ++pos;
    ASSERT_LT(pos, std::size(space));
    EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
  }
  // All boundaries consumed: pos ends on the last entry.
  ASSERT_EQ(pos + 1, std::size(space));
}
  630. TEST_F(LexerTest, Keywords) {
  631. TokenKind keywords[] = {
  632. #define CARBON_TOKEN(TokenName)
  633. #define CARBON_KEYWORD_TOKEN(TokenName, ...) TokenKind::TokenName,
  634. #include "toolchain/lex/token_kind.def"
  635. };
  636. for (const auto& keyword : keywords) {
  637. auto buffer = Lex(keyword.fixed_spelling());
  638. EXPECT_FALSE(buffer.has_errors());
  639. EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
  640. {.kind = TokenKind::FileStart},
  641. {.kind = keyword, .column = 1, .indent_column = 1},
  642. {.kind = TokenKind::FileEnd},
  643. }));
  644. }
  645. }
TEST_F(LexerTest, Comments) {
  // Comments produce no tokens at all; surrounding tokens keep correct
  // line/column positions.
  auto buffer = Lex(" ;\n  // foo\n  ;\n");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(
      buffer,
      HasTokens(llvm::ArrayRef<ExpectedToken>{
          {.kind = TokenKind::FileStart, .line = 1, .column = 1},
          {.kind = TokenKind::Semi, .line = 1, .column = 2, .indent_column = 2},
          {.kind = TokenKind::Semi, .line = 3, .column = 3, .indent_column = 3},
          {.kind = TokenKind::FileEnd, .line = 3, .column = 4},
      }));
  // A file of nothing but comments lexes to just the sentinels.
  buffer = Lex("// foo\n//\n// bar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::FileEnd}}));
  // Make sure weird characters aren't a problem.
  buffer = Lex("  // foo#$!^?@-_💩🍫⃠ [̲̅$̲̅(̲̅ ͡° ͜ʖ ͡°̲̅)̲̅$̲̅]");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::FileEnd}}));
  // Make sure we can lex a comment at the end of the input.
  buffer = Lex("//");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::FileEnd}}));
}
  675. TEST_F(LexerTest, InvalidComments) {
  676. llvm::StringLiteral testcases[] = {
  677. " /// foo\n",
  678. "foo // bar\n",
  679. "//! hello",
  680. " //world",
  681. };
  682. for (llvm::StringLiteral testcase : testcases) {
  683. auto buffer = Lex(testcase);
  684. EXPECT_TRUE(buffer.has_errors());
  685. }
  686. }
TEST_F(LexerTest, Identifiers) {
  // Basic identifier with leading whitespace.
  auto buffer = Lex("   foobar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Identifier,
                           .column = 4,
                           .indent_column = 4,
                           .text = "foobar"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // Check different kinds of identifier character sequences.
  buffer = Lex("_foo_bar");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Identifier, .text = "_foo_bar"},
                          {.kind = TokenKind::FileEnd},
                      }));
  buffer = Lex("foo2bar00");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Identifier, .text = "foo2bar00"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // Check that we can parse identifiers that start with a keyword.
  buffer = Lex("fnord");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
                          {.kind = TokenKind::FileStart},
                          {.kind = TokenKind::Identifier, .text = "fnord"},
                          {.kind = TokenKind::FileEnd},
                      }));
  // Check multiple identifiers with indent and interning. Note that a tab
  // advances the byte-counted column between the two "foo"s on line 3.
  buffer = Lex("   foo;bar\nbar \n  foo\tfoo");
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  {.kind = TokenKind::Identifier,
                   .line = 1,
                   .column = 4,
                   .indent_column = 4,
                   .text = "foo"},
                  {.kind = TokenKind::Semi},
                  {.kind = TokenKind::Identifier,
                   .line = 1,
                   .column = 8,
                   .indent_column = 4,
                   .text = "bar"},
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 1,
                   .indent_column = 1,
                   .text = "bar"},
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 3,
                   .indent_column = 3,
                   .text = "foo"},
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 7,
                   .indent_column = 3,
                   .text = "foo"},
                  {.kind = TokenKind::FileEnd, .line = 3, .column = 10},
              }));
}
TEST_F(LexerTest, StringLiterals) {
  // Covers the three string literal forms — simple ("..."), block
  // ('''...'''), and raw (#"..."#) — checking source locations, indent
  // handling, and the decoded contents interned into `value_stores_`.
  llvm::StringLiteral testcase = R"(
    "hello world\n"

    '''foo
      test \
     \xAB
     ''' trailing

      #"""#

    "\0"

    #"\0"foo"\1"#

    """x"""
  )";

  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  EXPECT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // Simple literal: the `\n` escape is decoded.
                  {.kind = TokenKind::StringLiteral,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"hello world\n"}},
                  // Block literal: the closing `'''` line's indentation is
                  // stripped from the content, the trailing `\` joins lines,
                  // and `\xAB` decodes to a single 0xAB byte.
                  {.kind = TokenKind::StringLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {" test \xAB\n"}},
                  // Text after the block terminator lexes as a normal token.
                  {.kind = TokenKind::Identifier,
                   .line = 7,
                   .column = 10,
                   .indent_column = 5,
                   .text = "trailing"},
                  // Raw literal `#"""#` contains a single double quote.
                  {.kind = TokenKind::StringLiteral,
                   .line = 9,
                   .column = 7,
                   .indent_column = 7,
                   .value_stores = &value_stores_,
                   .string_contents = {"\""}},
                  // `\0` decodes to an embedded NUL byte.
                  {.kind = TokenKind::StringLiteral,
                   .line = 11,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = llvm::StringLiteral::withInnerNUL("\0")},
                  // Raw literal: escapes are NOT processed, and interior `"`
                  // doesn't terminate — only `"#` does.
                  {.kind = TokenKind::StringLiteral,
                   .line = 13,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"\\0\"foo\"\\1"}},
                  // """x""" is three string literals, not one invalid
                  // attempt at a block string literal.
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 5,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 7,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {"x"}},
                  {.kind = TokenKind::StringLiteral,
                   .line = 15,
                   .column = 10,
                   .indent_column = 5,
                   .value_stores = &value_stores_,
                   .string_contents = {""}},
                  {.kind = TokenKind::FileEnd, .line = 16, .column = 3},
              }));
}
  831. TEST_F(LexerTest, InvalidStringLiterals) {
  832. llvm::StringLiteral invalid[] = {
  833. // clang-format off
  834. R"(")",
  835. R"('''
  836. '')",
  837. R"("\)",
  838. R"("\")",
  839. R"("\\)",
  840. R"("\\\")",
  841. R"(''')",
  842. R"('''
  843. )",
  844. R"('''\)",
  845. R"(#'''
  846. ''')",
  847. // clang-format on
  848. };
  849. for (llvm::StringLiteral test : invalid) {
  850. SCOPED_TRACE(test);
  851. auto buffer = Lex(test);
  852. EXPECT_TRUE(buffer.has_errors());
  853. // We should have formed at least one error token.
  854. bool found_error = false;
  855. for (TokenIndex token : buffer.tokens()) {
  856. if (buffer.GetKind(token) == TokenKind::Error) {
  857. found_error = true;
  858. break;
  859. }
  860. }
  861. EXPECT_TRUE(found_error);
  862. }
  863. }
TEST_F(LexerTest, TypeLiterals) {
  // Checks which `iN` / `uN` / `fN` spellings lex as type literals versus
  // plain identifiers, and that each literal's bit-width is interned.
  llvm::StringLiteral testcase = R"(
    i0 i1 i20 i999999999999 i0x1
    u0 u1 u64 u64b
    f32 f80 f1 fi
    s1
  )";

  auto buffer = Lex(testcase);
  EXPECT_FALSE(buffer.has_errors());
  ASSERT_THAT(buffer,
              HasTokens(llvm::ArrayRef<ExpectedToken>{
                  {.kind = TokenKind::FileStart, .line = 1, .column = 1},
                  // `i0`: a zero width doesn't form a type literal.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"i0"}},
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"i1"}},
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"i20"}},
                  // Widths well beyond practical sizes still lex as literals.
                  {.kind = TokenKind::IntTypeLiteral,
                   .line = 2,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"i999999999999"}},
                  // `i0x1`: non-decimal suffix makes this an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 2,
                   .column = 29,
                   .indent_column = 5,
                   .text = {"i0x1"}},
                  // `u0`: as with `i0`, zero width is not a type literal.
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"u0"}},
                  {.kind = TokenKind::UnsignedIntTypeLiteral,
                   .line = 3,
                   .column = 8,
                   .indent_column = 5,
                   .text = {"u1"}},
                  {.kind = TokenKind::UnsignedIntTypeLiteral,
                   .line = 3,
                   .column = 11,
                   .indent_column = 5,
                   .text = {"u64"}},
                  // `u64b`: trailing non-digit makes this an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 3,
                   .column = 15,
                   .indent_column = 5,
                   .text = {"u64b"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"f32"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 9,
                   .indent_column = 5,
                   .text = {"f80"}},
                  {.kind = TokenKind::FloatTypeLiteral,
                   .line = 4,
                   .column = 13,
                   .indent_column = 5,
                   .text = {"f1"}},
                  // `fi`: no digits at all, so an identifier.
                  {.kind = TokenKind::Identifier,
                   .line = 4,
                   .column = 16,
                   .indent_column = 5,
                   .text = {"fi"}},
                  // `s1`: `s` is not a type-literal prefix.
                  {.kind = TokenKind::Identifier,
                   .line = 5,
                   .column = 5,
                   .indent_column = 5,
                   .text = {"s1"}},
                  {.kind = TokenKind::FileEnd, .line = 6, .column = 3},
              }));

  // Returns the interned bit-width for the type literal at `token_index`
  // (an index into the token list asserted above; 0 is FileStart).
  auto type_size = [&](int token_index) {
    auto token = buffer.tokens().begin()[token_index];
    return value_stores_.ints().Get(buffer.GetTypeLiteralSize(token));
  };
  EXPECT_EQ(type_size(2), 1);              // i1
  EXPECT_EQ(type_size(3), 20);             // i20
  EXPECT_EQ(type_size(4), 999999999999ULL);  // i999999999999
  EXPECT_EQ(type_size(7), 1);              // u1
  EXPECT_EQ(type_size(8), 64);             // u64
  EXPECT_EQ(type_size(10), 32);            // f32
  EXPECT_EQ(type_size(11), 80);            // f80
  EXPECT_EQ(type_size(12), 1);             // f1
}
  961. TEST_F(LexerTest, TypeLiteralTooManyDigits) {
  962. std::string code = "i";
  963. constexpr int Count = 10000;
  964. code.append(Count, '9');
  965. Testing::MockDiagnosticConsumer consumer;
  966. EXPECT_CALL(consumer,
  967. HandleDiagnostic(IsSingleDiagnostic(
  968. DiagnosticKind::TooManyDigits, DiagnosticLevel::Error, 1, 2,
  969. HasSubstr(llvm::formatv(" {0} ", Count)))));
  970. auto buffer = Lex(code, consumer);
  971. EXPECT_TRUE(buffer.has_errors());
  972. ASSERT_THAT(buffer,
  973. HasTokens(llvm::ArrayRef<ExpectedToken>{
  974. {.kind = TokenKind::FileStart, .line = 1, .column = 1},
  975. {.kind = TokenKind::Error,
  976. .line = 1,
  977. .column = 1,
  978. .indent_column = 1,
  979. .text = code},
  980. {.kind = TokenKind::FileEnd, .line = 1, .column = Count + 2},
  981. }));
  982. }
  983. TEST_F(LexerTest, DiagnosticTrailingComment) {
  984. llvm::StringLiteral testcase = R"(
  985. // Hello!
  986. var String x; // trailing comment
  987. )";
  988. Testing::MockDiagnosticConsumer consumer;
  989. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  990. DiagnosticKind::TrailingComment,
  991. DiagnosticLevel::Error, 3, 19, _)));
  992. Lex(testcase, consumer);
  993. }
  994. TEST_F(LexerTest, DiagnosticWhitespace) {
  995. Testing::MockDiagnosticConsumer consumer;
  996. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  997. DiagnosticKind::NoWhitespaceAfterCommentIntroducer,
  998. DiagnosticLevel::Error, 1, 3, _)));
  999. Lex("//no space after comment", consumer);
  1000. }
  1001. TEST_F(LexerTest, DiagnosticUnrecognizedEscape) {
  1002. Testing::MockDiagnosticConsumer consumer;
  1003. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1004. DiagnosticKind::UnknownEscapeSequence,
  1005. DiagnosticLevel::Error, 1, 8, HasSubstr("`b`"))));
  1006. Lex(R"("hello\bworld")", consumer);
  1007. }
  1008. TEST_F(LexerTest, DiagnosticBadHex) {
  1009. Testing::MockDiagnosticConsumer consumer;
  1010. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1011. DiagnosticKind::HexadecimalEscapeMissingDigits,
  1012. DiagnosticLevel::Error, 1, 9, _)));
  1013. Lex(R"("hello\xabworld")", consumer);
  1014. }
  1015. TEST_F(LexerTest, DiagnosticInvalidDigit) {
  1016. Testing::MockDiagnosticConsumer consumer;
  1017. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1018. DiagnosticKind::InvalidDigit,
  1019. DiagnosticLevel::Error, 1, 6, HasSubstr("'a'"))));
  1020. Lex("0x123abc", consumer);
  1021. }
  1022. TEST_F(LexerTest, DiagnosticMissingTerminator) {
  1023. Testing::MockDiagnosticConsumer consumer;
  1024. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1025. DiagnosticKind::UnterminatedString,
  1026. DiagnosticLevel::Error, 1, 1, _)));
  1027. Lex(R"(#" ")", consumer);
  1028. }
  1029. TEST_F(LexerTest, DiagnosticUnrecognizedChar) {
  1030. Testing::MockDiagnosticConsumer consumer;
  1031. EXPECT_CALL(consumer, HandleDiagnostic(IsSingleDiagnostic(
  1032. DiagnosticKind::UnrecognizedCharacters,
  1033. DiagnosticLevel::Error, 1, 1, _)));
  1034. Lex("\b", consumer);
  1035. }
TEST_F(LexerTest, PrintingOutputYaml) {
  // Test that we can parse this into YAML and verify line and indent data.
  // Input layout: ` ;` on line 2, `; ;` on line 5, then a run of blank lines
  // so FileEnd lands well past the last real token.
  auto buffer = Lex("\n ;\n\n\n; ;\n\n\n\n\n\n\n\n\n\n\n");
  ASSERT_FALSE(buffer.has_errors());
  TestRawOstream print_stream;
  buffer.Print(print_stream);

  // The printed output is a YAML sequence holding one mapping per source
  // file, with the filename and a `tokens` sequence of per-token mappings.
  EXPECT_THAT(
      Yaml::Value::FromText(print_stream.TakeStr()),
      IsYaml(ElementsAre(Yaml::Sequence(ElementsAre(Yaml::Mapping(ElementsAre(
          Pair("filename", source_storage_.front().filename().str()),
          Pair("tokens",
               Yaml::Sequence(ElementsAre(
                   // FileStart: synthesized at 1:1 with empty spelling and no
                   // has_leading_space entry.
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "0"), Pair("kind", "FileStart"),
                       Pair("line", "1"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", ""))),
                   // ` ;` on line 2: one space of indent, so column 2.
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "1"), Pair("kind", "Semi"),
                       Pair("line", "2"), Pair("column", "2"),
                       Pair("indent", "2"), Pair("spelling", ";"),
                       Pair("has_leading_space", "true"))),
                   // First `;` of `; ;` on line 5, at column 1.
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "2"), Pair("kind", "Semi"),
                       Pair("line", "5"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", ";"),
                       Pair("has_leading_space", "true"))),
                   // Second `;` of `; ;`, at column 3 on the same line.
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "3"), Pair("kind", "Semi"),
                       Pair("line", "5"), Pair("column", "3"),
                       Pair("indent", "1"), Pair("spelling", ";"),
                       Pair("has_leading_space", "true"))),
                   // FileEnd after the trailing blank lines, at 15:1.
                   Yaml::Mapping(ElementsAre(
                       Pair("index", "4"), Pair("kind", "FileEnd"),
                       Pair("line", "15"), Pair("column", "1"),
                       Pair("indent", "1"), Pair("spelling", ""),
                       Pair("has_leading_space", "true")))))))))))));
}
  1072. } // namespace
  1073. } // namespace Carbon::Lex