test_file.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "testing/file_test/test_file.h"
  5. #include <fstream>
  6. #include <optional>
  7. #include <string>
  8. #include <utility>
  9. #include "common/check.h"
  10. #include "common/error.h"
  11. #include "common/find.h"
  12. #include "common/raw_string_ostream.h"
  13. #include "common/set.h"
  14. #include "llvm/ADT/StringExtras.h"
  15. #include "llvm/Support/JSON.h"
  16. #include "testing/base/file_helpers.h"
  17. #include "testing/file_test/line.h"
  18. namespace Carbon::Testing {
  19. using ::testing::Matcher;
  20. using ::testing::MatchesRegex;
  21. using ::testing::StrEq;
  22. // Represents the different kinds of version-control conflict markers that are
  23. // relevant for the autoupdater. One key concern here is the distinction between
  24. // "snapshot" and "diff" conflict regions. Snapshot regions are the more
  25. // traditional kind, where the entire region between two markers represents the
  26. // exact state of a region of the underlying file at some snapshot (e.g. the
  27. // base commit or one of the conflicting commits). Diff regions are
  28. // produced by jj. They show the diff between the base and one side of the
  29. // conflict, using a prefix character on each line: '+' indicates an added line,
  30. // '-' indicates a removed line, and ' ' indicates an unchanged line. Note that
  31. // a single conflict may contain both snapshot and diff regions.
  32. //
  33. // See https://docs.jj-vcs.dev/latest/conflicts/ for more information.
  34. enum class MarkerKind {
  35. // Represents a line that is not a conflict marker.
  36. None,
  37. // Marks the start of a conflict, and potentially a snapshot region.
  38. Start,
  39. // Marks the end of a conflict.
  40. End,
  41. // Marks the start of a snapshot region.
  42. Snapshot,
  43. // Marks the start of a diff region.
  44. Diff
  45. };
  46. // Processes conflict markers, including tracking the previous conflict marker.
  47. // Returns true if the line is consumed.
  48. static auto TryConsumeConflictMarker(bool running_autoupdate,
  49. llvm::StringRef line,
  50. llvm::StringRef line_trimmed,
  51. MarkerKind& previous_marker)
  52. -> ErrorOr<bool> {
  53. MarkerKind new_marker;
  54. if (line.starts_with("<<<<<<<")) {
  55. new_marker = MarkerKind::Start;
  56. } else if (line.starts_with(">>>>>>>")) {
  57. new_marker = MarkerKind::End;
  58. } else if (line.starts_with("=======") || line.starts_with("|||||||") ||
  59. line.starts_with("+++++++") || line.starts_with("-------")) {
  60. // git uses "=======" and "|||||||" to mark boundaries between conflict
  61. // regions (which are always snapshots). jj uses "+++++++" and "-------" to
  62. // mark the start of different kinds of snapshot regions.
  63. new_marker = MarkerKind::Snapshot;
  64. } else if (line.starts_with("%%%%%%%") || line.starts_with(R"(\\\\\\\)")) {
  65. // jj uses "%%%%%%%" to mark the start of a diff region, and "\\\\\\\" to
  66. // add a second line to a "%%%%%%%" marker for formatting purposes.
  67. new_marker = MarkerKind::Diff;
  68. } else {
  69. new_marker = MarkerKind::None;
  70. }
  71. // When running the test, any conflict marker is an error.
  72. if (!running_autoupdate && (new_marker != MarkerKind::None)) {
  73. return ErrorBuilder() << "Conflict marker found:\n" << line;
  74. }
  75. bool inside_conflict_marker = [&] {
  76. switch (previous_marker) {
  77. case MarkerKind::None:
  78. case MarkerKind::End:
  79. return false;
  80. case MarkerKind::Start:
  81. case MarkerKind::Snapshot:
  82. case MarkerKind::Diff:
  83. return true;
  84. }
  85. }();
  86. switch (new_marker) {
  87. case MarkerKind::End:
  88. case MarkerKind::Snapshot:
  89. case MarkerKind::Diff:
  90. if (!inside_conflict_marker) {
  91. return ErrorBuilder()
  92. << "Unexpected conflict marker outside conflict:\n"
  93. << line;
  94. }
  95. previous_marker = new_marker;
  96. return true;
  97. case MarkerKind::Start:
  98. if (inside_conflict_marker) {
  99. return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
  100. << line;
  101. }
  102. previous_marker = new_marker;
  103. return true;
  104. case MarkerKind::None:
  105. if (!inside_conflict_marker) {
  106. return false;
  107. }
  108. if (previous_marker == MarkerKind::Diff) {
  109. if (!line.consume_front(" ") && !line.consume_front("+") &&
  110. !line.consume_front("-")) {
  111. return ErrorBuilder() << "Line inside diff-style conflict doesn't "
  112. "start with '+', '-', or ' ':\n"
  113. << line;
  114. }
  115. line_trimmed = line.ltrim();
  116. }
  117. // Look for CHECK and TIP lines, which can be discarded.
  118. if (line_trimmed.starts_with("// CHECK:STDOUT:") ||
  119. line_trimmed.starts_with("// CHECK:STDERR:") ||
  120. line_trimmed.starts_with("// TIP:")) {
  121. return true;
  122. }
  123. return ErrorBuilder() << "Autoupdate can't discard non-CHECK lines "
  124. "inside conflicts:\n";
  125. }
  126. }
  127. // State for file splitting logic: TryConsumeSplit and FinishSplit.
  128. struct SplitState {
  129. auto has_splits() const -> bool { return file_index > 0; }
  130. auto add_content(llvm::StringRef line) -> void {
  131. content.append(line.str());
  132. content.append("\n");
  133. }
  134. // Whether content has been found. Only updated before a file split is found
  135. // (which may be never).
  136. bool found_code_pre_split = false;
  137. // The current file name, considering splits. Empty for the default file.
  138. llvm::StringRef filename = "";
  139. // The accumulated content for the file being built. This may elide some of
  140. // the original content, such as conflict markers.
  141. std::string content;
  142. // The current file index.
  143. int file_index = 0;
  144. };
  145. // Given a `file:/<filename>` URI, returns the filename.
  146. static auto ExtractFilePathFromUri(llvm::StringRef uri)
  147. -> ErrorOr<llvm::StringRef> {
  148. static constexpr llvm::StringRef FilePrefix = "file:/";
  149. if (!uri.starts_with(FilePrefix)) {
  150. return ErrorBuilder() << "uri `" << uri << "` is not a file uri";
  151. }
  152. return uri.drop_front(FilePrefix.size());
  153. }
  154. // When `FROM_FILE_SPLIT` is used in path `textDocument.text`, populate the
  155. // value from the split matching the `uri`. Only used for
  156. // `textDocument/didOpen`.
  157. static auto AutoFillDidOpenParams(llvm::json::Object& params,
  158. llvm::ArrayRef<TestFile::Split> splits)
  159. -> ErrorOr<Success> {
  160. auto* text_document = params.getObject("textDocument");
  161. if (text_document == nullptr) {
  162. return Success();
  163. }
  164. auto attr_it = text_document->find("text");
  165. if (attr_it == text_document->end() || attr_it->second != "FROM_FILE_SPLIT") {
  166. return Success();
  167. }
  168. auto uri = text_document->getString("uri");
  169. if (!uri) {
  170. return Error("missing uri in params.textDocument");
  171. }
  172. CARBON_ASSIGN_OR_RETURN(auto file_path, ExtractFilePathFromUri(*uri));
  173. const auto* split = FindIfOrNull(splits, [&](const TestFile::Split& split) {
  174. return split.filename == file_path;
  175. });
  176. if (!split) {
  177. return ErrorBuilder() << "No split found for uri: " << *uri;
  178. }
  179. attr_it->second = split->content;
  180. return Success();
  181. }
  182. // Reformats `[[@LSP:` and similar keyword as an LSP call with headers. Returns
  183. // the position to start a find for the next keyword.
  184. static auto ReplaceLspKeywordAt(std::string& content, size_t keyword_pos,
  185. int& lsp_call_id,
  186. llvm::ArrayRef<TestFile::Split> splits)
  187. -> ErrorOr<size_t> {
  188. llvm::StringRef content_at_keyword =
  189. llvm::StringRef(content).substr(keyword_pos);
  190. auto [keyword, body_start] = content_at_keyword.split(":");
  191. if (keyword.size() == content_at_keyword.size()) {
  192. return ErrorBuilder() << "Missing `:` for `"
  193. << content_at_keyword.take_front(10) << "`";
  194. }
  195. // Whether the first param is a method or id.
  196. llvm::StringRef method_or_id_label = "method";
  197. // Whether to attach the `lsp_call_id`.
  198. bool use_call_id = false;
  199. // The JSON label for extra content.
  200. llvm::StringRef extra_content_label;
  201. if (keyword == "[[@LSP-CALL") {
  202. use_call_id = true;
  203. extra_content_label = "params";
  204. } else if (keyword == "[[@LSP-NOTIFY") {
  205. extra_content_label = "params";
  206. } else if (keyword == "[[@LSP-REPLY") {
  207. method_or_id_label = "id";
  208. extra_content_label = "result";
  209. } else if (keyword != "[[@LSP") {
  210. return ErrorBuilder() << "Unrecognized @LSP keyword at `"
  211. << keyword.take_front(10) << "`";
  212. }
  213. static constexpr llvm::StringLiteral LspEnd = "]]";
  214. auto [body, rest] = body_start.split("]]");
  215. if (body.size() == body_start.size()) {
  216. return ErrorBuilder() << "Missing `" << LspEnd << "` after `" << keyword
  217. << "`";
  218. }
  219. auto [method_or_id, extra_content] = body.split(":");
  220. llvm::json::Value parsed_extra_content = nullptr;
  221. if (!extra_content.empty()) {
  222. std::string extra_content_as_object =
  223. llvm::formatv("{{{0}}", extra_content);
  224. auto parse_result = llvm::json::parse(extra_content_as_object);
  225. if (auto err = parse_result.takeError()) {
  226. return ErrorBuilder() << "Error parsing extra content: " << err;
  227. }
  228. parsed_extra_content = std::move(*parse_result);
  229. CARBON_CHECK(parsed_extra_content.kind() == llvm::json::Value::Object);
  230. if (extra_content_label == "params" &&
  231. method_or_id == "textDocument/didOpen") {
  232. CARBON_RETURN_IF_ERROR(
  233. AutoFillDidOpenParams(*parsed_extra_content.getAsObject(), splits));
  234. }
  235. }
  236. // Form the JSON.
  237. RawStringOstream buffer;
  238. llvm::json::OStream json(buffer);
  239. json.object([&] {
  240. json.attribute("jsonrpc", "2.0");
  241. json.attribute(method_or_id_label, method_or_id);
  242. if (use_call_id) {
  243. json.attribute("id", ++lsp_call_id);
  244. }
  245. if (parsed_extra_content != nullptr) {
  246. if (!extra_content_label.empty()) {
  247. json.attribute(extra_content_label, parsed_extra_content);
  248. } else {
  249. for (const auto& [key, value] : *parsed_extra_content.getAsObject()) {
  250. json.attribute(key, value);
  251. }
  252. }
  253. }
  254. });
  255. // Add the Content-Length header. The `2` accounts for extra newlines.
  256. int content_length = buffer.size() + 2;
  257. auto json_with_header = llvm::formatv("Content-Length: {0}\n\n{1}\n",
  258. content_length, buffer.TakeStr())
  259. .str();
  260. size_t keyword_len = rest.data() - keyword.data();
  261. content.replace(keyword_pos, keyword_len, json_with_header);
  262. return keyword_pos + json_with_header.size();
  263. }
  264. // Replaces `[[@0xAB]]` with the raw byte with value 0xAB. Returns the position
  265. // to start a find for the next keyword.
  266. static auto ReplaceRawByteKeywordAt(std::string& content, size_t keyword_pos)
  267. -> ErrorOr<size_t> {
  268. llvm::StringRef content_at_keyword =
  269. llvm::StringRef(content).substr(keyword_pos);
  270. auto [keyword, rest] = content_at_keyword.split("]]");
  271. if (keyword.size() == content_at_keyword.size()) {
  272. return ErrorBuilder() << "Missing `]]` after " << keyword.take_front(10)
  273. << "`";
  274. }
  275. unsigned char byte_value;
  276. if (keyword.substr(std::size("[[@0x") - 1).getAsInteger(16, byte_value)) {
  277. return ErrorBuilder() << "Invalid raw byte specifier `"
  278. << keyword.take_front(10) << "`";
  279. }
  280. content.replace(keyword_pos, keyword.size() + 2, 1, byte_value);
  281. return keyword_pos + 1;
  282. }
  283. // Replaces the keyword at the given position. Returns the position to start a
  284. // find for the next keyword.
  285. static auto ReplaceContentKeywordAt(std::string& content, size_t keyword_pos,
  286. llvm::StringRef test_name, int& lsp_call_id,
  287. llvm::ArrayRef<TestFile::Split> splits)
  288. -> ErrorOr<size_t> {
  289. auto keyword = llvm::StringRef(content).substr(keyword_pos);
  290. // Line replacements aren't handled here.
  291. static constexpr llvm::StringLiteral Line = "[[@LINE";
  292. if (keyword.starts_with(Line)) {
  293. // Just move past the prefix to find the next one.
  294. return keyword_pos + Line.size();
  295. }
  296. // Replaced with the actual test name.
  297. static constexpr llvm::StringLiteral TestName = "[[@TEST_NAME]]";
  298. if (keyword.starts_with(TestName)) {
  299. content.replace(keyword_pos, TestName.size(), test_name);
  300. return keyword_pos + test_name.size();
  301. }
  302. if (keyword.starts_with("[[@LSP")) {
  303. return ReplaceLspKeywordAt(content, keyword_pos, lsp_call_id, splits);
  304. }
  305. if (keyword.starts_with("[[@0x")) {
  306. return ReplaceRawByteKeywordAt(content, keyword_pos);
  307. }
  308. return ErrorBuilder() << "Unexpected use of `[[@` at `"
  309. << keyword.substr(0, 5) << "`";
  310. }
  311. // Replaces the content keywords.
  312. //
  313. // This handles content keywords such as [[@TEST_NAME]] and [[@LSP*]]. Unknown
  314. // content keywords are diagnosed.
  315. static auto ReplaceContentKeywords(llvm::StringRef filename,
  316. std::string& content,
  317. llvm::ArrayRef<TestFile::Split> splits)
  318. -> ErrorOr<Success> {
  319. static constexpr llvm::StringLiteral Prefix = "[[@";
  320. auto keyword_pos = content.find(Prefix);
  321. // Return early if not finding anything.
  322. if (keyword_pos == std::string::npos) {
  323. return Success();
  324. }
  325. // Construct the test name by getting the base name without the extension,
  326. // then removing any "fail_" or "todo_" prefixes.
  327. llvm::StringRef test_name = filename;
  328. if (auto last_slash = test_name.rfind("/");
  329. last_slash != llvm::StringRef::npos) {
  330. test_name = test_name.substr(last_slash + 1);
  331. }
  332. if (auto ext_dot = test_name.find("."); ext_dot != llvm::StringRef::npos) {
  333. test_name = test_name.substr(0, ext_dot);
  334. }
  335. // Note this also handles `fail_todo_` and `todo_fail_`.
  336. test_name.consume_front("todo_");
  337. test_name.consume_front("fail_");
  338. test_name.consume_front("todo_");
  339. // A counter for LSP calls.
  340. int lsp_call_id = 0;
  341. while (keyword_pos != std::string::npos) {
  342. CARBON_ASSIGN_OR_RETURN(
  343. auto keyword_end,
  344. ReplaceContentKeywordAt(content, keyword_pos, test_name, lsp_call_id,
  345. splits));
  346. keyword_pos = content.find(Prefix, keyword_end);
  347. }
  348. return Success();
  349. }
  350. // Adds a file. Used for both split and unsplit test files.
  351. static auto AddSplit(llvm::StringRef filename, std::string& content,
  352. llvm::SmallVector<TestFile::Split>& file_splits)
  353. -> ErrorOr<Success> {
  354. CARBON_RETURN_IF_ERROR(
  355. ReplaceContentKeywords(filename, content, file_splits));
  356. file_splits.push_back(
  357. {.filename = filename.str(), .content = std::move(content)});
  358. content.clear();
  359. return Success();
  360. }
  361. // Process file split ("---") lines when found. Returns true if the line is
  362. // consumed. `non_check_lines` is only provided for the main file, and will be
  363. // null for includes.
  364. static auto TryConsumeSplit(llvm::StringRef line, llvm::StringRef line_trimmed,
  365. bool missing_autoupdate, int& line_index,
  366. SplitState& split,
  367. llvm::SmallVector<TestFile::Split>& file_splits,
  368. llvm::SmallVector<FileTestLine>* non_check_lines)
  369. -> ErrorOr<bool> {
  370. if (!line_trimmed.consume_front("// ---")) {
  371. if (!split.has_splits() && !line_trimmed.starts_with("//") &&
  372. !line_trimmed.empty()) {
  373. split.found_code_pre_split = true;
  374. }
  375. // Add the line to the current file's content (which may not be a split
  376. // file).
  377. split.add_content(line);
  378. return false;
  379. }
  380. if (missing_autoupdate) {
  381. // If there's a split, all output is appended at the end of each file
  382. // before AUTOUPDATE. We may want to change that, but it's not
  383. // necessary to handle right now.
  384. return Error(
  385. "AUTOUPDATE/NOAUTOUPDATE setting must be in "
  386. "the first file.");
  387. }
  388. // On a file split, add the previous file, then start a new one.
  389. if (split.has_splits()) {
  390. CARBON_RETURN_IF_ERROR(
  391. AddSplit(split.filename, split.content, file_splits));
  392. } else {
  393. split.content.clear();
  394. if (split.found_code_pre_split) {
  395. // For the first split, we make sure there was no content prior.
  396. return Error(
  397. "When using split files, there must be no content before the first "
  398. "split file.");
  399. }
  400. }
  401. ++split.file_index;
  402. split.filename = line_trimmed.trim();
  403. if (split.filename.empty()) {
  404. return Error("Missing filename for split.");
  405. }
  406. // The split line is added to non_check_lines for retention in autoupdate, but
  407. // is not added to the test file content.
  408. line_index = 0;
  409. if (non_check_lines) {
  410. non_check_lines->push_back(
  411. FileTestLine(split.file_index, line_index, line));
  412. }
  413. return true;
  414. }
  415. // Converts a `FileCheck`-style expectation string into a single complete regex
  416. // string by escaping all regex characters outside of the designated `{{...}}`
  417. // regex sequences, and switching those to a normal regex sub-pattern syntax.
  418. static auto ConvertExpectationStringToRegex(std::string& str) -> void {
  419. for (int pos = 0; pos < static_cast<int>(str.size());) {
  420. switch (str[pos]) {
  421. case '(':
  422. case ')':
  423. case '[':
  424. case ']':
  425. case '}':
  426. case '.':
  427. case '^':
  428. case '$':
  429. case '*':
  430. case '+':
  431. case '?':
  432. case '|':
  433. case '\\': {
  434. // Escape regex characters.
  435. str.insert(pos, "\\");
  436. pos += 2;
  437. break;
  438. }
  439. case '{': {
  440. if (pos + 1 == static_cast<int>(str.size()) || str[pos + 1] != '{') {
  441. // Single `{`, escape it.
  442. str.insert(pos, "\\");
  443. pos += 2;
  444. break;
  445. }
  446. // Replace the `{{...}}` regex syntax with standard `(...)` syntax.
  447. str.replace(pos, 2, "(");
  448. for (++pos; pos < static_cast<int>(str.size() - 1); ++pos) {
  449. if (str[pos] == '}' && str[pos + 1] == '}') {
  450. str.replace(pos, 2, ")");
  451. ++pos;
  452. break;
  453. }
  454. }
  455. break;
  456. }
  457. default: {
  458. ++pos;
  459. }
  460. }
  461. }
  462. }
  463. // Transforms an expectation on a given line from `FileCheck` syntax into a
  464. // standard regex matcher.
  465. static auto TransformExpectation(int line_index, llvm::StringRef in)
  466. -> ErrorOr<Matcher<std::string>> {
  467. if (in.empty()) {
  468. return Matcher<std::string>{StrEq("")};
  469. }
  470. if (!in.consume_front(" ")) {
  471. return ErrorBuilder() << "Malformated CHECK line: " << in;
  472. }
  473. // Check early if we have a regex component as we can avoid building an
  474. // expensive matcher when not using those.
  475. bool has_regex = in.find("{{") != llvm::StringRef::npos;
  476. // Now scan the string and expand any keywords. Note that this needs to be
  477. // `size_t` to correctly store `npos`.
  478. size_t keyword_pos = in.find("[[");
  479. // If there are neither keywords nor regex sequences, we can match the
  480. // incoming string directly.
  481. if (!has_regex && keyword_pos == llvm::StringRef::npos) {
  482. return Matcher<std::string>{StrEq(in)};
  483. }
  484. std::string str = in.str();
  485. // First expand the keywords.
  486. while (keyword_pos != std::string::npos) {
  487. llvm::StringRef line_keyword_cursor =
  488. llvm::StringRef(str).substr(keyword_pos);
  489. CARBON_CHECK(line_keyword_cursor.consume_front("[["));
  490. static constexpr llvm::StringLiteral LineKeyword = "@LINE";
  491. if (!line_keyword_cursor.consume_front(LineKeyword)) {
  492. return ErrorBuilder()
  493. << "Unexpected [[, should be {{\\[\\[}} at `"
  494. << line_keyword_cursor.substr(0, 5) << "` in: " << in;
  495. }
  496. // Allow + or - here; consumeInteger handles -.
  497. line_keyword_cursor.consume_front("+");
  498. int offset;
  499. // consumeInteger returns true for errors, not false.
  500. if (line_keyword_cursor.consumeInteger(10, offset) ||
  501. !line_keyword_cursor.consume_front("]]")) {
  502. return ErrorBuilder()
  503. << "Unexpected @LINE offset at `"
  504. << line_keyword_cursor.substr(0, 5) << "` in: " << in;
  505. }
  506. std::string int_str = llvm::Twine(line_index + offset).str();
  507. int remove_len = (line_keyword_cursor.data() - str.data()) - keyword_pos;
  508. str.replace(keyword_pos, remove_len, int_str);
  509. keyword_pos += int_str.size();
  510. // Find the next keyword start or the end of the string.
  511. keyword_pos = str.find("[[", keyword_pos);
  512. }
  513. // If there was no regex, we can directly match the adjusted string.
  514. if (!has_regex) {
  515. return Matcher<std::string>{StrEq(str)};
  516. }
  517. // Otherwise, we need to turn the entire string into a regex by escaping
  518. // things outside the regex region and transforming the regex region into a
  519. // normal syntax.
  520. ConvertExpectationStringToRegex(str);
  521. return Matcher<std::string>{MatchesRegex(str)};
  522. }
  523. // Once all content is processed, do any remaining split processing.
  524. static auto FinishSplit(llvm::StringRef filename, bool is_include_file,
  525. SplitState& split,
  526. llvm::SmallVector<TestFile::Split>& file_splits)
  527. -> ErrorOr<Success> {
  528. if (split.has_splits()) {
  529. return AddSplit(split.filename, split.content, file_splits);
  530. } else {
  531. // If no file splitting happened, use the main file as the test file.
  532. // There will always be a `/` unless tests are in the repo root.
  533. std::string split_name = std::filesystem::path(filename.str()).filename();
  534. if (is_include_file) {
  535. split_name.insert(0, "include_files/");
  536. }
  537. return AddSplit(split_name, split.content, file_splits);
  538. }
  539. }
  540. // Process CHECK lines when found. Returns true if the line is consumed.
  541. // `expected_stdout` and `expected_stderr` are null in included files, where
  542. // it's an error to use `CHECK`.
  543. static auto TryConsumeCheck(
  544. bool running_autoupdate, int line_index, llvm::StringRef line,
  545. llvm::StringRef line_trimmed,
  546. llvm::SmallVector<testing::Matcher<std::string>>* expected_stdout,
  547. llvm::SmallVector<testing::Matcher<std::string>>* expected_stderr)
  548. -> ErrorOr<bool> {
  549. if (!line_trimmed.consume_front("// CHECK")) {
  550. return false;
  551. }
  552. if (!expected_stdout) {
  553. return ErrorBuilder() << "Included files can't add CHECKs: "
  554. << line_trimmed;
  555. }
  556. // Don't build expectations when doing an autoupdate. We don't want to
  557. // break the autoupdate on an invalid CHECK line.
  558. if (!running_autoupdate) {
  559. llvm::SmallVector<Matcher<std::string>>* expected;
  560. if (line_trimmed.consume_front(":STDOUT:")) {
  561. expected = expected_stdout;
  562. } else if (line_trimmed.consume_front(":STDERR:")) {
  563. expected = expected_stderr;
  564. } else {
  565. return ErrorBuilder() << "Unexpected CHECK in input: " << line.str();
  566. }
  567. CARBON_ASSIGN_OR_RETURN(Matcher<std::string> check_matcher,
  568. TransformExpectation(line_index, line_trimmed));
  569. expected->push_back(check_matcher);
  570. }
  571. return true;
  572. }
  573. // Processes ARGS and EXTRA-ARGS lines when found. Returns true if the line is
  574. // consumed.
  575. static auto TryConsumeArgs(llvm::StringRef line, llvm::StringRef line_trimmed,
  576. llvm::SmallVector<std::string>& args)
  577. -> ErrorOr<bool> {
  578. if (!line_trimmed.consume_front("// ARGS: ")) {
  579. return false;
  580. }
  581. if (!args.empty()) {
  582. return ErrorBuilder() << "ARGS specified multiple times: " << line.str();
  583. }
  584. // Split the line into arguments.
  585. std::pair<llvm::StringRef, llvm::StringRef> cursor =
  586. llvm::getToken(line_trimmed);
  587. while (!cursor.first.empty()) {
  588. args.push_back(std::string(cursor.first));
  589. cursor = llvm::getToken(cursor.second);
  590. }
  591. return true;
  592. }
  593. static auto TryConsumeExtraArgs(llvm::StringRef line_trimmed,
  594. llvm::SmallVector<std::string>& extra_args)
  595. -> ErrorOr<bool> {
  596. if (!line_trimmed.consume_front("// EXTRA-ARGS: ")) {
  597. return false;
  598. }
  599. // Split the line into arguments.
  600. std::pair<llvm::StringRef, llvm::StringRef> cursor =
  601. llvm::getToken(line_trimmed);
  602. while (!cursor.first.empty()) {
  603. extra_args.push_back(std::string(cursor.first));
  604. cursor = llvm::getToken(cursor.second);
  605. }
  606. return true;
  607. }
  608. static auto TryConsumeIncludeFile(llvm::StringRef line_trimmed,
  609. llvm::SmallVector<std::string>& include_files)
  610. -> ErrorOr<bool> {
  611. if (!line_trimmed.consume_front("// INCLUDE-FILE: ")) {
  612. return false;
  613. }
  614. include_files.push_back(line_trimmed.str());
  615. return true;
  616. }
  617. // Processes AUTOUPDATE lines when found. Returns true if the line is consumed.
  618. // `found_autoupdate` and `autoupdate_line_number` are only provided for the
  619. // main file; it's an error to have autoupdate in included files.
  620. static auto TryConsumeAutoupdate(int line_index, llvm::StringRef line_trimmed,
  621. bool* found_autoupdate,
  622. std::optional<int>* autoupdate_line_number)
  623. -> ErrorOr<bool> {
  624. static constexpr llvm::StringLiteral Autoupdate = "// AUTOUPDATE";
  625. static constexpr llvm::StringLiteral NoAutoupdate = "// NOAUTOUPDATE";
  626. if (line_trimmed != Autoupdate && line_trimmed != NoAutoupdate) {
  627. return false;
  628. }
  629. if (!found_autoupdate) {
  630. return ErrorBuilder() << "Included files can't control autoupdate: "
  631. << line_trimmed;
  632. }
  633. if (*found_autoupdate) {
  634. return Error("Multiple AUTOUPDATE/NOAUTOUPDATE settings found");
  635. }
  636. *found_autoupdate = true;
  637. if (line_trimmed == Autoupdate) {
  638. *autoupdate_line_number = line_index;
  639. }
  640. return true;
  641. }
  642. // Processes SET-* lines when found. Returns true if the line is consumed.
  643. // If `flag` is null, we're in an included file where the flag can't be set.
  644. static auto TryConsumeSetFlag(llvm::StringRef line_trimmed,
  645. llvm::StringLiteral flag_name, bool* flag)
  646. -> ErrorOr<bool> {
  647. if (!line_trimmed.consume_front("// ") || line_trimmed != flag_name) {
  648. return false;
  649. }
  650. if (!flag) {
  651. return ErrorBuilder() << "Included files can't set flag: " << line_trimmed;
  652. }
  653. if (*flag) {
  654. return ErrorBuilder() << flag_name << " was specified multiple times";
  655. }
  656. *flag = true;
  657. return true;
  658. }
  659. // Process content for either the main file (with `test_file` and
  660. // `found_autoupdate` provided) or an included file (with those arguments null).
  661. //
  662. // - `found_autoupdate` is set to true when either `AUTOUPDATE` or
  663. // `NOAUTOUPDATE` are found.
  664. // - `args` is set from `ARGS`.
  665. // - `extra_args` accumulates `EXTRA-ARGS`.
  666. // - `splits` accumulates split form for the test (`// --- <filename>`, or the
  667. // full file named as `filename` when there are no splits in the file).
  668. // - `include_files` accumulates `INCLUDE-FILE`.
  669. static auto ProcessFileContent(llvm::StringRef filename,
  670. llvm::StringRef content_cursor,
  671. bool running_autoupdate, TestFile* test_file,
  672. bool* found_autoupdate,
  673. llvm::SmallVector<std::string>& args,
  674. llvm::SmallVector<std::string>& extra_args,
  675. llvm::SmallVector<TestFile::Split>& splits,
  676. llvm::SmallVector<std::string>& include_files)
  677. -> ErrorOr<Success> {
  678. // The index in the current test file. Will be reset on splits.
  679. int line_index = 0;
  680. // When autoupdating, we track whether we're inside conflict markers.
  681. // Otherwise conflict markers are errors.
  682. auto previous_conflict_marker = MarkerKind::None;
  683. SplitState split_state;
  684. while (!content_cursor.empty()) {
  685. auto [line, next_cursor] = content_cursor.split("\n");
  686. content_cursor = next_cursor;
  687. auto line_trimmed = line.ltrim();
  688. bool is_consumed = false;
  689. CARBON_ASSIGN_OR_RETURN(
  690. is_consumed,
  691. TryConsumeConflictMarker(running_autoupdate, line, line_trimmed,
  692. previous_conflict_marker));
  693. if (is_consumed) {
  694. continue;
  695. }
  696. // At this point, remaining lines are part of the test input.
  697. // We need to consume a split, but the main file has a little more handling.
  698. bool missing_autoupdate = false;
  699. llvm::SmallVector<FileTestLine>* non_check_lines = nullptr;
  700. if (test_file) {
  701. missing_autoupdate = !*found_autoupdate;
  702. non_check_lines = &test_file->non_check_lines;
  703. }
  704. CARBON_ASSIGN_OR_RETURN(
  705. is_consumed,
  706. TryConsumeSplit(line, line_trimmed, missing_autoupdate, line_index,
  707. split_state, splits, non_check_lines));
  708. if (is_consumed) {
  709. continue;
  710. }
  711. ++line_index;
  712. // TIP lines have no impact on validation.
  713. if (line_trimmed.starts_with("// TIP:")) {
  714. continue;
  715. }
  716. CARBON_ASSIGN_OR_RETURN(
  717. is_consumed,
  718. TryConsumeCheck(running_autoupdate, line_index, line, line_trimmed,
  719. test_file ? &test_file->expected_stdout : nullptr,
  720. test_file ? &test_file->expected_stderr : nullptr));
  721. if (is_consumed) {
  722. continue;
  723. }
  724. if (test_file) {
  725. // At this point, lines are retained as non-CHECK lines.
  726. test_file->non_check_lines.push_back(
  727. FileTestLine(split_state.file_index, line_index, line));
  728. }
  729. CARBON_ASSIGN_OR_RETURN(is_consumed,
  730. TryConsumeArgs(line, line_trimmed, args));
  731. if (is_consumed) {
  732. continue;
  733. }
  734. CARBON_ASSIGN_OR_RETURN(is_consumed,
  735. TryConsumeExtraArgs(line_trimmed, extra_args));
  736. if (is_consumed) {
  737. continue;
  738. }
  739. CARBON_ASSIGN_OR_RETURN(is_consumed,
  740. TryConsumeIncludeFile(line_trimmed, include_files));
  741. if (is_consumed) {
  742. continue;
  743. }
  744. CARBON_ASSIGN_OR_RETURN(
  745. is_consumed,
  746. TryConsumeAutoupdate(
  747. line_index, line_trimmed, found_autoupdate,
  748. test_file ? &test_file->autoupdate_line_number : nullptr));
  749. if (is_consumed) {
  750. continue;
  751. }
  752. CARBON_ASSIGN_OR_RETURN(
  753. is_consumed,
  754. TryConsumeSetFlag(
  755. line_trimmed, "SET-CAPTURE-CONSOLE-OUTPUT",
  756. test_file ? &test_file->capture_console_output : nullptr));
  757. if (is_consumed) {
  758. continue;
  759. }
  760. CARBON_ASSIGN_OR_RETURN(
  761. is_consumed,
  762. TryConsumeSetFlag(line_trimmed, "SET-CHECK-SUBSET",
  763. test_file ? &test_file->check_subset : nullptr));
  764. if (is_consumed) {
  765. continue;
  766. }
  767. }
  768. CARBON_RETURN_IF_ERROR(FinishSplit(filename, /*is_include_file=*/!test_file,
  769. split_state, splits));
  770. if (test_file) {
  771. test_file->has_splits = split_state.has_splits();
  772. }
  773. return Success();
  774. }
  775. auto ProcessTestFile(llvm::StringRef test_name, bool running_autoupdate)
  776. -> ErrorOr<TestFile> {
  777. TestFile test_file;
  778. // Store the original content, to avoid a read when autoupdating.
  779. CARBON_ASSIGN_OR_RETURN(test_file.input_content, ReadFile(test_name.str()));
  780. // Whether either AUTOUDPATE or NOAUTOUPDATE was found.
  781. bool found_autoupdate = false;
  782. // INCLUDE-FILE uses, accumulated across both the main file and any includes
  783. // (recursively).
  784. llvm::SmallVector<std::string> include_files;
  785. // Store the main file's `EXTRA-ARGS` so that they can be put after any that
  786. // come from `INCLUDE-FILE`.
  787. llvm::SmallVector<std::string> main_extra_args;
  788. // Process the main file.
  789. CARBON_RETURN_IF_ERROR(ProcessFileContent(
  790. test_name, test_file.input_content, running_autoupdate, &test_file,
  791. &found_autoupdate, test_file.test_args, main_extra_args,
  792. test_file.file_splits, include_files));
  793. if (!found_autoupdate) {
  794. return ErrorBuilder() << "Missing AUTOUPDATE/NOAUTOUPDATE setting";
  795. }
  796. constexpr llvm::StringLiteral AutoupdateSplit = "AUTOUPDATE-SPLIT";
  797. // Validate AUTOUPDATE-SPLIT use, and remove it from test files if present.
  798. if (test_file.has_splits) {
  799. for (const auto& test_file :
  800. llvm::ArrayRef(test_file.file_splits).drop_back()) {
  801. if (test_file.filename == AutoupdateSplit) {
  802. return Error("AUTOUPDATE-SPLIT must be the last split");
  803. }
  804. }
  805. if (test_file.file_splits.back().filename == AutoupdateSplit) {
  806. if (!test_file.autoupdate_line_number) {
  807. return Error("AUTOUPDATE-SPLIT requires AUTOUPDATE");
  808. }
  809. test_file.autoupdate_split = true;
  810. test_file.file_splits.pop_back();
  811. }
  812. }
  813. // Assume there is always a suffix `\n` in output.
  814. if (!test_file.expected_stdout.empty()) {
  815. test_file.expected_stdout.push_back(StrEq(""));
  816. }
  817. if (!test_file.expected_stderr.empty()) {
  818. test_file.expected_stderr.push_back(StrEq(""));
  819. }
  820. // Process includes. This can add entries to `include_files`.
  821. Set<std::string> processed_includes;
  822. for (size_t i = 0; i < include_files.size(); ++i) {
  823. const auto& filename = include_files[i];
  824. if (!processed_includes.Insert(filename).is_inserted()) {
  825. // Ignore repeated includes, mainly so that included files can include the
  826. // same file (i.e., repeated indirectly).
  827. continue;
  828. }
  829. CARBON_ASSIGN_OR_RETURN(std::string content, ReadFile(filename));
  830. // Note autoupdate never touches included files.
  831. CARBON_RETURN_IF_ERROR(ProcessFileContent(
  832. filename, content, /*running_autoupdate=*/false,
  833. /*test_file=*/nullptr,
  834. /*found_autoupdate=*/nullptr, test_file.test_args, test_file.extra_args,
  835. test_file.include_file_splits, include_files));
  836. }
  837. for (const auto& split : test_file.include_file_splits) {
  838. if (split.filename == AutoupdateSplit) {
  839. return Error("AUTOUPDATE-SPLIT is disallowed in included files");
  840. }
  841. }
  842. // Copy over `EXTRA-ARGS` from the main file (after includes).
  843. test_file.extra_args.append(main_extra_args);
  844. return std::move(test_file);
  845. }
  846. } // namespace Carbon::Testing