// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "testing/file_test/test_file.h"

#include <fstream>

#include "llvm/ADT/StringExtras.h"
#include "testing/base/file_helpers.h"

namespace Carbon::Testing {

using ::testing::Matcher;
using ::testing::MatchesRegex;
using ::testing::StrEq;

// Handles merge conflict marker lines (`<<<<<<<`, `=======`, `|||||||`, and
// `>>>>>>>`) as well as the lines between them, updating
// `*inside_conflict_marker` as conflicts open and close.
//
// Returns true when the line is consumed, and false for a line outside of any
// conflict that isn't a marker. Returns an error when:
// - A marker is found while not running autoupdate.
// - A marker is in an invalid position: a start inside a conflict, or a
//   middle/end outside of one.
// - A line inside a conflict is not a CHECK or TIP line.
static auto TryConsumeConflictMarker(bool running_autoupdate,
                                     llvm::StringRef line,
                                     llvm::StringRef line_trimmed,
                                     bool* inside_conflict_marker)
    -> ErrorOr<bool> {
  const bool opens_conflict = line.starts_with("<<<<<<<");
  const bool divides_conflict =
      line.starts_with("=======") || line.starts_with("|||||||");
  const bool closes_conflict = line.starts_with(">>>>>>>");
  const bool is_marker = opens_conflict || divides_conflict || closes_conflict;

  // Outside of autoupdate, every conflict marker is rejected.
  if (is_marker && !running_autoupdate) {
    return ErrorBuilder() << "Conflict marker found:\n" << line;
  }

  if (!*inside_conflict_marker) {
    // Only a start marker is valid here; it opens a conflict.
    if (opens_conflict) {
      *inside_conflict_marker = true;
      return true;
    }
    if (is_marker) {
      return ErrorBuilder() << "Unexpected conflict marker outside conflict:\n"
                            << line;
    }
    return false;
  }

  // From here on, the line is inside a conflict. Autoupdate tracks the
  // markers for context, and discards the conflicting lines when it can
  // autoupdate them.
  if (opens_conflict) {
    return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
                          << line;
  }
  if (closes_conflict) {
    *inside_conflict_marker = false;
    return true;
  }
  if (divides_conflict) {
    return true;
  }

  // CHECK and TIP lines can be discarded.
  const bool is_discardable = line_trimmed.starts_with("// CHECK:STDOUT:") ||
                              line_trimmed.starts_with("// CHECK:STDERR:") ||
                              line_trimmed.starts_with("// TIP:");
  if (is_discardable) {
    return true;
  }

  return ErrorBuilder()
         << "Autoupdate can't discard non-CHECK lines inside conflicts:\n"
         << line;
}

// State for file splitting logic: TryConsumeSplit and FinishSplit.
struct SplitState {
  // Returns true once at least one split has been started; `file_index` is
  // only incremented when a split is found.
  auto has_splits() const -> bool { return file_index > 0; }

  // Appends `line` plus a trailing newline to the accumulated content.
  auto add_content(llvm::StringRef line) -> void {
    // Append the character range directly rather than going through a
    // temporary `std::string`, which would cost an extra copy for every line.
    content.append(line.begin(), line.end());
    content.push_back('\n');
  }

  // Whether content has been found. Only updated before a file split is found
  // (which may be never).
  bool found_code_pre_split = false;

  // The current file name, considering splits. Empty for the default file.
  llvm::StringRef filename = "";

  // The accumulated content for the file being built. This may elide some of
  // the original content, such as conflict markers.
  std::string content;

  // The current file index.
  int file_index = 0;
};

// Rewrites the `[[@LSP...:...]]` keyword starting at `keyword_pos` in
// `content` into a JSON-RPC message preceded by a `Content-Length` header.
//
// Supported forms, where the trailing `:<extra>` is optional:
// - `[[@LSP:<method>:<extra>]]`: `<extra>` is appended verbatim after the
//   method entry.
// - `[[@LSP-CALL:<method>:<extra>]]`: adds an `id` taken from `lsp_call_id`
//   after incrementing it, and emits `<extra>` as `"params": {<extra>}`.
// - `[[@LSP-NOTIFY:<method>:<extra>]]`: emits `<extra>` as
//   `"params": {<extra>}`.
// - `[[@LSP-REPLY:<id>:<extra>]]`: emits `<extra>` as `"result": {<extra>}`.
//
// Returns the position just past the inserted text, or an error for a
// malformed keyword.
static auto ReplaceLspKeywordAt(std::string* content, size_t keyword_pos,
                                int& lsp_call_id) -> ErrorOr<size_t> {
  llvm::StringRef content_at_keyword =
      llvm::StringRef(*content).substr(keyword_pos);

  // Everything before the first `:` names the keyword; the body follows it.
  auto [keyword, after_colon] = content_at_keyword.split(":");
  if (after_colon.empty()) {
    return ErrorBuilder() << "Missing `:` for `"
                          << content_at_keyword.take_front(10) << "`";
  }

  // Classify the keyword.
  const bool is_call = keyword == "[[@LSP-CALL";
  const bool is_notify = keyword == "[[@LSP-NOTIFY";
  const bool is_reply = keyword == "[[@LSP-REPLY";
  if (!is_call && !is_notify && !is_reply && keyword != "[[@LSP") {
    return ErrorBuilder() << "Unrecognized @LSP keyword at `"
                          << keyword.take_front(10) << "`";
  }

  // The first body field is an id for replies, and a method otherwise.
  const llvm::StringRef first_label =
      is_reply ? llvm::StringRef("id") : llvm::StringRef("method");
  // The JSON label wrapping the extra content; empty means the extra content
  // is emitted as-is.
  llvm::StringRef extra_label;
  if (is_call || is_notify) {
    extra_label = "params";
  } else if (is_reply) {
    extra_label = "result";
  }

  static constexpr llvm::StringLiteral LspEnd = "]]";
  const auto body_len = after_colon.find(LspEnd);
  if (body_len == std::string::npos) {
    return ErrorBuilder() << "Missing `" << LspEnd << "` after `" << keyword
                          << "`";
  }
  auto [first_value, extra] = after_colon.take_front(body_len).split(":");

  // Assemble the JSON message.
  std::string json = llvm::formatv(R"({{"jsonrpc": "2.0", "{0}": "{1}")",
                                   first_label, first_value)
                         .str();
  if (is_call) {
    // The ID is known to be an integer, so it's emitted without quotes.
    json += llvm::formatv(R"(, "id": {0})", ++lsp_call_id).str();
  }
  if (!extra.empty()) {
    json += ",";
    if (!extra_label.empty()) {
      json += llvm::formatv(R"( "{0}": {{{1}})", extra_label, extra).str();
    } else {
      if (!extra.starts_with("\n")) {
        json += " ";
      }
      json += extra;
    }
  }
  json += "}";

  // Prefix the Content-Length header. The `2` accounts for extra newlines.
  const std::string replacement =
      llvm::formatv("Content-Length: {0}\n\n{1}\n", json.size() + 2, json)
          .str();

  // The replaced text spans the keyword, the `:` separator, the body, and the
  // closing `]]`. This must be computed before `content` is modified, because
  // the string references above point into it.
  const size_t replaced_len =
      keyword.size() + 1 + body_len + LspEnd.size();
  content->replace(keyword_pos, replaced_len, replacement);
  return keyword_pos + replacement.size();
}

// Handles the `[[@` keyword found at `keyword_pos` in `content`:
// - `[[@LINE` is left untouched; line replacements aren't handled here.
// - `[[@TEST_NAME]]` is replaced with `test_name`.
// - `[[@LSP` keywords are delegated to `ReplaceLspKeywordAt`.
// Any other use of `[[@` is an error.
//
// Returns the position from which the search for the next keyword should
// resume.
static auto ReplaceContentKeywordAt(std::string* content, size_t keyword_pos,
                                    llvm::StringRef test_name, int& lsp_call_id)
    -> ErrorOr<size_t> {
  static constexpr llvm::StringLiteral LinePrefix = "[[@LINE";
  static constexpr llvm::StringLiteral TestNameKeyword = "[[@TEST_NAME]]";

  const llvm::StringRef remaining =
      llvm::StringRef(*content).substr(keyword_pos);

  if (remaining.starts_with(LinePrefix)) {
    // Skip over the prefix so that the next search doesn't find it again.
    return keyword_pos + LinePrefix.size();
  }

  if (remaining.starts_with(TestNameKeyword)) {
    // Substitute the actual test name and resume after it.
    content->replace(keyword_pos, TestNameKeyword.size(), test_name);
    return keyword_pos + test_name.size();
  }

  if (remaining.starts_with("[[@LSP")) {
    return ReplaceLspKeywordAt(content, keyword_pos, lsp_call_id);
  }

  return ErrorBuilder() << "Unexpected use of `[[@` at `"
                        << remaining.take_front(5) << "`";
}

// Replaces every `[[@...` content keyword in `content`.
//
// Each occurrence is handled by `ReplaceContentKeywordAt`, which also rejects
// unrecognized keywords. The test name used for substitution is derived from
// `filename`: the base name up to its first `.`, with leading `todo_` and
// `fail_` prefixes removed.
static auto ReplaceContentKeywords(llvm::StringRef filename,
                                   std::string* content) -> ErrorOr<Success> {
  static constexpr llvm::StringLiteral Prefix = "[[@";

  // Nothing to do when there are no keywords at all.
  const auto first_keyword = content->find(Prefix);
  if (first_keyword == std::string::npos) {
    return Success();
  }

  // Strip the directory, if any.
  llvm::StringRef test_name = filename;
  const auto last_slash = test_name.rfind("/");
  if (last_slash != llvm::StringRef::npos) {
    test_name = test_name.drop_front(last_slash + 1);
  }
  // Strip everything from the first `.` on. When there is no `.`, `find`
  // returns `npos` and `take_front` keeps the whole name.
  test_name = test_name.take_front(test_name.find("."));
  // Consuming `todo_` on both sides of `fail_` also covers `fail_todo_` and
  // `todo_fail_`.
  for (llvm::StringRef prefix : {"todo_", "fail_", "todo_"}) {
    test_name.consume_front(prefix);
  }

  // LSP call IDs count up across the whole content.
  int lsp_call_id = 0;
  for (auto keyword_pos = first_keyword; keyword_pos != std::string::npos;) {
    CARBON_ASSIGN_OR_RETURN(
        auto resume_pos,
        ReplaceContentKeywordAt(content, keyword_pos, test_name, lsp_call_id));
    keyword_pos = content->find(Prefix, resume_pos);
  }
  return Success();
}

// Appends a file named `filename` to `file_splits`, after replacing the
// content keywords in `content`. The content is moved into the new entry, and
// `content` is left empty. Used for both split and unsplit test files.
static auto AddSplit(llvm::StringRef filename, std::string* content,
                     llvm::SmallVector<TestFile::Split>* file_splits)
    -> ErrorOr<Success> {
  CARBON_RETURN_IF_ERROR(ReplaceContentKeywords(filename, content));

  TestFile::Split added = {.filename = filename.str(),
                           .content = std::move(*content)};
  file_splits->push_back(std::move(added));

  // Reset the moved-from string to a known-empty state for reuse.
  content->clear();
  return Success();
}

// Handles file split lines, which have the form `// --- <filename>`.
//
// A non-split line is appended to the current file's content and false is
// returned. A split line finishes the previous split file (if any), starts a
// new one, resets `*line_index` to 0, and returns true. The split line itself
// is recorded in `non_check_lines` but is not part of any file's content.
//
// Returns an error if the split comes before the AUTOUPDATE/NOAUTOUPDATE
// setting, if there was code before the first split, or if the filename is
// missing.
static auto TryConsumeSplit(llvm::StringRef line, llvm::StringRef line_trimmed,
                            bool found_autoupdate, int* line_index,
                            SplitState* split,
                            llvm::SmallVector<TestFile::Split>* file_splits,
                            llvm::SmallVector<FileTestLine>* non_check_lines)
    -> ErrorOr<bool> {
  const bool is_split_line = line_trimmed.consume_front("// ---");
  if (!is_split_line) {
    // Until a split is found, note whether anything other than comments and
    // blank lines has been seen.
    const bool is_code =
        !line_trimmed.empty() && !line_trimmed.starts_with("//");
    if (is_code && !split->has_splits()) {
      split->found_code_pre_split = true;
    }

    // The line belongs to the current file, which may not be a split file.
    split->add_content(line);
    return false;
  }

  if (!found_autoupdate) {
    // If there's a split, all output is appended at the end of each file
    // before AUTOUPDATE. We may want to change that, but it's not
    // necessary to handle right now.
    return ErrorBuilder() << "AUTOUPDATE/NOAUTOUPDATE setting must be in "
                             "the first file.";
  }

  if (!split->has_splits()) {
    // This is the first split: drop what was accumulated so far, which is
    // only allowed to be non-code.
    split->content.clear();
    if (split->found_code_pre_split) {
      return ErrorBuilder() << "When using split files, there must be no "
                               "content before the first split file.";
    }
  } else {
    // Finish the previous split file before starting the next one.
    CARBON_RETURN_IF_ERROR(
        AddSplit(split->filename, &split->content, file_splits));
  }

  // Start the new file. The remainder of the split line is its name.
  ++split->file_index;
  split->filename = line_trimmed.trim();
  if (split->filename.empty()) {
    return ErrorBuilder() << "Missing filename for split.";
  }

  // Retain the split line for autoupdate, without adding it to the test file
  // content.
  *line_index = 0;
  non_check_lines->push_back(
      FileTestLine(split->file_index, *line_index, line));
  return true;
}

// Rewrites a `FileCheck`-style expectation string in place as one complete
// regex string. Regex characters outside of `{{...}}` sequences are
// backslash-escaped, and each `{{...}}` sequence becomes a `(...)`
// sub-pattern whose contents are kept verbatim.
//
// For a `{{` with no matching `}}`, the rest of the string is kept verbatim
// except for its final character, which is handled like text outside of a
// sequence.
static void ConvertExpectationStringToRegex(std::string& str) {
  // Characters needing attention outside of a `{{...}}` sequence. All are
  // escaped, except that `{{` opens a sequence.
  static constexpr char SpecialChars[] = "()[]{}.^$*+?|\\";

  std::string regex;
  regex.reserve(str.size());

  const size_t size = str.size();
  size_t pos = 0;
  while (pos < size) {
    // Copy the run of ordinary characters up to the next special one.
    const size_t special_pos = str.find_first_of(SpecialChars, pos);
    if (special_pos == std::string::npos) {
      regex.append(str, pos, std::string::npos);
      break;
    }
    regex.append(str, pos, special_pos - pos);
    pos = special_pos;

    const bool opens_sequence =
        str[pos] == '{' && pos + 1 < size && str[pos + 1] == '{';
    if (!opens_sequence) {
      // A regex character (including a single `{`), escape it.
      regex += '\\';
      regex += str[pos];
      ++pos;
      continue;
    }

    // Switch `{{...}}` to the standard `(...)` syntax.
    regex += '(';
    pos += 2;
    const size_t close_pos = str.find("}}", pos);
    if (close_pos != std::string::npos) {
      regex.append(str, pos, close_pos - pos);
      regex += ')';
      pos = close_pos + 2;
    } else if (pos < size) {
      // Unterminated: keep all but the last character verbatim, and let the
      // outer loop process the last character.
      regex.append(str, pos, size - 1 - pos);
      pos = size - 1;
    }
  }

  str = std::move(regex);
}

// Builds a matcher for the expectation text `in` of a CHECK line, which uses
// `FileCheck` syntax.
//
// An empty expectation matches the empty string; otherwise the text must
// start with a single space, which is dropped. Each `[[@LINE+N]]` or
// `[[@LINE-N]]` keyword is replaced by `line_index` plus the offset. If the
// text contains `{{`, the result is a regex matcher; otherwise it's an exact
// string matcher.
static auto TransformExpectation(int line_index, llvm::StringRef in)
    -> ErrorOr<Matcher<std::string>> {
  if (in.empty()) {
    return Matcher<std::string>{StrEq("")};
  }
  if (!in.consume_front(" ")) {
    return ErrorBuilder() << "Malformated CHECK line: " << in;
  }

  // Position of the first keyword, if any. This is `size_t` so that it can
  // hold `npos`.
  const size_t first_keyword_pos = in.find("[[");
  // Whether a regex matcher is needed; it's expensive to build, so it's
  // avoided when there is no regex component.
  const bool has_regex = in.find("{{") != llvm::StringRef::npos;

  // With neither keywords nor regex sequences, match the input directly.
  if (first_keyword_pos == llvm::StringRef::npos && !has_regex) {
    return Matcher<std::string>{StrEq(in)};
  }

  std::string expanded = in.str();

  // Expand the keywords, one per iteration.
  static constexpr llvm::StringLiteral LineKeyword = "@LINE";
  for (size_t pos = first_keyword_pos; pos != std::string::npos;) {
    llvm::StringRef cursor = llvm::StringRef(expanded).substr(pos);
    CARBON_CHECK(cursor.consume_front("[["));

    if (!cursor.consume_front(LineKeyword)) {
      return ErrorBuilder()
             << "Unexpected [[, should be {{\\[\\[}} at `"
             << cursor.substr(0, 5) << "` in: " << in;
    }

    // An explicit `+` is skipped here; `consumeInteger` handles `-` itself.
    cursor.consume_front("+");
    int offset;
    // Note `consumeInteger` returns true on failure.
    const bool bad_integer = cursor.consumeInteger(10, offset);
    if (bad_integer || !cursor.consume_front("]]")) {
      return ErrorBuilder()
             << "Unexpected @LINE offset at `"
             << cursor.substr(0, 5) << "` in: " << in;
    }

    // The cursor is now just past the keyword's `]]`, so its distance from
    // the keyword start is the length of the text to replace.
    const std::string line_number = llvm::Twine(line_index + offset).str();
    const size_t keyword_len = cursor.data() - (expanded.data() + pos);
    expanded.replace(pos, keyword_len, line_number);

    // Continue searching after the inserted line number.
    pos = expanded.find("[[", pos + line_number.size());
  }

  // Without a regex, the expanded string can be matched directly.
  if (!has_regex) {
    return Matcher<std::string>{StrEq(expanded)};
  }

  // Otherwise, escape everything outside of the regex sequences and switch
  // the sequences to normal regex syntax.
  ConvertExpectationStringToRegex(expanded);
  return Matcher<std::string>{MatchesRegex(expanded)};
}

// Adds the final file once all lines have been processed: the last split
// file when splits were found, and otherwise the main file under its base
// name.
static auto FinishSplit(llvm::StringRef test_name, SplitState* split,
                        llvm::SmallVector<TestFile::Split>* file_splits)
    -> ErrorOr<Success> {
  llvm::StringRef filename = split->filename;
  if (!split->has_splits()) {
    // No file splitting happened, so the main file is the test file. There
    // will always be a `/` unless tests are in the repo root; in that case
    // `rfind` returns `npos`, the `+ 1` wraps to 0, and the whole name is
    // kept.
    filename = test_name.drop_front(test_name.rfind("/") + 1);
  }
  return AddSplit(filename, &split->content, file_splits);
}

// Handles `// CHECK` lines. Returns false if the line isn't one, and true
// once it has been consumed.
//
// Unless running autoupdate, the line must be `// CHECK:STDOUT:` or
// `// CHECK:STDERR:`, and the matcher built from the rest of the line is
// appended to `expected_stdout` or `expected_stderr` respectively.
static auto TryConsumeCheck(
    bool running_autoupdate, int line_index, llvm::StringRef line,
    llvm::StringRef line_trimmed,
    llvm::SmallVector<testing::Matcher<std::string>>* expected_stdout,
    llvm::SmallVector<testing::Matcher<std::string>>* expected_stderr)
    -> ErrorOr<bool> {
  if (!line_trimmed.consume_front("// CHECK")) {
    return false;
  }

  // Autoupdate doesn't build expectations, so that an invalid CHECK line
  // can't break it.
  if (running_autoupdate) {
    return true;
  }

  // Select the stream that the expectation applies to.
  llvm::SmallVector<Matcher<std::string>>* target = nullptr;
  if (line_trimmed.consume_front(":STDOUT:")) {
    target = expected_stdout;
  } else if (line_trimmed.consume_front(":STDERR:")) {
    target = expected_stderr;
  }
  if (target == nullptr) {
    return ErrorBuilder() << "Unexpected CHECK in input: " << line.str();
  }

  CARBON_ASSIGN_OR_RETURN(Matcher<std::string> matcher,
                          TransformExpectation(line_index, line_trimmed));
  target->push_back(matcher);
  return true;
}

// Handles `// ARGS: ` and `// EXTRA-ARGS: ` lines. Returns false if the line
// is neither, and true once its arguments have been appended to `args` or
// `extra_args` respectively.
//
// Returns an error if either list already has entries.
static auto TryConsumeArgs(llvm::StringRef line, llvm::StringRef line_trimmed,
                           llvm::SmallVector<std::string>* args,
                           llvm::SmallVector<std::string>* extra_args)
    -> ErrorOr<bool> {
  // Pick the destination list based on the line's prefix.
  llvm::SmallVector<std::string>* destination = nullptr;
  if (line_trimmed.consume_front("// ARGS: ")) {
    destination = args;
  } else if (line_trimmed.consume_front("// EXTRA-ARGS: ")) {
    destination = extra_args;
  }
  if (destination == nullptr) {
    return false;
  }

  const bool nothing_specified_yet = args->empty() && extra_args->empty();
  if (!nothing_specified_yet) {
    return ErrorBuilder() << "ARGS / EXTRA-ARGS specified multiple times: "
                          << line.str();
  }

  // Peel tokens off the front of the line until none are left.
  llvm::StringRef remaining = line_trimmed;
  while (true) {
    auto [token, rest] = llvm::getToken(remaining);
    if (token.empty()) {
      break;
    }
    destination->push_back(token.str());
    remaining = rest;
  }

  return true;
}

// Handles lines that are exactly `// AUTOUPDATE` or `// NOAUTOUPDATE`.
// Returns false for any other line. Otherwise sets `*found_autoupdate`,
// records `line_index` in `*autoupdate_line_number` for `// AUTOUPDATE` only,
// and returns true.
//
// Returns an error if a setting was already found.
static auto TryConsumeAutoupdate(int line_index, llvm::StringRef line_trimmed,
                                 bool* found_autoupdate,
                                 std::optional<int>* autoupdate_line_number)
    -> ErrorOr<bool> {
  static constexpr llvm::StringLiteral Autoupdate = "// AUTOUPDATE";
  static constexpr llvm::StringLiteral NoAutoupdate = "// NOAUTOUPDATE";

  const bool enables_autoupdate = line_trimmed == Autoupdate;
  const bool disables_autoupdate = line_trimmed == NoAutoupdate;
  if (!enables_autoupdate && !disables_autoupdate) {
    return false;
  }

  if (*found_autoupdate) {
    return ErrorBuilder() << "Multiple AUTOUPDATE/NOAUTOUPDATE settings found";
  }
  *found_autoupdate = true;

  // Only AUTOUPDATE has its line recorded.
  if (enables_autoupdate) {
    *autoupdate_line_number = line_index;
  }
  return true;
}

// Handles a line that is exactly `// <flag_name>`. Returns false for any
// other line; otherwise sets `*flag` and returns true.
//
// Returns an error if `*flag` was already set.
static auto TryConsumeSetFlag(llvm::StringRef line_trimmed,
                              llvm::StringLiteral flag_name, bool* flag)
    -> ErrorOr<bool> {
  const bool is_this_flag =
      line_trimmed.consume_front("// ") && line_trimmed == flag_name;
  if (!is_this_flag) {
    return false;
  }

  if (*flag) {
    return ErrorBuilder() << flag_name << " was specified multiple times";
  }
  *flag = true;
  return true;
}

// Reads the test file `test_name` and processes it line by line into a
// `TestFile`: conflict markers, file splits, CHECK expectations, ARGS /
// EXTRA-ARGS, AUTOUPDATE / NOAUTOUPDATE, and SET-* flags.
//
// When `running_autoupdate` is true, conflict markers are tracked rather than
// rejected, and CHECK expectations are not built.
//
// Returns an error if the file can't be read, if any line fails processing,
// if there is no AUTOUPDATE/NOAUTOUPDATE setting, or if AUTOUPDATE-SPLIT is
// misused.
auto ProcessTestFile(llvm::StringRef test_name, bool running_autoupdate)
    -> ErrorOr<TestFile> {
  TestFile test_file;

  // Store the file so that file_splits can use references to content.
  CARBON_ASSIGN_OR_RETURN(test_file.input_content, ReadFile(test_name.str()));

  // Original file content, and a cursor for walking through it.
  llvm::StringRef file_content = test_file.input_content;
  llvm::StringRef cursor = file_content;

  // Whether either AUTOUPDATE or NOAUTOUPDATE was found.
  bool found_autoupdate = false;

  // The index in the current test file. Will be reset on splits.
  int line_index = 0;

  SplitState split;

  // When autoupdating, we track whether we're inside conflict markers.
  // Otherwise conflict markers are errors.
  bool inside_conflict_marker = false;

  while (!cursor.empty()) {
    auto [line, next_cursor] = cursor.split("\n");
    cursor = next_cursor;
    auto line_trimmed = line.ltrim();

    bool is_consumed = false;
    CARBON_ASSIGN_OR_RETURN(
        is_consumed,
        TryConsumeConflictMarker(running_autoupdate, line, line_trimmed,
                                 &inside_conflict_marker));
    if (is_consumed) {
      continue;
    }

    // At this point, remaining lines are part of the test input.
    CARBON_ASSIGN_OR_RETURN(
        is_consumed,
        TryConsumeSplit(line, line_trimmed, found_autoupdate, &line_index,
                        &split, &test_file.file_splits,
                        &test_file.non_check_lines));
    if (is_consumed) {
      continue;
    }

    ++line_index;

    // TIP lines have no impact on validation.
    if (line_trimmed.starts_with("// TIP:")) {
      continue;
    }

    CARBON_ASSIGN_OR_RETURN(
        is_consumed, TryConsumeCheck(running_autoupdate, line_index, line,
                                     line_trimmed, &test_file.expected_stdout,
                                     &test_file.expected_stderr));
    if (is_consumed) {
      continue;
    }

    // At this point, lines are retained as non-CHECK lines.
    test_file.non_check_lines.push_back(
        FileTestLine(split.file_index, line_index, line));

    CARBON_ASSIGN_OR_RETURN(
        is_consumed, TryConsumeArgs(line, line_trimmed, &test_file.test_args,
                                    &test_file.extra_args));
    if (is_consumed) {
      continue;
    }
    CARBON_ASSIGN_OR_RETURN(
        is_consumed,
        TryConsumeAutoupdate(line_index, line_trimmed, &found_autoupdate,
                             &test_file.autoupdate_line_number));
    if (is_consumed) {
      continue;
    }
    CARBON_ASSIGN_OR_RETURN(
        is_consumed,
        TryConsumeSetFlag(line_trimmed, "SET-CAPTURE-CONSOLE-OUTPUT",
                          &test_file.capture_console_output));
    if (is_consumed) {
      continue;
    }
    CARBON_ASSIGN_OR_RETURN(is_consumed,
                            TryConsumeSetFlag(line_trimmed, "SET-CHECK-SUBSET",
                                              &test_file.check_subset));
    if (is_consumed) {
      continue;
    }
  }

  if (!found_autoupdate) {
    return Error("Missing AUTOUPDATE/NOAUTOUPDATE setting");
  }

  // Record whether splits were found, then add the last (or only) file.
  test_file.has_splits = split.has_splits();
  CARBON_RETURN_IF_ERROR(
      FinishSplit(test_name, &split, &test_file.file_splits));

  // Validate AUTOUPDATE-SPLIT use, and remove it from test files if present.
  if (test_file.has_splits) {
    constexpr llvm::StringLiteral AutoupdateSplit = "AUTOUPDATE-SPLIT";
    // Every split except the last is checked here. The loop variable is
    // deliberately not named `test_file`, which would shadow the result being
    // built.
    for (const auto& file_split :
         llvm::ArrayRef(test_file.file_splits).drop_back()) {
      if (file_split.filename == AutoupdateSplit) {
        return Error("AUTOUPDATE-SPLIT must be the last split");
      }
    }
    if (test_file.file_splits.back().filename == AutoupdateSplit) {
      if (!test_file.autoupdate_line_number) {
        return Error("AUTOUPDATE-SPLIT requires AUTOUPDATE");
      }
      test_file.autoupdate_split = true;
      test_file.file_splits.pop_back();
    }
  }

  // Assume there is always a suffix `\n` in output.
  if (!test_file.expected_stdout.empty()) {
    test_file.expected_stdout.push_back(StrEq(""));
  }
  if (!test_file.expected_stderr.empty()) {
    test_file.expected_stderr.push_back(StrEq(""));
  }

  return std::move(test_file);
}

}  // namespace Carbon::Testing