ソースを参照

Refactor FileTestBase to split logic between multiple files (#4968)

I'm refactoring logic to try to make these files more manageable,
particularly as I'm looking at ways to use more threads. I'm renaming
FileTestBase::TestContext to TestFile and FileTestBase::TestFile to
TestFile::Split to try to be more consistent in how we talk about test
files and file splits in general. This change is not expected to change
any behavior, it's just refactoring.

This PR has two commits:

- Copying files for viewing deltas in GH
- Moving logic

The delta of the two commits is the main PR. The second commit can be
viewed on its own to see the delta versus file_test_base.* files where
the logic currently rests (there's still a reordering in run_test.cpp as
part of making one function static).
Jon Ross-Perkins 1 年間 前
コミット
3d6f14fca5

+ 7 - 1
testing/file_test/BUILD

@@ -29,7 +29,13 @@ cc_library(
 cc_library(
 cc_library(
     name = "file_test_base",
     name = "file_test_base",
     testonly = 1,
     testonly = 1,
-    srcs = ["file_test_base.cpp"],
+    srcs = [
+        "file_test_base.cpp",
+        "run_test.cpp",
+        "run_test.h",
+        "test_file.cpp",
+        "test_file.h",
+    ],
     hdrs = ["file_test_base.h"],
     hdrs = ["file_test_base.h"],
     deps = [
     deps = [
         ":autoupdate",
         ":autoupdate",

+ 71 - 871
testing/file_test/file_test_base.cpp

@@ -4,8 +4,6 @@
 
 
 #include "testing/file_test/file_test_base.h"
 #include "testing/file_test/file_test_base.h"
 
 
-#include <gmock/gmock.h>
-
 #include <filesystem>
 #include <filesystem>
 #include <optional>
 #include <optional>
 #include <string>
 #include <string>
@@ -18,9 +16,7 @@
 #include "common/exe_path.h"
 #include "common/exe_path.h"
 #include "common/init_llvm.h"
 #include "common/init_llvm.h"
 #include "common/raw_string_ostream.h"
 #include "common/raw_string_ostream.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -29,6 +25,8 @@
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/ThreadPool.h"
 #include "testing/base/file_helpers.h"
 #include "testing/base/file_helpers.h"
 #include "testing/file_test/autoupdate.h"
 #include "testing/file_test/autoupdate.h"
+#include "testing/file_test/run_test.h"
+#include "testing/file_test/test_file.h"
 
 
 ABSL_FLAG(std::vector<std::string>, file_tests, {},
 ABSL_FLAG(std::vector<std::string>, file_tests, {},
           "A comma-separated list of repo-relative names of test files. "
           "A comma-separated list of repo-relative names of test files. "
@@ -47,19 +45,6 @@ ABSL_FLAG(bool, dump_output, false,
 
 
 namespace Carbon::Testing {
 namespace Carbon::Testing {
 
 
-// While these are marked as "internal" APIs, they seem to work and be pretty
-// widely used for their exact documented behavior.
-using ::testing::internal::CaptureStderr;
-using ::testing::internal::CaptureStdout;
-using ::testing::internal::GetCapturedStderr;
-using ::testing::internal::GetCapturedStdout;
-
-using ::testing::Matcher;
-using ::testing::MatchesRegex;
-using ::testing::StrEq;
-
-static constexpr llvm::StringLiteral StdinFilename = "STDIN";
-
 // Splits outputs to string_view because gtest handles string_view by default.
 // Splits outputs to string_view because gtest handles string_view by default.
 static auto SplitOutput(llvm::StringRef output)
 static auto SplitOutput(llvm::StringRef output)
     -> llvm::SmallVector<std::string_view> {
     -> llvm::SmallVector<std::string_view> {
@@ -121,6 +106,44 @@ static auto GetBazelCommand(BazelMode mode, llvm::StringRef test_name)
   return args.TakeStr();
   return args.TakeStr();
 }
 }
 
 
+// Runs the FileTestAutoupdater, returning the result.
+static auto RunAutoupdater(FileTestBase* test_base, const TestFile& test_file,
+                           bool dry_run) -> bool {
+  if (!test_file.autoupdate_line_number) {
+    return false;
+  }
+
+  llvm::SmallVector<llvm::StringRef> filenames;
+  filenames.reserve(test_file.non_check_lines.size());
+  if (test_file.has_splits) {
+    // There are splits, so we provide an empty name for the first file.
+    filenames.push_back({});
+  }
+  for (const auto& file : test_file.file_splits) {
+    filenames.push_back(file.filename);
+  }
+
+  llvm::ArrayRef expected_filenames = filenames;
+  if (filenames.size() > 1) {
+    expected_filenames = expected_filenames.drop_front();
+  }
+
+  return FileTestAutoupdater(
+             std::filesystem::absolute(test_base->test_name().str()),
+             GetBazelCommand(BazelMode::Test, test_base->test_name()),
+             GetBazelCommand(BazelMode::Dump, test_base->test_name()),
+             test_file.input_content, filenames,
+             *test_file.autoupdate_line_number, test_file.autoupdate_split,
+             test_file.non_check_lines, test_file.actual_stdout,
+             test_file.actual_stderr,
+             test_base->GetDefaultFileRE(expected_filenames),
+             test_base->GetLineNumberReplacements(expected_filenames),
+             [&](std::string& line) {
+               test_base->DoExtraCheckReplacements(line);
+             })
+      .Run(dry_run);
+}
+
 // Runs a test and compares output. This keeps output split by line so that
 // Runs a test and compares output. This keeps output split by line so that
 // issues are a little easier to identify by the different line.
 // issues are a little easier to identify by the different line.
 auto FileTestBase::TestBody() -> void {
 auto FileTestBase::TestBody() -> void {
@@ -130,19 +153,20 @@ auto FileTestBase::TestBody() -> void {
   llvm::errs() << "\nTo test this file alone, run:\n  " << test_command
   llvm::errs() << "\nTo test this file alone, run:\n  " << test_command
                << "\n\n";
                << "\n\n";
 
 
-  TestContext context;
-  auto run_result = ProcessTestFileAndRun(context);
-  ASSERT_TRUE(run_result.ok()) << run_result.error();
+  ErrorOr<TestFile> test_file =
+      ProcessTestFileAndRun(this, output_mutex_, /*dump_output=*/false,
+                            absl::GetFlag(FLAGS_autoupdate));
+  ASSERT_TRUE(test_file.ok()) << test_file.error();
   ValidateRun();
   ValidateRun();
   auto test_filename = std::filesystem::path(test_name_.str()).filename();
   auto test_filename = std::filesystem::path(test_name_.str()).filename();
 
 
   // Check success/failure against `fail_` prefixes.
   // Check success/failure against `fail_` prefixes.
-  if (context.run_result.per_file_success.empty()) {
-    CompareFailPrefix(test_filename.string(), context.run_result.success);
+  if (test_file->run_result.per_file_success.empty()) {
+    CompareFailPrefix(test_filename.string(), test_file->run_result.success);
   } else {
   } else {
     bool require_overall_failure = false;
     bool require_overall_failure = false;
     for (const auto& [filename, success] :
     for (const auto& [filename, success] :
-         context.run_result.per_file_success) {
+         test_file->run_result.per_file_success) {
       CompareFailPrefix(filename, success);
       CompareFailPrefix(filename, success);
       if (!success) {
       if (!success) {
         require_overall_failure = true;
         require_overall_failure = true;
@@ -150,20 +174,20 @@ auto FileTestBase::TestBody() -> void {
     }
     }
 
 
     if (require_overall_failure) {
     if (require_overall_failure) {
-      EXPECT_FALSE(context.run_result.success)
+      EXPECT_FALSE(test_file->run_result.success)
           << "There is a per-file failure expectation, so the overall result "
           << "There is a per-file failure expectation, so the overall result "
              "should have been a failure.";
              "should have been a failure.";
     } else {
     } else {
       // Individual files all succeeded, so the prefix is enforced on the main
       // Individual files all succeeded, so the prefix is enforced on the main
       // test file.
       // test file.
-      CompareFailPrefix(test_filename.string(), context.run_result.success);
+      CompareFailPrefix(test_filename.string(), test_file->run_result.success);
     }
     }
   }
   }
 
 
   // Check results. Include a reminder of the autoupdate command for any
   // Check results. Include a reminder of the autoupdate command for any
   // stdout/stderr differences.
   // stdout/stderr differences.
   std::string update_message;
   std::string update_message;
-  if (context.autoupdate_line_number) {
+  if (test_file->autoupdate_line_number) {
     update_message = llvm::formatv(
     update_message = llvm::formatv(
         "If these differences are expected, try the autoupdater:\n  {0}",
         "If these differences are expected, try the autoupdater:\n  {0}",
         GetBazelCommand(BazelMode::Autoupdate, test_name_));
         GetBazelCommand(BazelMode::Autoupdate, test_name_));
@@ -172,91 +196,52 @@ auto FileTestBase::TestBody() -> void {
         "If these differences are expected, content must be updated manually.";
         "If these differences are expected, content must be updated manually.";
   }
   }
   SCOPED_TRACE(update_message);
   SCOPED_TRACE(update_message);
-  if (context.check_subset) {
-    EXPECT_THAT(SplitOutput(context.actual_stdout),
-                IsSupersetOf(context.expected_stdout));
-    EXPECT_THAT(SplitOutput(context.actual_stderr),
-                IsSupersetOf(context.expected_stderr));
+  if (test_file->check_subset) {
+    EXPECT_THAT(SplitOutput(test_file->actual_stdout),
+                IsSupersetOf(test_file->expected_stdout));
+    EXPECT_THAT(SplitOutput(test_file->actual_stderr),
+                IsSupersetOf(test_file->expected_stderr));
 
 
   } else {
   } else {
-    EXPECT_THAT(SplitOutput(context.actual_stdout),
-                ElementsAreArray(context.expected_stdout));
-    EXPECT_THAT(SplitOutput(context.actual_stderr),
-                ElementsAreArray(context.expected_stderr));
+    EXPECT_THAT(SplitOutput(test_file->actual_stdout),
+                ElementsAreArray(test_file->expected_stdout));
+    EXPECT_THAT(SplitOutput(test_file->actual_stderr),
+                ElementsAreArray(test_file->expected_stderr));
   }
   }
 
 
   // If there are no other test failures, check if autoupdate would make
   // If there are no other test failures, check if autoupdate would make
   // changes. We don't do this when there _are_ failures because the
   // changes. We don't do this when there _are_ failures because the
   // SCOPED_TRACE already contains the autoupdate reminder.
   // SCOPED_TRACE already contains the autoupdate reminder.
-  if (!HasFailure() && RunAutoupdater(context, /*dry_run=*/true)) {
+  if (!HasFailure() && RunAutoupdater(this, *test_file, /*dry_run=*/true)) {
     ADD_FAILURE() << "Autoupdate would make changes to the file content.";
     ADD_FAILURE() << "Autoupdate would make changes to the file content.";
   }
   }
 }
 }
 
 
-auto FileTestBase::RunAutoupdater(const TestContext& context, bool dry_run)
-    -> bool {
-  if (!context.autoupdate_line_number) {
-    return false;
-  }
-
-  llvm::SmallVector<llvm::StringRef> filenames;
-  filenames.reserve(context.non_check_lines.size());
-  if (context.has_splits) {
-    // There are splits, so we provide an empty name for the first file.
-    filenames.push_back({});
-  }
-  for (const auto& file : context.test_files) {
-    filenames.push_back(file.filename);
-  }
-
-  llvm::ArrayRef expected_filenames = filenames;
-  if (filenames.size() > 1) {
-    expected_filenames = expected_filenames.drop_front();
-  }
-
-  return FileTestAutoupdater(
-             std::filesystem::absolute(test_name_.str()),
-             GetBazelCommand(BazelMode::Test, test_name_),
-             GetBazelCommand(BazelMode::Dump, test_name_),
-             context.input_content, filenames, *context.autoupdate_line_number,
-             context.autoupdate_split, context.non_check_lines,
-             context.actual_stdout, context.actual_stderr,
-             GetDefaultFileRE(expected_filenames),
-             GetLineNumberReplacements(expected_filenames),
-             [&](std::string& line) { DoExtraCheckReplacements(line); })
-      .Run(dry_run);
-}
-
 auto FileTestBase::Autoupdate() -> ErrorOr<bool> {
 auto FileTestBase::Autoupdate() -> ErrorOr<bool> {
   // Add a crash trace entry mentioning which file we're updating.
   // Add a crash trace entry mentioning which file we're updating.
   std::string stack_trace_string =
   std::string stack_trace_string =
       llvm::formatv("performing autoupdate for {0}", test_name_);
       llvm::formatv("performing autoupdate for {0}", test_name_);
   llvm::PrettyStackTraceString stack_trace_entry(stack_trace_string.c_str());
   llvm::PrettyStackTraceString stack_trace_entry(stack_trace_string.c_str());
 
 
-  TestContext context;
-  auto run_result = ProcessTestFileAndRun(context);
-  if (!run_result.ok()) {
-    return ErrorBuilder() << "Error updating " << test_name_ << ": "
-                          << run_result.error();
-  }
-  return RunAutoupdater(context, /*dry_run=*/false);
+  CARBON_ASSIGN_OR_RETURN(
+      TestFile test_file,
+      ProcessTestFileAndRun(this, output_mutex_, /*dump_output=*/false,
+                            absl::GetFlag(FLAGS_autoupdate)));
+  return RunAutoupdater(this, test_file, /*dry_run=*/false);
 }
 }
 
 
 auto FileTestBase::DumpOutput() -> ErrorOr<Success> {
 auto FileTestBase::DumpOutput() -> ErrorOr<Success> {
-  TestContext context;
-  context.dump_output = true;
   std::string banner(79, '=');
   std::string banner(79, '=');
   banner.append("\n");
   banner.append("\n");
   llvm::errs() << banner << "= " << test_name_ << "\n";
   llvm::errs() << banner << "= " << test_name_ << "\n";
 
 
-  auto run_result = ProcessTestFileAndRun(context);
-  if (!run_result.ok()) {
-    return ErrorBuilder() << "Error updating " << test_name_ << ": "
-                          << run_result.error();
-  }
-  llvm::errs() << banner << context.actual_stdout << banner
+  CARBON_ASSIGN_OR_RETURN(
+      TestFile test_file,
+      ProcessTestFileAndRun(this, output_mutex_, /*dump_output=*/true,
+                            absl::GetFlag(FLAGS_autoupdate)));
+  llvm::errs() << banner << test_file.actual_stdout << banner
                << "= Exit with success: "
                << "= Exit with success: "
-               << (context.run_result.success ? "true" : "false") << "\n"
+               << (test_file.run_result.success ? "true" : "false") << "\n"
                << banner;
                << banner;
   return Success();
   return Success();
 }
 }
@@ -270,791 +255,6 @@ auto FileTestBase::GetLineNumberReplacements(
            .line_formatv = R"({0})"}};
            .line_formatv = R"({0})"}};
 }
 }
 
 
-auto FileTestBase::ProcessTestFileAndRun(TestContext& context)
-    -> ErrorOr<Success> {
-  // Store the file so that test_files can use references to content.
-  CARBON_ASSIGN_OR_RETURN(context.input_content, ReadFile(test_name_.str()));
-
-  // Load expected output.
-  CARBON_RETURN_IF_ERROR(ProcessTestFile(context));
-
-  // Process arguments.
-  if (context.test_args.empty()) {
-    context.test_args = GetDefaultArgs();
-    context.test_args.append(context.extra_args);
-  }
-  CARBON_RETURN_IF_ERROR(
-      DoArgReplacements(context.test_args, context.test_files));
-
-  // stdin needs to exist on-disk for compatibility. We'll use a pointer for it.
-  FILE* input_stream = nullptr;
-  auto erase_input_on_exit = llvm::make_scope_exit([&input_stream]() {
-    if (input_stream) {
-      // fclose should delete the tmpfile.
-      fclose(input_stream);
-      input_stream = nullptr;
-    }
-  });
-
-  // Create the files in-memory.
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> fs =
-      new llvm::vfs::InMemoryFileSystem;
-  for (const auto& test_file : context.test_files) {
-    if (test_file.filename == StdinFilename) {
-      input_stream = tmpfile();
-      fwrite(test_file.content.c_str(), sizeof(char), test_file.content.size(),
-             input_stream);
-      rewind(input_stream);
-    } else if (!fs->addFile(test_file.filename, /*ModificationTime=*/0,
-                            llvm::MemoryBuffer::getMemBuffer(
-                                test_file.content, test_file.filename,
-                                /*RequiresNullTerminator=*/false))) {
-      return ErrorBuilder() << "File is repeated: " << test_file.filename;
-    }
-  }
-  // Convert the arguments to StringRef and const char* to match the
-  // expectations of PrettyStackTraceProgram and Run.
-  llvm::SmallVector<llvm::StringRef> test_args_ref;
-  llvm::SmallVector<const char*> test_argv_for_stack_trace;
-  test_args_ref.reserve(context.test_args.size());
-  test_argv_for_stack_trace.reserve(context.test_args.size() + 1);
-  for (const auto& arg : context.test_args) {
-    test_args_ref.push_back(arg);
-    test_argv_for_stack_trace.push_back(arg.c_str());
-  }
-  // Add a trailing null so that this is a proper argv.
-  test_argv_for_stack_trace.push_back(nullptr);
-
-  // Add a stack trace entry for the test invocation.
-  llvm::PrettyStackTraceProgram stack_trace_entry(
-      test_argv_for_stack_trace.size() - 1, test_argv_for_stack_trace.data());
-
-  // Execution must be serialized for either serial tests or console output.
-  std::unique_lock<std::mutex> output_lock;
-  if (output_mutex_ &&
-      (context.capture_console_output || !AllowParallelRun())) {
-    output_lock = std::unique_lock<std::mutex>(*output_mutex_);
-  }
-
-  // Conditionally capture console output. We use a scope exit to ensure the
-  // captures terminate even on run failures.
-  if (context.capture_console_output) {
-    CaptureStderr();
-    CaptureStdout();
-  }
-  auto add_output_on_exit = llvm::make_scope_exit([&]() {
-    if (context.capture_console_output) {
-      // No need to flush stderr.
-      llvm::outs().flush();
-      context.actual_stdout += GetCapturedStdout();
-      context.actual_stderr += GetCapturedStderr();
-    }
-  });
-
-  // Prepare string streams to capture output. In order to address casting
-  // constraints, we split calls to Run as a ternary based on whether we want to
-  // capture output.
-  llvm::raw_svector_ostream output_stream(context.actual_stdout);
-  llvm::raw_svector_ostream error_stream(context.actual_stderr);
-  CARBON_ASSIGN_OR_RETURN(
-      context.run_result,
-      context.dump_output
-          ? Run(test_args_ref, fs, input_stream, llvm::outs(), llvm::errs())
-          : Run(test_args_ref, fs, input_stream, output_stream, error_stream));
-
-  return Success();
-}
-
-auto FileTestBase::DoArgReplacements(
-    llvm::SmallVector<std::string>& test_args,
-    const llvm::SmallVector<TestFile>& test_files) -> ErrorOr<Success> {
-  auto replacements = GetArgReplacements();
-  for (auto* it = test_args.begin(); it != test_args.end(); ++it) {
-    auto percent = it->find("%");
-    if (percent == std::string::npos) {
-      continue;
-    }
-
-    if (percent + 1 >= it->size()) {
-      return ErrorBuilder() << "% is not allowed on its own: " << *it;
-    }
-    char c = (*it)[percent + 1];
-    switch (c) {
-      case 's': {
-        if (*it != "%s") {
-          return ErrorBuilder() << "%s must be the full argument: " << *it;
-        }
-        it = test_args.erase(it);
-        for (const auto& file : test_files) {
-          const std::string& filename = file.filename;
-          if (filename == StdinFilename || filename.ends_with(".h")) {
-            continue;
-          }
-          it = test_args.insert(it, filename);
-          ++it;
-        }
-        // Back up once because the for loop will advance.
-        --it;
-        break;
-      }
-      case 't': {
-        char* tmpdir = getenv("TEST_TMPDIR");
-        CARBON_CHECK(tmpdir != nullptr);
-        it->replace(percent, 2, llvm::formatv("{0}/temp_file", tmpdir));
-        break;
-      }
-      case '{': {
-        auto end_brace = it->find('}', percent);
-        if (end_brace == std::string::npos) {
-          return ErrorBuilder() << "%{ without closing }: " << *it;
-        }
-        llvm::StringRef substr(&*(it->begin() + percent + 2),
-                               end_brace - percent - 2);
-        auto replacement = replacements.find(substr);
-        if (replacement == replacements.end()) {
-          return ErrorBuilder()
-                 << "unknown substitution: %{" << substr << "}: " << *it;
-        }
-        it->replace(percent, end_brace - percent + 1, replacement->second);
-        break;
-      }
-      default:
-        return ErrorBuilder() << "%" << c << " is not supported: " << *it;
-    }
-  }
-  return Success();
-}
-
-// Processes conflict markers, including tracking of whether code is within a
-// conflict marker. Returns true if the line is consumed.
-static auto TryConsumeConflictMarker(llvm::StringRef line,
-                                     llvm::StringRef line_trimmed,
-                                     bool* inside_conflict_marker)
-    -> ErrorOr<bool> {
-  bool is_start = line.starts_with("<<<<<<<");
-  bool is_middle = line.starts_with("=======") || line.starts_with("|||||||");
-  bool is_end = line.starts_with(">>>>>>>");
-
-  // When running the test, any conflict marker is an error.
-  if (!absl::GetFlag(FLAGS_autoupdate) && (is_start || is_middle || is_end)) {
-    return ErrorBuilder() << "Conflict marker found:\n" << line;
-  }
-
-  // Autoupdate tracks conflict markers for context, and will discard
-  // conflicting lines when it can autoupdate them.
-  if (*inside_conflict_marker) {
-    if (is_start) {
-      return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
-                            << line;
-    }
-    if (is_middle) {
-      return true;
-    }
-    if (is_end) {
-      *inside_conflict_marker = false;
-      return true;
-    }
-
-    // Look for CHECK and TIP lines, which can be discarded.
-    if (line_trimmed.starts_with("// CHECK:STDOUT:") ||
-        line_trimmed.starts_with("// CHECK:STDERR:") ||
-        line_trimmed.starts_with("// TIP:")) {
-      return true;
-    }
-
-    return ErrorBuilder()
-           << "Autoupdate can't discard non-CHECK lines inside conflicts:\n"
-           << line;
-  } else {
-    if (is_start) {
-      *inside_conflict_marker = true;
-      return true;
-    }
-    if (is_middle || is_end) {
-      return ErrorBuilder() << "Unexpected conflict marker outside conflict:\n"
-                            << line;
-    }
-    return false;
-  }
-}
-
-// State for file splitting logic: TryConsumeSplit and FinishSplit.
-struct SplitState {
-  auto has_splits() const -> bool { return file_index > 0; }
-
-  auto add_content(llvm::StringRef line) -> void {
-    content.append(line.str());
-    content.append("\n");
-  }
-
-  // Whether content has been found. Only updated before a file split is found
-  // (which may be never).
-  bool found_code_pre_split = false;
-
-  // The current file name, considering splits. Empty for the default file.
-  llvm::StringRef filename = "";
-
-  // The accumulated content for the file being built. This may elide some of
-  // the original content, such as conflict markers.
-  std::string content;
-
-  // The current file index.
-  int file_index = 0;
-};
-
-// Reformats `[[@LSP:` and similar keyword as an LSP call with headers.
-static auto ReplaceLspKeywordAt(std::string* content, size_t keyword_pos,
-                                int& lsp_call_id) -> ErrorOr<size_t> {
-  llvm::StringRef content_at_keyword =
-      llvm::StringRef(*content).substr(keyword_pos);
-
-  auto [keyword, body_start] = content_at_keyword.split(":");
-  if (body_start.empty()) {
-    return ErrorBuilder() << "Missing `:` for `"
-                          << content_at_keyword.take_front(10) << "`";
-  }
-
-  // Whether the first param is a method or id.
-  llvm::StringRef method_or_id_label = "method";
-  // Whether to attach the `lsp_call_id`.
-  bool use_call_id = false;
-  // The JSON label for extra content.
-  llvm::StringRef extra_content_label;
-  if (keyword == "[[@LSP-CALL") {
-    use_call_id = true;
-    extra_content_label = "params";
-  } else if (keyword == "[[@LSP-NOTIFY") {
-    extra_content_label = "params";
-  } else if (keyword == "[[@LSP-REPLY") {
-    method_or_id_label = "id";
-    extra_content_label = "result";
-  } else if (keyword != "[[@LSP") {
-    return ErrorBuilder() << "Unrecognized @LSP keyword at `"
-                          << keyword.take_front(10) << "`";
-  }
-
-  static constexpr llvm::StringLiteral LspEnd = "]]";
-  auto body_end = body_start.find(LspEnd);
-  if (body_end == std::string::npos) {
-    return ErrorBuilder() << "Missing `" << LspEnd << "` after `" << keyword
-                          << "`";
-  }
-  llvm::StringRef body = body_start.take_front(body_end);
-  auto [method_or_id, extra_content] = body.split(":");
-
-  // Form the JSON.
-  std::string json = llvm::formatv(R"({{"jsonrpc": "2.0", "{0}": "{1}")",
-                                   method_or_id_label, method_or_id);
-  if (use_call_id) {
-    // Omit quotes on the ID because we know it's an integer.
-    json += llvm::formatv(R"(, "id": {0})", ++lsp_call_id);
-  }
-  if (!extra_content.empty()) {
-    json += ",";
-    if (extra_content_label.empty()) {
-      if (!extra_content.starts_with("\n")) {
-        json += " ";
-      }
-      json += extra_content;
-    } else {
-      json += llvm::formatv(R"( "{0}": {{{1}})", extra_content_label,
-                            extra_content);
-    }
-  }
-  json += "}";
-
-  // Add the Content-Length header. The `2` accounts for extra newlines.
-  auto json_with_header =
-      llvm::formatv("Content-Length: {0}\n\n{1}\n", json.size() + 2, json)
-          .str();
-  int keyword_len =
-      (body_start.data() + body_end + LspEnd.size()) - keyword.data();
-  content->replace(keyword_pos, keyword_len, json_with_header);
-  return keyword_pos + json_with_header.size();
-}
-
-// Replaces the keyword at the given position. Returns the position to start a
-// find for the next keyword.
-static auto ReplaceContentKeywordAt(std::string* content, size_t keyword_pos,
-                                    llvm::StringRef test_name, int& lsp_call_id)
-    -> ErrorOr<size_t> {
-  auto keyword = llvm::StringRef(*content).substr(keyword_pos);
-
-  // Line replacements aren't handled here.
-  static constexpr llvm::StringLiteral Line = "[[@LINE";
-  if (keyword.starts_with(Line)) {
-    // Just move past the prefix to find the next one.
-    return keyword_pos + Line.size();
-  }
-
-  // Replaced with the actual test name.
-  static constexpr llvm::StringLiteral TestName = "[[@TEST_NAME]]";
-  if (keyword.starts_with(TestName)) {
-    content->replace(keyword_pos, TestName.size(), test_name);
-    return keyword_pos + test_name.size();
-  }
-
-  if (keyword.starts_with("[[@LSP")) {
-    return ReplaceLspKeywordAt(content, keyword_pos, lsp_call_id);
-  }
-
-  return ErrorBuilder() << "Unexpected use of `[[@` at `"
-                        << keyword.substr(0, 5) << "`";
-}
-
-// Replaces the content keywords.
-//
-// TEST_NAME is the only content keyword at present, but we do validate that
-// other names are reserved.
-static auto ReplaceContentKeywords(llvm::StringRef filename,
-                                   std::string* content) -> ErrorOr<Success> {
-  static constexpr llvm::StringLiteral Prefix = "[[@";
-
-  auto keyword_pos = content->find(Prefix);
-  // Return early if not finding anything.
-  if (keyword_pos == std::string::npos) {
-    return Success();
-  }
-
-  // Construct the test name by getting the base name without the extension,
-  // then removing any "fail_" or "todo_" prefixes.
-  llvm::StringRef test_name = filename;
-  if (auto last_slash = test_name.rfind("/");
-      last_slash != llvm::StringRef::npos) {
-    test_name = test_name.substr(last_slash + 1);
-  }
-  if (auto ext_dot = test_name.find("."); ext_dot != llvm::StringRef::npos) {
-    test_name = test_name.substr(0, ext_dot);
-  }
-  // Note this also handles `fail_todo_` and `todo_fail_`.
-  test_name.consume_front("todo_");
-  test_name.consume_front("fail_");
-  test_name.consume_front("todo_");
-
-  // A counter for LSP calls.
-  int lsp_call_id = 0;
-  while (keyword_pos != std::string::npos) {
-    CARBON_ASSIGN_OR_RETURN(
-        auto keyword_end,
-        ReplaceContentKeywordAt(content, keyword_pos, test_name, lsp_call_id));
-    keyword_pos = content->find(Prefix, keyword_end);
-  }
-  return Success();
-}
-
-// Adds a file. Used for both split and unsplit test files.
-static auto AddTestFile(llvm::StringRef filename, std::string* content,
-                        llvm::SmallVector<FileTestBase::TestFile>* test_files)
-    -> ErrorOr<Success> {
-  CARBON_RETURN_IF_ERROR(ReplaceContentKeywords(filename, content));
-  test_files->push_back(
-      {.filename = filename.str(), .content = std::move(*content)});
-  content->clear();
-  return Success();
-}
-
-// Process file split ("---") lines when found. Returns true if the line is
-// consumed.
-static auto TryConsumeSplit(
-    llvm::StringRef line, llvm::StringRef line_trimmed, bool found_autoupdate,
-    int* line_index, SplitState* split,
-    llvm::SmallVector<FileTestBase::TestFile>* test_files,
-    llvm::SmallVector<FileTestLine>* non_check_lines) -> ErrorOr<bool> {
-  if (!line_trimmed.consume_front("// ---")) {
-    if (!split->has_splits() && !line_trimmed.starts_with("//") &&
-        !line_trimmed.empty()) {
-      split->found_code_pre_split = true;
-    }
-
-    // Add the line to the current file's content (which may not be a split
-    // file).
-    split->add_content(line);
-    return false;
-  }
-
-  if (!found_autoupdate) {
-    // If there's a split, all output is appended at the end of each file
-    // before AUTOUPDATE. We may want to change that, but it's not
-    // necessary to handle right now.
-    return ErrorBuilder() << "AUTOUPDATE/NOAUTOUPDATE setting must be in "
-                             "the first file.";
-  }
-
-  // On a file split, add the previous file, then start a new one.
-  if (split->has_splits()) {
-    CARBON_RETURN_IF_ERROR(
-        AddTestFile(split->filename, &split->content, test_files));
-  } else {
-    split->content.clear();
-    if (split->found_code_pre_split) {
-      // For the first split, we make sure there was no content prior.
-      return ErrorBuilder() << "When using split files, there must be no "
-                               "content before the first split file.";
-    }
-  }
-
-  ++split->file_index;
-  split->filename = line_trimmed.trim();
-  if (split->filename.empty()) {
-    return ErrorBuilder() << "Missing filename for split.";
-  }
-  // The split line is added to non_check_lines for retention in autoupdate, but
-  // is not added to the test file content.
-  *line_index = 0;
-  non_check_lines->push_back(
-      FileTestLine(split->file_index, *line_index, line));
-  return true;
-}
-
-// Converts a `FileCheck`-style expectation string into a single complete regex
-// string by escaping all regex characters outside of the designated `{{...}}`
-// regex sequences, and switching those to a normal regex sub-pattern syntax.
-static void ConvertExpectationStringToRegex(std::string& str) {
-  for (int pos = 0; pos < static_cast<int>(str.size());) {
-    switch (str[pos]) {
-      case '(':
-      case ')':
-      case '[':
-      case ']':
-      case '}':
-      case '.':
-      case '^':
-      case '$':
-      case '*':
-      case '+':
-      case '?':
-      case '|':
-      case '\\': {
-        // Escape regex characters.
-        str.insert(pos, "\\");
-        pos += 2;
-        break;
-      }
-      case '{': {
-        if (pos + 1 == static_cast<int>(str.size()) || str[pos + 1] != '{') {
-          // Single `{`, escape it.
-          str.insert(pos, "\\");
-          pos += 2;
-          break;
-        }
-
-        // Replace the `{{...}}` regex syntax with standard `(...)` syntax.
-        str.replace(pos, 2, "(");
-        for (++pos; pos < static_cast<int>(str.size() - 1); ++pos) {
-          if (str[pos] == '}' && str[pos + 1] == '}') {
-            str.replace(pos, 2, ")");
-            ++pos;
-            break;
-          }
-        }
-        break;
-      }
-      default: {
-        ++pos;
-      }
-    }
-  }
-}
-
-// Transforms an expectation on a given line from `FileCheck` syntax into a
-// standard regex matcher.
-static auto TransformExpectation(int line_index, llvm::StringRef in)
-    -> ErrorOr<Matcher<std::string>> {
-  if (in.empty()) {
-    return Matcher<std::string>{StrEq("")};
-  }
-  if (!in.consume_front(" ")) {
-    return ErrorBuilder() << "Malformated CHECK line: " << in;
-  }
-
-  // Check early if we have a regex component as we can avoid building an
-  // expensive matcher when not using those.
-  bool has_regex = in.find("{{") != llvm::StringRef::npos;
-
-  // Now scan the string and expand any keywords. Note that this needs to be
-  // `size_t` to correctly store `npos`.
-  size_t keyword_pos = in.find("[[");
-
-  // If there are neither keywords nor regex sequences, we can match the
-  // incoming string directly.
-  if (!has_regex && keyword_pos == llvm::StringRef::npos) {
-    return Matcher<std::string>{StrEq(in)};
-  }
-
-  std::string str = in.str();
-
-  // First expand the keywords.
-  while (keyword_pos != std::string::npos) {
-    llvm::StringRef line_keyword_cursor =
-        llvm::StringRef(str).substr(keyword_pos);
-    CARBON_CHECK(line_keyword_cursor.consume_front("[["));
-
-    static constexpr llvm::StringLiteral LineKeyword = "@LINE";
-    if (!line_keyword_cursor.consume_front(LineKeyword)) {
-      return ErrorBuilder()
-             << "Unexpected [[, should be {{\\[\\[}} at `"
-             << line_keyword_cursor.substr(0, 5) << "` in: " << in;
-    }
-
-    // Allow + or - here; consumeInteger handles -.
-    line_keyword_cursor.consume_front("+");
-    int offset;
-    // consumeInteger returns true for errors, not false.
-    if (line_keyword_cursor.consumeInteger(10, offset) ||
-        !line_keyword_cursor.consume_front("]]")) {
-      return ErrorBuilder()
-             << "Unexpected @LINE offset at `"
-             << line_keyword_cursor.substr(0, 5) << "` in: " << in;
-    }
-    std::string int_str = llvm::Twine(line_index + offset).str();
-    int remove_len = (line_keyword_cursor.data() - str.data()) - keyword_pos;
-    str.replace(keyword_pos, remove_len, int_str);
-    keyword_pos += int_str.size();
-    // Find the next keyword start or the end of the string.
-    keyword_pos = str.find("[[", keyword_pos);
-  }
-
-  // If there was no regex, we can directly match the adjusted string.
-  if (!has_regex) {
-    return Matcher<std::string>{StrEq(str)};
-  }
-
-  // Otherwise, we need to turn the entire string into a regex by escaping
-  // things outside the regex region and transforming the regex region into a
-  // normal syntax.
-  ConvertExpectationStringToRegex(str);
-  return Matcher<std::string>{MatchesRegex(str)};
-}
-
-// Once all content is processed, do any remaining split processing.
-static auto FinishSplit(llvm::StringRef test_name, SplitState* split,
-                        llvm::SmallVector<FileTestBase::TestFile>* test_files)
-    -> ErrorOr<Success> {
-  if (split->has_splits()) {
-    return AddTestFile(split->filename, &split->content, test_files);
-  } else {
-    // If no file splitting happened, use the main file as the test file.
-    // There will always be a `/` unless tests are in the repo root.
-    return AddTestFile(test_name.drop_front(test_name.rfind("/") + 1),
-                       &split->content, test_files);
-  }
-}
-
-// Process CHECK lines when found. Returns true if the line is consumed.
-static auto TryConsumeCheck(
-    int line_index, llvm::StringRef line, llvm::StringRef line_trimmed,
-    llvm::SmallVector<testing::Matcher<std::string>>* expected_stdout,
-    llvm::SmallVector<testing::Matcher<std::string>>* expected_stderr)
-    -> ErrorOr<bool> {
-  if (!line_trimmed.consume_front("// CHECK")) {
-    return false;
-  }
-
-  // Don't build expectations when doing an autoupdate. We don't want to
-  // break the autoupdate on an invalid CHECK line.
-  if (!absl::GetFlag(FLAGS_autoupdate)) {
-    llvm::SmallVector<Matcher<std::string>>* expected;
-    if (line_trimmed.consume_front(":STDOUT:")) {
-      expected = expected_stdout;
-    } else if (line_trimmed.consume_front(":STDERR:")) {
-      expected = expected_stderr;
-    } else {
-      return ErrorBuilder() << "Unexpected CHECK in input: " << line.str();
-    }
-    CARBON_ASSIGN_OR_RETURN(Matcher<std::string> check_matcher,
-                            TransformExpectation(line_index, line_trimmed));
-    expected->push_back(check_matcher);
-  }
-  return true;
-}
-
-// Processes ARGS and EXTRA-ARGS lines when found. Returns true if the line is
-// consumed.
-static auto TryConsumeArgs(llvm::StringRef line, llvm::StringRef line_trimmed,
-                           llvm::SmallVector<std::string>* args,
-                           llvm::SmallVector<std::string>* extra_args)
-    -> ErrorOr<bool> {
-  llvm::SmallVector<std::string>* arg_list = nullptr;
-  if (line_trimmed.consume_front("// ARGS: ")) {
-    arg_list = args;
-  } else if (line_trimmed.consume_front("// EXTRA-ARGS: ")) {
-    arg_list = extra_args;
-  } else {
-    return false;
-  }
-
-  if (!args->empty() || !extra_args->empty()) {
-    return ErrorBuilder() << "ARGS / EXTRA-ARGS specified multiple times: "
-                          << line.str();
-  }
-
-  // Split the line into arguments.
-  std::pair<llvm::StringRef, llvm::StringRef> cursor =
-      llvm::getToken(line_trimmed);
-  while (!cursor.first.empty()) {
-    arg_list->push_back(std::string(cursor.first));
-    cursor = llvm::getToken(cursor.second);
-  }
-
-  return true;
-}
-
-// Processes AUTOUPDATE lines when found. Returns true if the line is consumed.
-static auto TryConsumeAutoupdate(int line_index, llvm::StringRef line_trimmed,
-                                 bool* found_autoupdate,
-                                 std::optional<int>* autoupdate_line_number)
-    -> ErrorOr<bool> {
-  static constexpr llvm::StringLiteral Autoupdate = "// AUTOUPDATE";
-  static constexpr llvm::StringLiteral NoAutoupdate = "// NOAUTOUPDATE";
-  if (line_trimmed != Autoupdate && line_trimmed != NoAutoupdate) {
-    return false;
-  }
-  if (*found_autoupdate) {
-    return ErrorBuilder() << "Multiple AUTOUPDATE/NOAUTOUPDATE settings found";
-  }
-  *found_autoupdate = true;
-  if (line_trimmed == Autoupdate) {
-    *autoupdate_line_number = line_index;
-  }
-  return true;
-}
-
-// Processes SET-* lines when found. Returns true if the line is consumed.
-static auto TryConsumeSetFlag(llvm::StringRef line_trimmed,
-                              llvm::StringLiteral flag_name, bool* flag)
-    -> ErrorOr<bool> {
-  if (!line_trimmed.consume_front("// ") || line_trimmed != flag_name) {
-    return false;
-  }
-  if (*flag) {
-    return ErrorBuilder() << flag_name << " was specified multiple times";
-  }
-  *flag = true;
-  return true;
-}
-
-auto FileTestBase::ProcessTestFile(TestContext& context) -> ErrorOr<Success> {
-  // Original file content, and a cursor for walking through it.
-  llvm::StringRef file_content = context.input_content;
-  llvm::StringRef cursor = file_content;
-
-  // Whether either AUTOUDPATE or NOAUTOUPDATE was found.
-  bool found_autoupdate = false;
-
-  // The index in the current test file. Will be reset on splits.
-  int line_index = 0;
-
-  SplitState split;
-
-  // When autoupdating, we track whether we're inside conflict markers.
-  // Otherwise conflict markers are errors.
-  bool inside_conflict_marker = false;
-
-  while (!cursor.empty()) {
-    auto [line, next_cursor] = cursor.split("\n");
-    cursor = next_cursor;
-    auto line_trimmed = line.ltrim();
-
-    bool is_consumed = false;
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed,
-        TryConsumeConflictMarker(line, line_trimmed, &inside_conflict_marker));
-    if (is_consumed) {
-      continue;
-    }
-
-    // At this point, remaining lines are part of the test input.
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed,
-        TryConsumeSplit(line, line_trimmed, found_autoupdate, &line_index,
-                        &split, &context.test_files, &context.non_check_lines));
-    if (is_consumed) {
-      continue;
-    }
-
-    ++line_index;
-
-    // TIP lines have no impact on validation.
-    if (line_trimmed.starts_with("// TIP:")) {
-      continue;
-    }
-
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed,
-        TryConsumeCheck(line_index, line, line_trimmed,
-                        &context.expected_stdout, &context.expected_stderr));
-    if (is_consumed) {
-      continue;
-    }
-
-    // At this point, lines are retained as non-CHECK lines.
-    context.non_check_lines.push_back(
-        FileTestLine(split.file_index, line_index, line));
-
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed, TryConsumeArgs(line, line_trimmed, &context.test_args,
-                                    &context.extra_args));
-    if (is_consumed) {
-      continue;
-    }
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed,
-        TryConsumeAutoupdate(line_index, line_trimmed, &found_autoupdate,
-                             &context.autoupdate_line_number));
-    if (is_consumed) {
-      continue;
-    }
-    CARBON_ASSIGN_OR_RETURN(
-        is_consumed,
-        TryConsumeSetFlag(line_trimmed, "SET-CAPTURE-CONSOLE-OUTPUT",
-                          &context.capture_console_output));
-    if (is_consumed) {
-      continue;
-    }
-    CARBON_ASSIGN_OR_RETURN(is_consumed,
-                            TryConsumeSetFlag(line_trimmed, "SET-CHECK-SUBSET",
-                                              &context.check_subset));
-    if (is_consumed) {
-      continue;
-    }
-  }
-
-  if (!found_autoupdate) {
-    return Error("Missing AUTOUPDATE/NOAUTOUPDATE setting");
-  }
-
-  context.has_splits = split.has_splits();
-  CARBON_RETURN_IF_ERROR(FinishSplit(test_name_, &split, &context.test_files));
-
-  // Validate AUTOUPDATE-SPLIT use, and remove it from test files if present.
-  if (context.has_splits) {
-    constexpr llvm::StringLiteral AutoupdateSplit = "AUTOUPDATE-SPLIT";
-    for (const auto& test_file :
-         llvm::ArrayRef(context.test_files).drop_back()) {
-      if (test_file.filename == AutoupdateSplit) {
-        return Error("AUTOUPDATE-SPLIT must be the last split");
-      }
-    }
-    if (context.test_files.back().filename == AutoupdateSplit) {
-      if (!context.autoupdate_line_number) {
-        return Error("AUTOUPDATE-SPLIT requires AUTOUPDATE");
-      }
-      context.autoupdate_split = true;
-      context.test_files.pop_back();
-    }
-  }
-
-  // Assume there is always a suffix `\n` in output.
-  if (!context.expected_stdout.empty()) {
-    context.expected_stdout.push_back(StrEq(""));
-  }
-  if (!context.expected_stderr.empty()) {
-    context.expected_stderr.push_back(StrEq(""));
-  }
-
-  return Success();
-}
-
 // Returns the tests to run.
 // Returns the tests to run.
 static auto GetTests() -> llvm::SmallVector<std::string> {
 static auto GetTests() -> llvm::SmallVector<std::string> {
   // Prefer a user-specified list if present.
   // Prefer a user-specified list if present.

+ 0 - 81
testing/file_test/file_test_base.h

@@ -25,18 +25,6 @@ namespace Carbon::Testing {
 // A framework for testing files. See README.md for documentation.
 // A framework for testing files. See README.md for documentation.
 class FileTestBase : public testing::Test {
 class FileTestBase : public testing::Test {
  public:
  public:
-  struct TestFile {
-    friend void PrintTo(const TestFile& f, std::ostream* os) {
-      // Print content escaped.
-      llvm::raw_os_ostream os_wrap(*os);
-      os_wrap << "TestFile(" << f.filename << ", \"" << FormatEscaped(f.content)
-              << "\")";
-    }
-
-    std::string filename;
-    std::string content;
-  };
-
   // Provided for child class convenience.
   // Provided for child class convenience.
   using LineNumberReplacement = FileTestAutoupdater::LineNumberReplacement;
   using LineNumberReplacement = FileTestAutoupdater::LineNumberReplacement;
 
 
@@ -136,75 +124,6 @@ class FileTestBase : public testing::Test {
   auto test_name() const -> llvm::StringRef { return test_name_; }
   auto test_name() const -> llvm::StringRef { return test_name_; }
 
 
  private:
  private:
-  // Encapsulates test context generated by processing and running.
-  struct TestContext {
-    // The input test file content. Other parts may reference this.
-    std::string input_content;
-
-    // Lines which don't contain CHECKs, and thus need to be retained by
-    // autoupdate. Their file and line numbers are attached.
-    //
-    // If there are splits, then the splitting line is in the respective file.
-    // For N splits, the 0th file is the parts of the input file which are not
-    // in any split, plus one file per split file.
-    llvm::SmallVector<FileTestLine> non_check_lines;
-
-    // Whether there are splits.
-    bool has_splits = false;
-
-    // Arguments for the test, generated from ARGS.
-    llvm::SmallVector<std::string> test_args;
-
-    // Extra arguments for the test, generated from EXTRA-ARGS. Unlike ARGS,
-    // setting EXTRA-ARGS does not suppress the default arguments.
-    llvm::SmallVector<std::string> extra_args;
-
-    // Files in the test, generated by content and splits.
-    llvm::SmallVector<TestFile> test_files;
-
-    // The location of the autoupdate marker, for autoupdated files.
-    std::optional<int> autoupdate_line_number;
-
-    // Whether there should be an AUTOUPDATE-SPLIT.
-    bool autoupdate_split = false;
-
-    // Whether to capture stderr and stdout that would head to console,
-    // generated from SET-CAPTURE-CONSOLE-OUTPUT.
-    bool capture_console_output = false;
-
-    // Whether checks are a subset, generated from SET-CHECK-SUBSET.
-    bool check_subset = false;
-
-    // Whether `--dump_output` is specified, causing `Run` output to go to the
-    // console. Output is typically captured for tests and autoupdate.
-    bool dump_output = false;
-
-    // stdout and stderr based on CHECK lines in the file.
-    llvm::SmallVector<testing::Matcher<std::string>> expected_stdout;
-    llvm::SmallVector<testing::Matcher<std::string>> expected_stderr;
-
-    // stdout and stderr from Run. 16 is arbitrary but a required value.
-    llvm::SmallString<16> actual_stdout;
-    llvm::SmallString<16> actual_stderr;
-
-    RunResult run_result = {.success = false};
-  };
-
-  // Processes the test file and runs the test. Returns an error if something
-  // went wrong.
-  auto ProcessTestFileAndRun(TestContext& context) -> ErrorOr<Success>;
-
-  // Does replacements in ARGS for %s and %t.
-  auto DoArgReplacements(llvm::SmallVector<std::string>& test_args,
-                         const llvm::SmallVector<TestFile>& test_files)
-      -> ErrorOr<Success>;
-
-  // Processes the test input, producing test files and expected output.
-  auto ProcessTestFile(TestContext& context) -> ErrorOr<Success>;
-
-  // Runs the FileTestAutoupdater, returning the result.
-  auto RunAutoupdater(const TestContext& context, bool dry_run) -> bool;
-
   // An optional mutex for output. If provided, it will be locked during `Run`
   // An optional mutex for output. If provided, it will be locked during `Run`
   // when stderr/stdout are being captured (SET-CAPTURE-CONSOLE-OUTPUT), in
   // when stderr/stdout are being captured (SET-CAPTURE-CONSOLE-OUTPUT), in
   // order to avoid output conflicts.
   // order to avoid output conflicts.

+ 191 - 0
testing/file_test/run_test.cpp

@@ -0,0 +1,191 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "testing/file_test/run_test.h"
+
+#include <gtest/gtest.h>
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "testing/file_test/file_test_base.h"
+#include "testing/file_test/test_file.h"
+
+namespace Carbon::Testing {
+
+// While these are marked as "internal" APIs, they seem to work and be pretty
+// widely used for their exact documented behavior.
+using ::testing::internal::CaptureStderr;
+using ::testing::internal::CaptureStdout;
+using ::testing::internal::GetCapturedStderr;
+using ::testing::internal::GetCapturedStdout;
+
+static constexpr llvm::StringLiteral StdinFilename = "STDIN";
+
+// Does replacements in ARGS for %s and %t.
+static auto DoArgReplacements(
+
+    llvm::SmallVector<std::string>& test_args,
+    const llvm::StringMap<std::string>& replacements,
+    const llvm::SmallVector<TestFile::Split>& split_files) -> ErrorOr<Success> {
+  for (auto* it = test_args.begin(); it != test_args.end(); ++it) {
+    auto percent = it->find("%");
+    if (percent == std::string::npos) {
+      continue;
+    }
+
+    if (percent + 1 >= it->size()) {
+      return ErrorBuilder() << "% is not allowed on its own: " << *it;
+    }
+    char c = (*it)[percent + 1];
+    switch (c) {
+      case 's': {
+        if (*it != "%s") {
+          return ErrorBuilder() << "%s must be the full argument: " << *it;
+        }
+        it = test_args.erase(it);
+        for (const auto& split : split_files) {
+          const std::string& filename = split.filename;
+          if (filename == StdinFilename || filename.ends_with(".h")) {
+            continue;
+          }
+          it = test_args.insert(it, filename);
+          ++it;
+        }
+        // Back up once because the for loop will advance.
+        --it;
+        break;
+      }
+      case 't': {
+        char* tmpdir = getenv("TEST_TMPDIR");
+        CARBON_CHECK(tmpdir != nullptr);
+        it->replace(percent, 2, llvm::formatv("{0}/temp_file", tmpdir));
+        break;
+      }
+      case '{': {
+        auto end_brace = it->find('}', percent);
+        if (end_brace == std::string::npos) {
+          return ErrorBuilder() << "%{ without closing }: " << *it;
+        }
+        llvm::StringRef substr(&*(it->begin() + percent + 2),
+                               end_brace - percent - 2);
+        auto replacement = replacements.find(substr);
+        if (replacement == replacements.end()) {
+          return ErrorBuilder()
+                 << "unknown substitution: %{" << substr << "}: " << *it;
+        }
+        it->replace(percent, end_brace - percent + 1, replacement->second);
+        break;
+      }
+      default:
+        return ErrorBuilder() << "%" << c << " is not supported: " << *it;
+    }
+  }
+  return Success();
+}
+
+auto ProcessTestFileAndRun(FileTestBase* test_base, std::mutex* output_mutex,
+                           bool dump_output, bool running_autoupdate)
+    -> ErrorOr<TestFile> {
+  // Load expected output.
+  CARBON_ASSIGN_OR_RETURN(
+      TestFile test_file,
+      ProcessTestFile(test_base->test_name(), running_autoupdate));
+
+  // Process arguments.
+  if (test_file.test_args.empty()) {
+    test_file.test_args = test_base->GetDefaultArgs();
+    test_file.test_args.append(test_file.extra_args);
+  }
+  CARBON_RETURN_IF_ERROR(DoArgReplacements(test_file.test_args,
+                                           test_base->GetArgReplacements(),
+                                           test_file.file_splits));
+
+  // stdin needs to exist on-disk for compatibility. We'll use a pointer for it.
+  FILE* input_stream = nullptr;
+  auto erase_input_on_exit = llvm::make_scope_exit([&input_stream]() {
+    if (input_stream) {
+      // fclose should delete the tmpfile.
+      fclose(input_stream);
+      input_stream = nullptr;
+    }
+  });
+
+  // Create the files in-memory.
+  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> fs =
+      new llvm::vfs::InMemoryFileSystem;
+  for (const auto& split : test_file.file_splits) {
+    if (split.filename == StdinFilename) {
+      input_stream = tmpfile();
+      fwrite(split.content.c_str(), sizeof(char), split.content.size(),
+             input_stream);
+      rewind(input_stream);
+    } else if (!fs->addFile(split.filename, /*ModificationTime=*/0,
+                            llvm::MemoryBuffer::getMemBuffer(
+                                split.content, split.filename,
+                                /*RequiresNullTerminator=*/false))) {
+      return ErrorBuilder() << "File is repeated: " << split.filename;
+    }
+  }
+  // Convert the arguments to StringRef and const char* to match the
+  // expectations of PrettyStackTraceProgram and Run.
+  llvm::SmallVector<llvm::StringRef> test_args_ref;
+  llvm::SmallVector<const char*> test_argv_for_stack_trace;
+  test_args_ref.reserve(test_file.test_args.size());
+  test_argv_for_stack_trace.reserve(test_file.test_args.size() + 1);
+  for (const auto& arg : test_file.test_args) {
+    test_args_ref.push_back(arg);
+    test_argv_for_stack_trace.push_back(arg.c_str());
+  }
+  // Add a trailing null so that this is a proper argv.
+  test_argv_for_stack_trace.push_back(nullptr);
+
+  // Add a stack trace entry for the test invocation.
+  llvm::PrettyStackTraceProgram stack_trace_entry(
+      test_argv_for_stack_trace.size() - 1, test_argv_for_stack_trace.data());
+
+  // Execution must be serialized for either serial tests or console output.
+  std::unique_lock<std::mutex> output_lock;
+  if (output_mutex &&
+      (test_file.capture_console_output || !test_base->AllowParallelRun())) {
+    output_lock = std::unique_lock<std::mutex>(*output_mutex);
+  }
+
+  // Conditionally capture console output. We use a scope exit to ensure the
+  // captures terminate even on run failures.
+  if (test_file.capture_console_output) {
+    CaptureStderr();
+    CaptureStdout();
+  }
+
+  // Prepare string streams to capture output. In order to address casting
+  // constraints, we split calls to Run as a ternary based on whether we want to
+  // capture output.
+  llvm::raw_svector_ostream output_stream(test_file.actual_stdout);
+  llvm::raw_svector_ostream error_stream(test_file.actual_stderr);
+
+  ErrorOr<FileTestBase::RunResult> run_result =
+      dump_output ? test_base->Run(test_args_ref, fs, input_stream,
+                                   llvm::outs(), llvm::errs())
+                  : test_base->Run(test_args_ref, fs, input_stream,
+                                   output_stream, error_stream);
+
+  // Ensure stdout/stderr are always fetched, even when discarded on error.
+  if (test_file.capture_console_output) {
+    // No need to flush stderr.
+    llvm::outs().flush();
+    test_file.actual_stdout += GetCapturedStdout();
+    test_file.actual_stderr += GetCapturedStderr();
+  }
+
+  if (!run_result.ok()) {
+    return std::move(run_result).error();
+  }
+  test_file.run_result = std::move(*run_result);
+  return test_file;
+}
+
+}  // namespace Carbon::Testing

+ 22 - 0
testing/file_test/run_test.h

@@ -0,0 +1,22 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TESTING_FILE_TEST_RUN_TEST_H_
+#define CARBON_TESTING_FILE_TEST_RUN_TEST_H_
+
+#include "common/error.h"
+#include "testing/file_test/file_test_base.h"
+#include "testing/file_test/test_file.h"
+
+namespace Carbon::Testing {
+
+// Processes the test file and runs the test. Returns an error if something
+// went wrong.
+auto ProcessTestFileAndRun(FileTestBase* test_base, std::mutex* output_mutex,
+                           bool dump_output, bool running_autoupdate)
+    -> ErrorOr<TestFile>;
+
+}  // namespace Carbon::Testing
+
+#endif  // CARBON_TESTING_FILE_TEST_RUN_TEST_H_

+ 660 - 0
testing/file_test/test_file.cpp

@@ -0,0 +1,660 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "testing/file_test/test_file.h"
+
+#include <fstream>
+
+#include "llvm/ADT/StringExtras.h"
+#include "testing/base/file_helpers.h"
+
+namespace Carbon::Testing {
+
+using ::testing::Matcher;
+using ::testing::MatchesRegex;
+using ::testing::StrEq;
+
+// Processes conflict markers, including tracking of whether code is within a
+// conflict marker. Returns true if the line is consumed.
+static auto TryConsumeConflictMarker(bool running_autoupdate,
+                                     llvm::StringRef line,
+                                     llvm::StringRef line_trimmed,
+                                     bool* inside_conflict_marker)
+    -> ErrorOr<bool> {
+  bool is_start = line.starts_with("<<<<<<<");
+  bool is_middle = line.starts_with("=======") || line.starts_with("|||||||");
+  bool is_end = line.starts_with(">>>>>>>");
+
+  // When running the test, any conflict marker is an error.
+  if (!running_autoupdate && (is_start || is_middle || is_end)) {
+    return ErrorBuilder() << "Conflict marker found:\n" << line;
+  }
+
+  // Autoupdate tracks conflict markers for context, and will discard
+  // conflicting lines when it can autoupdate them.
+  if (*inside_conflict_marker) {
+    if (is_start) {
+      return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
+                            << line;
+    }
+    if (is_middle) {
+      return true;
+    }
+    if (is_end) {
+      *inside_conflict_marker = false;
+      return true;
+    }
+
+    // Look for CHECK and TIP lines, which can be discarded.
+    if (line_trimmed.starts_with("// CHECK:STDOUT:") ||
+        line_trimmed.starts_with("// CHECK:STDERR:") ||
+        line_trimmed.starts_with("// TIP:")) {
+      return true;
+    }
+
+    return ErrorBuilder()
+           << "Autoupdate can't discard non-CHECK lines inside conflicts:\n"
+           << line;
+  } else {
+    if (is_start) {
+      *inside_conflict_marker = true;
+      return true;
+    }
+    if (is_middle || is_end) {
+      return ErrorBuilder() << "Unexpected conflict marker outside conflict:\n"
+                            << line;
+    }
+    return false;
+  }
+}
+
+// State for file splitting logic: TryConsumeSplit and FinishSplit.
+struct SplitState {
+  auto has_splits() const -> bool { return file_index > 0; }
+
+  auto add_content(llvm::StringRef line) -> void {
+    content.append(line.str());
+    content.append("\n");
+  }
+
+  // Whether content has been found. Only updated before a file split is found
+  // (which may be never).
+  bool found_code_pre_split = false;
+
+  // The current file name, considering splits. Empty for the default file.
+  llvm::StringRef filename = "";
+
+  // The accumulated content for the file being built. This may elide some of
+  // the original content, such as conflict markers.
+  std::string content;
+
+  // The current file index.
+  int file_index = 0;
+};
+
+// Reformats `[[@LSP:` and similar keyword as an LSP call with headers.
+static auto ReplaceLspKeywordAt(std::string* content, size_t keyword_pos,
+                                int& lsp_call_id) -> ErrorOr<size_t> {
+  llvm::StringRef content_at_keyword =
+      llvm::StringRef(*content).substr(keyword_pos);
+
+  auto [keyword, body_start] = content_at_keyword.split(":");
+  if (body_start.empty()) {
+    return ErrorBuilder() << "Missing `:` for `"
+                          << content_at_keyword.take_front(10) << "`";
+  }
+
+  // Whether the first param is a method or id.
+  llvm::StringRef method_or_id_label = "method";
+  // Whether to attach the `lsp_call_id`.
+  bool use_call_id = false;
+  // The JSON label for extra content.
+  llvm::StringRef extra_content_label;
+  if (keyword == "[[@LSP-CALL") {
+    use_call_id = true;
+    extra_content_label = "params";
+  } else if (keyword == "[[@LSP-NOTIFY") {
+    extra_content_label = "params";
+  } else if (keyword == "[[@LSP-REPLY") {
+    method_or_id_label = "id";
+    extra_content_label = "result";
+  } else if (keyword != "[[@LSP") {
+    return ErrorBuilder() << "Unrecognized @LSP keyword at `"
+                          << keyword.take_front(10) << "`";
+  }
+
+  static constexpr llvm::StringLiteral LspEnd = "]]";
+  auto body_end = body_start.find(LspEnd);
+  if (body_end == std::string::npos) {
+    return ErrorBuilder() << "Missing `" << LspEnd << "` after `" << keyword
+                          << "`";
+  }
+  llvm::StringRef body = body_start.take_front(body_end);
+  auto [method_or_id, extra_content] = body.split(":");
+
+  // Form the JSON.
+  std::string json = llvm::formatv(R"({{"jsonrpc": "2.0", "{0}": "{1}")",
+                                   method_or_id_label, method_or_id);
+  if (use_call_id) {
+    // Omit quotes on the ID because we know it's an integer.
+    json += llvm::formatv(R"(, "id": {0})", ++lsp_call_id);
+  }
+  if (!extra_content.empty()) {
+    json += ",";
+    if (extra_content_label.empty()) {
+      if (!extra_content.starts_with("\n")) {
+        json += " ";
+      }
+      json += extra_content;
+    } else {
+      json += llvm::formatv(R"( "{0}": {{{1}})", extra_content_label,
+                            extra_content);
+    }
+  }
+  json += "}";
+
+  // Add the Content-Length header. The `2` accounts for extra newlines.
+  auto json_with_header =
+      llvm::formatv("Content-Length: {0}\n\n{1}\n", json.size() + 2, json)
+          .str();
+  int keyword_len =
+      (body_start.data() + body_end + LspEnd.size()) - keyword.data();
+  content->replace(keyword_pos, keyword_len, json_with_header);
+  return keyword_pos + json_with_header.size();
+}
+
+// Replaces the keyword at the given position. Returns the position to start a
+// find for the next keyword.
+static auto ReplaceContentKeywordAt(std::string* content, size_t keyword_pos,
+                                    llvm::StringRef test_name, int& lsp_call_id)
+    -> ErrorOr<size_t> {
+  auto keyword = llvm::StringRef(*content).substr(keyword_pos);
+
+  // Line replacements aren't handled here.
+  static constexpr llvm::StringLiteral Line = "[[@LINE";
+  if (keyword.starts_with(Line)) {
+    // Just move past the prefix to find the next one.
+    return keyword_pos + Line.size();
+  }
+
+  // Replaced with the actual test name.
+  static constexpr llvm::StringLiteral TestName = "[[@TEST_NAME]]";
+  if (keyword.starts_with(TestName)) {
+    content->replace(keyword_pos, TestName.size(), test_name);
+    return keyword_pos + test_name.size();
+  }
+
+  if (keyword.starts_with("[[@LSP")) {
+    return ReplaceLspKeywordAt(content, keyword_pos, lsp_call_id);
+  }
+
+  return ErrorBuilder() << "Unexpected use of `[[@` at `"
+                        << keyword.substr(0, 5) << "`";
+}
+
+// Replaces the content keywords.
+//
+// TEST_NAME is the only content keyword at present, but we do validate that
+// other names are reserved.
+static auto ReplaceContentKeywords(llvm::StringRef filename,
+                                   std::string* content) -> ErrorOr<Success> {
+  static constexpr llvm::StringLiteral Prefix = "[[@";
+
+  auto keyword_pos = content->find(Prefix);
+  // Return early if not finding anything.
+  if (keyword_pos == std::string::npos) {
+    return Success();
+  }
+
+  // Construct the test name by getting the base name without the extension,
+  // then removing any "fail_" or "todo_" prefixes.
+  llvm::StringRef test_name = filename;
+  if (auto last_slash = test_name.rfind("/");
+      last_slash != llvm::StringRef::npos) {
+    test_name = test_name.substr(last_slash + 1);
+  }
+  if (auto ext_dot = test_name.find("."); ext_dot != llvm::StringRef::npos) {
+    test_name = test_name.substr(0, ext_dot);
+  }
+  // Note this also handles `fail_todo_` and `todo_fail_`.
+  test_name.consume_front("todo_");
+  test_name.consume_front("fail_");
+  test_name.consume_front("todo_");
+
+  // A counter for LSP calls.
+  int lsp_call_id = 0;
+  while (keyword_pos != std::string::npos) {
+    CARBON_ASSIGN_OR_RETURN(
+        auto keyword_end,
+        ReplaceContentKeywordAt(content, keyword_pos, test_name, lsp_call_id));
+    keyword_pos = content->find(Prefix, keyword_end);
+  }
+  return Success();
+}
+
+// Adds a file. Used for both split and unsplit test files.
+static auto AddSplit(llvm::StringRef filename, std::string* content,
+                     llvm::SmallVector<TestFile::Split>* file_splits)
+    -> ErrorOr<Success> {
+  CARBON_RETURN_IF_ERROR(ReplaceContentKeywords(filename, content));
+  file_splits->push_back(
+      {.filename = filename.str(), .content = std::move(*content)});
+  content->clear();
+  return Success();
+}
+
+// Process file split ("---") lines when found. Returns true if the line is
+// consumed.
+static auto TryConsumeSplit(llvm::StringRef line, llvm::StringRef line_trimmed,
+                            bool found_autoupdate, int* line_index,
+                            SplitState* split,
+                            llvm::SmallVector<TestFile::Split>* file_splits,
+                            llvm::SmallVector<FileTestLine>* non_check_lines)
+    -> ErrorOr<bool> {
+  if (!line_trimmed.consume_front("// ---")) {
+    if (!split->has_splits() && !line_trimmed.starts_with("//") &&
+        !line_trimmed.empty()) {
+      split->found_code_pre_split = true;
+    }
+
+    // Add the line to the current file's content (which may not be a split
+    // file).
+    split->add_content(line);
+    return false;
+  }
+
+  if (!found_autoupdate) {
+    // If there's a split, all output is appended at the end of each file
+    // before AUTOUPDATE. We may want to change that, but it's not
+    // necessary to handle right now.
+    return ErrorBuilder() << "AUTOUPDATE/NOAUTOUPDATE setting must be in "
+                             "the first file.";
+  }
+
+  // On a file split, add the previous file, then start a new one.
+  if (split->has_splits()) {
+    CARBON_RETURN_IF_ERROR(
+        AddSplit(split->filename, &split->content, file_splits));
+  } else {
+    split->content.clear();
+    if (split->found_code_pre_split) {
+      // For the first split, we make sure there was no content prior.
+      return ErrorBuilder() << "When using split files, there must be no "
+                               "content before the first split file.";
+    }
+  }
+
+  ++split->file_index;
+  split->filename = line_trimmed.trim();
+  if (split->filename.empty()) {
+    return ErrorBuilder() << "Missing filename for split.";
+  }
+  // The split line is added to non_check_lines for retention in autoupdate, but
+  // is not added to the test file content.
+  *line_index = 0;
+  non_check_lines->push_back(
+      FileTestLine(split->file_index, *line_index, line));
+  return true;
+}
+
+// Converts a `FileCheck`-style expectation string into a single complete regex
+// string by escaping all regex characters outside of the designated `{{...}}`
+// regex sequences, and switching those to a normal regex sub-pattern syntax.
+static void ConvertExpectationStringToRegex(std::string& str) {
+  for (int pos = 0; pos < static_cast<int>(str.size());) {
+    switch (str[pos]) {
+      case '(':
+      case ')':
+      case '[':
+      case ']':
+      case '}':
+      case '.':
+      case '^':
+      case '$':
+      case '*':
+      case '+':
+      case '?':
+      case '|':
+      case '\\': {
+        // Escape regex characters.
+        str.insert(pos, "\\");
+        pos += 2;
+        break;
+      }
+      case '{': {
+        if (pos + 1 == static_cast<int>(str.size()) || str[pos + 1] != '{') {
+          // Single `{`, escape it.
+          str.insert(pos, "\\");
+          pos += 2;
+          break;
+        }
+
+        // Replace the `{{...}}` regex syntax with standard `(...)` syntax.
+        str.replace(pos, 2, "(");
+        for (++pos; pos < static_cast<int>(str.size() - 1); ++pos) {
+          if (str[pos] == '}' && str[pos + 1] == '}') {
+            str.replace(pos, 2, ")");
+            ++pos;
+            break;
+          }
+        }
+        break;
+      }
+      default: {
+        ++pos;
+      }
+    }
+  }
+}
+
+// Transforms an expectation on a given line from `FileCheck` syntax into a
+// standard regex matcher.
+static auto TransformExpectation(int line_index, llvm::StringRef in)
+    -> ErrorOr<Matcher<std::string>> {
+  if (in.empty()) {
+    return Matcher<std::string>{StrEq("")};
+  }
+  if (!in.consume_front(" ")) {
+    return ErrorBuilder() << "Malformated CHECK line: " << in;
+  }
+
+  // Check early if we have a regex component as we can avoid building an
+  // expensive matcher when not using those.
+  bool has_regex = in.find("{{") != llvm::StringRef::npos;
+
+  // Now scan the string and expand any keywords. Note that this needs to be
+  // `size_t` to correctly store `npos`.
+  size_t keyword_pos = in.find("[[");
+
+  // If there are neither keywords nor regex sequences, we can match the
+  // incoming string directly.
+  if (!has_regex && keyword_pos == llvm::StringRef::npos) {
+    return Matcher<std::string>{StrEq(in)};
+  }
+
+  std::string str = in.str();
+
+  // First expand the keywords.
+  while (keyword_pos != std::string::npos) {
+    llvm::StringRef line_keyword_cursor =
+        llvm::StringRef(str).substr(keyword_pos);
+    CARBON_CHECK(line_keyword_cursor.consume_front("[["));
+
+    static constexpr llvm::StringLiteral LineKeyword = "@LINE";
+    if (!line_keyword_cursor.consume_front(LineKeyword)) {
+      return ErrorBuilder()
+             << "Unexpected [[, should be {{\\[\\[}} at `"
+             << line_keyword_cursor.substr(0, 5) << "` in: " << in;
+    }
+
+    // Allow + or - here; consumeInteger handles -.
+    line_keyword_cursor.consume_front("+");
+    int offset;
+    // consumeInteger returns true for errors, not false.
+    if (line_keyword_cursor.consumeInteger(10, offset) ||
+        !line_keyword_cursor.consume_front("]]")) {
+      return ErrorBuilder()
+             << "Unexpected @LINE offset at `"
+             << line_keyword_cursor.substr(0, 5) << "` in: " << in;
+    }
+    std::string int_str = llvm::Twine(line_index + offset).str();
+    int remove_len = (line_keyword_cursor.data() - str.data()) - keyword_pos;
+    str.replace(keyword_pos, remove_len, int_str);
+    keyword_pos += int_str.size();
+    // Find the next keyword start or the end of the string.
+    keyword_pos = str.find("[[", keyword_pos);
+  }
+
+  // If there was no regex, we can directly match the adjusted string.
+  if (!has_regex) {
+    return Matcher<std::string>{StrEq(str)};
+  }
+
+  // Otherwise, we need to turn the entire string into a regex by escaping
+  // things outside the regex region and transforming the regex region into a
+  // normal syntax.
+  ConvertExpectationStringToRegex(str);
+  return Matcher<std::string>{MatchesRegex(str)};
+}
+
+// Once all content is processed, do any remaining split processing.
+static auto FinishSplit(llvm::StringRef test_name, SplitState* split,
+                        llvm::SmallVector<TestFile::Split>* file_splits)
+    -> ErrorOr<Success> {
+  if (split->has_splits()) {
+    return AddSplit(split->filename, &split->content, file_splits);
+  } else {
+    // If no file splitting happened, use the main file as the test file.
+    // There will always be a `/` unless tests are in the repo root.
+    return AddSplit(test_name.drop_front(test_name.rfind("/") + 1),
+                    &split->content, file_splits);
+  }
+}
+
+// Process CHECK lines when found. Returns true if the line is consumed.
+static auto TryConsumeCheck(
+    bool running_autoupdate, int line_index, llvm::StringRef line,
+    llvm::StringRef line_trimmed,
+    llvm::SmallVector<testing::Matcher<std::string>>* expected_stdout,
+    llvm::SmallVector<testing::Matcher<std::string>>* expected_stderr)
+    -> ErrorOr<bool> {
+  if (!line_trimmed.consume_front("// CHECK")) {
+    return false;
+  }
+
+  // Don't build expectations when doing an autoupdate. We don't want to
+  // break the autoupdate on an invalid CHECK line.
+  if (!running_autoupdate) {
+    llvm::SmallVector<Matcher<std::string>>* expected;
+    if (line_trimmed.consume_front(":STDOUT:")) {
+      expected = expected_stdout;
+    } else if (line_trimmed.consume_front(":STDERR:")) {
+      expected = expected_stderr;
+    } else {
+      return ErrorBuilder() << "Unexpected CHECK in input: " << line.str();
+    }
+    CARBON_ASSIGN_OR_RETURN(Matcher<std::string> check_matcher,
+                            TransformExpectation(line_index, line_trimmed));
+    expected->push_back(check_matcher);
+  }
+  return true;
+}
+
+// Processes ARGS and EXTRA-ARGS lines when found. Returns true if the line is
+// consumed.
+static auto TryConsumeArgs(llvm::StringRef line, llvm::StringRef line_trimmed,
+                           llvm::SmallVector<std::string>* args,
+                           llvm::SmallVector<std::string>* extra_args)
+    -> ErrorOr<bool> {
+  llvm::SmallVector<std::string>* arg_list = nullptr;
+  if (line_trimmed.consume_front("// ARGS: ")) {
+    arg_list = args;
+  } else if (line_trimmed.consume_front("// EXTRA-ARGS: ")) {
+    arg_list = extra_args;
+  } else {
+    return false;
+  }
+
+  if (!args->empty() || !extra_args->empty()) {
+    return ErrorBuilder() << "ARGS / EXTRA-ARGS specified multiple times: "
+                          << line.str();
+  }
+
+  // Split the line into arguments.
+  std::pair<llvm::StringRef, llvm::StringRef> cursor =
+      llvm::getToken(line_trimmed);
+  while (!cursor.first.empty()) {
+    arg_list->push_back(std::string(cursor.first));
+    cursor = llvm::getToken(cursor.second);
+  }
+
+  return true;
+}
+
+// Processes AUTOUPDATE lines when found. Returns true if the line is consumed.
+static auto TryConsumeAutoupdate(int line_index, llvm::StringRef line_trimmed,
+                                 bool* found_autoupdate,
+                                 std::optional<int>* autoupdate_line_number)
+    -> ErrorOr<bool> {
+  static constexpr llvm::StringLiteral Autoupdate = "// AUTOUPDATE";
+  static constexpr llvm::StringLiteral NoAutoupdate = "// NOAUTOUPDATE";
+  if (line_trimmed != Autoupdate && line_trimmed != NoAutoupdate) {
+    return false;
+  }
+  if (*found_autoupdate) {
+    return ErrorBuilder() << "Multiple AUTOUPDATE/NOAUTOUPDATE settings found";
+  }
+  *found_autoupdate = true;
+  if (line_trimmed == Autoupdate) {
+    *autoupdate_line_number = line_index;
+  }
+  return true;
+}
+
+// Processes SET-* lines when found. Returns true if the line is consumed.
+static auto TryConsumeSetFlag(llvm::StringRef line_trimmed,
+                              llvm::StringLiteral flag_name, bool* flag)
+    -> ErrorOr<bool> {
+  if (!line_trimmed.consume_front("// ") || line_trimmed != flag_name) {
+    return false;
+  }
+  if (*flag) {
+    return ErrorBuilder() << flag_name << " was specified multiple times";
+  }
+  *flag = true;
+  return true;
+}
+
+auto ProcessTestFile(llvm::StringRef test_name, bool running_autoupdate)
+    -> ErrorOr<TestFile> {
+  TestFile test_file;
+
+  // Store the file so that file_splits can use references to content.
+  CARBON_ASSIGN_OR_RETURN(test_file.input_content, ReadFile(test_name.str()));
+
+  // Original file content, and a cursor for walking through it.
+  llvm::StringRef file_content = test_file.input_content;
+  llvm::StringRef cursor = file_content;
+
+  // Whether either AUTOUDPATE or NOAUTOUPDATE was found.
+  bool found_autoupdate = false;
+
+  // The index in the current test file. Will be reset on splits.
+  int line_index = 0;
+
+  SplitState split;
+
+  // When autoupdating, we track whether we're inside conflict markers.
+  // Otherwise conflict markers are errors.
+  bool inside_conflict_marker = false;
+
+  while (!cursor.empty()) {
+    auto [line, next_cursor] = cursor.split("\n");
+    cursor = next_cursor;
+    auto line_trimmed = line.ltrim();
+
+    bool is_consumed = false;
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed,
+        TryConsumeConflictMarker(running_autoupdate, line, line_trimmed,
+                                 &inside_conflict_marker));
+    if (is_consumed) {
+      continue;
+    }
+
+    // At this point, remaining lines are part of the test input.
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed,
+        TryConsumeSplit(line, line_trimmed, found_autoupdate, &line_index,
+                        &split, &test_file.file_splits,
+                        &test_file.non_check_lines));
+    if (is_consumed) {
+      continue;
+    }
+
+    ++line_index;
+
+    // TIP lines have no impact on validation.
+    if (line_trimmed.starts_with("// TIP:")) {
+      continue;
+    }
+
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed, TryConsumeCheck(running_autoupdate, line_index, line,
+                                     line_trimmed, &test_file.expected_stdout,
+                                     &test_file.expected_stderr));
+    if (is_consumed) {
+      continue;
+    }
+
+    // At this point, lines are retained as non-CHECK lines.
+    test_file.non_check_lines.push_back(
+        FileTestLine(split.file_index, line_index, line));
+
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed, TryConsumeArgs(line, line_trimmed, &test_file.test_args,
+                                    &test_file.extra_args));
+    if (is_consumed) {
+      continue;
+    }
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed,
+        TryConsumeAutoupdate(line_index, line_trimmed, &found_autoupdate,
+                             &test_file.autoupdate_line_number));
+    if (is_consumed) {
+      continue;
+    }
+    CARBON_ASSIGN_OR_RETURN(
+        is_consumed,
+        TryConsumeSetFlag(line_trimmed, "SET-CAPTURE-CONSOLE-OUTPUT",
+                          &test_file.capture_console_output));
+    if (is_consumed) {
+      continue;
+    }
+    CARBON_ASSIGN_OR_RETURN(is_consumed,
+                            TryConsumeSetFlag(line_trimmed, "SET-CHECK-SUBSET",
+                                              &test_file.check_subset));
+    if (is_consumed) {
+      continue;
+    }
+  }
+
+  if (!found_autoupdate) {
+    return Error("Missing AUTOUPDATE/NOAUTOUPDATE setting");
+  }
+
+  test_file.has_splits = split.has_splits();
+  CARBON_RETURN_IF_ERROR(
+      FinishSplit(test_name, &split, &test_file.file_splits));
+
+  // Validate AUTOUPDATE-SPLIT use, and remove it from test files if present.
+  if (test_file.has_splits) {
+    constexpr llvm::StringLiteral AutoupdateSplit = "AUTOUPDATE-SPLIT";
+    for (const auto& test_file :
+         llvm::ArrayRef(test_file.file_splits).drop_back()) {
+      if (test_file.filename == AutoupdateSplit) {
+        return Error("AUTOUPDATE-SPLIT must be the last split");
+      }
+    }
+    if (test_file.file_splits.back().filename == AutoupdateSplit) {
+      if (!test_file.autoupdate_line_number) {
+        return Error("AUTOUPDATE-SPLIT requires AUTOUPDATE");
+      }
+      test_file.autoupdate_split = true;
+      test_file.file_splits.pop_back();
+    }
+  }
+
+  // Assume there is always a suffix `\n` in output.
+  if (!test_file.expected_stdout.empty()) {
+    test_file.expected_stdout.push_back(StrEq(""));
+  }
+  if (!test_file.expected_stderr.empty()) {
+    test_file.expected_stderr.push_back(StrEq(""));
+  }
+
+  return std::move(test_file);
+}
+
+}  // namespace Carbon::Testing

+ 91 - 0
testing/file_test/test_file.h

@@ -0,0 +1,91 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TESTING_FILE_TEST_TEST_FILE_H_
+#define CARBON_TESTING_FILE_TEST_TEST_FILE_H_
+
+#include <gmock/gmock.h>
+
+#include <string>
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "testing/file_test/file_test_base.h"
+#include "testing/file_test/line.h"
+
+namespace Carbon::Testing {
+
+// Encapsulates test context generated by processing and running.
+//
+// Note this should remain internal to `FileTestBase`, not exposed to individual
+// tests.
+struct TestFile {
+  // Represents a split within the test file.
+  struct Split {
+    friend void PrintTo(const Split& f, std::ostream* os) {
+      // Print content escaped.
+      llvm::raw_os_ostream os_wrap(*os);
+      os_wrap << "Split(" << f.filename << ", \"" << FormatEscaped(f.content)
+              << "\")";
+    }
+
+    std::string filename;
+    std::string content;
+  };
+
+  // The input test file content. Other parts may reference this.
+  std::string input_content;
+
+  // Lines which don't contain CHECKs, and thus need to be retained by
+  // autoupdate. Their file and line numbers are attached.
+  //
+  // If there are splits, then the splitting line is in the respective file.
+  // For N splits, the 0th file is the parts of the input file which are not
+  // in any split, plus one file per split file.
+  llvm::SmallVector<FileTestLine> non_check_lines;
+
+  // Whether there are splits.
+  bool has_splits = false;
+
+  // Arguments for the test, generated from ARGS.
+  llvm::SmallVector<std::string> test_args;
+
+  // Extra arguments for the test, generated from EXTRA-ARGS. Unlike ARGS,
+  // setting EXTRA-ARGS does not suppress the default arguments.
+  llvm::SmallVector<std::string> extra_args;
+
+  // Files in the test, generated by content and splits.
+  llvm::SmallVector<Split> file_splits;
+
+  // The location of the autoupdate marker, for autoupdated files.
+  std::optional<int> autoupdate_line_number;
+
+  // Whether there should be an AUTOUPDATE-SPLIT.
+  bool autoupdate_split = false;
+
+  // Whether to capture stderr and stdout that would head to console,
+  // generated from SET-CAPTURE-CONSOLE-OUTPUT.
+  bool capture_console_output = false;
+
+  // Whether checks are a subset, generated from SET-CHECK-SUBSET.
+  bool check_subset = false;
+
+  // stdout and stderr based on CHECK lines in the file.
+  llvm::SmallVector<testing::Matcher<std::string>> expected_stdout;
+  llvm::SmallVector<testing::Matcher<std::string>> expected_stderr;
+
+  // stdout and stderr from Run. 16 is arbitrary but a required value.
+  llvm::SmallString<16> actual_stdout;
+  llvm::SmallString<16> actual_stderr;
+
+  FileTestBase::RunResult run_result = {.success = false};
+};
+
+// Processes the test input, producing test files and expected output.
+auto ProcessTestFile(llvm::StringRef test_name, bool running_autoupdate)
+    -> ErrorOr<TestFile>;
+
+}  // namespace Carbon::Testing
+
+#endif  // CARBON_TESTING_FILE_TEST_TEST_FILE_H_