Эх сурвалжийг харах

Use a thread pool when building runtimes (#6133)

This parallelizes the compilations and dramatically reduces the time to
build runtimes.

As part of this, teach the driver infrastructure to have an option to
control the use of threads and to build the relevant thread pool and
thread it into the various APIs.

However, it requires our `ClangRunner` to become thread-safe and to
invoke Clang in a way that is thread-safe. This is somewhat challenging
as the code in `clang_main` is distinctly _not_ thread-safe.

To address this, the relevant logic of `clang_main`, especially the CC1
execution, is extracted into our runner and cleaned up to be much more
appropriate in a multithreaded context. Much of this code should
eventually be factored back into Clang, but that will be a follow-up
patch to upstream.

Last but not least, this rearranges the `ClangRunner` API to make a bit
more sense out of the different options for building runtimes, and have
a clean model for which things need to be passed in at which points.

---------

Co-authored-by: Dana Jansens <danakj@orodu.net>
Chandler Carruth 7 сар өмнө
parent
commit
35fb000536

+ 3 - 0
toolchain/driver/BUILD

@@ -33,8 +33,11 @@ cc_library(
         "//toolchain/install:install_paths",
         "@llvm-project//clang:basic",
         "@llvm-project//clang:clang-driver",
+        "@llvm-project//clang:codegen",
         "@llvm-project//clang:driver",
         "@llvm-project//clang:frontend",
+        "@llvm-project//clang:frontend_tool",
+        "@llvm-project//clang:serialization",
         "@llvm-project//llvm:Core",
         "@llvm-project//llvm:Object",
         "@llvm-project//llvm:Support",

+ 4 - 3
toolchain/driver/build_runtimes_subcommand.cpp

@@ -68,8 +68,8 @@ auto BuildRuntimesSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
 
 auto BuildRuntimesSubcommand::RunInternal(DriverEnv& driver_env)
     -> ErrorOr<std::filesystem::path> {
-  ClangRunner runner(driver_env.installation, &driver_env.runtimes_cache,
-                     driver_env.fs, driver_env.vlog_stream);
+  ClangRunner runner(driver_env.installation, driver_env.fs,
+                     driver_env.vlog_stream);
 
   Runtimes::Cache::Features features = {
       .target = options_.codegen_options.target.str()};
@@ -92,7 +92,8 @@ auto BuildRuntimesSubcommand::RunInternal(DriverEnv& driver_env)
                : Runtimes::Make(explicit_output_path, driver_env.vlog_stream));
   CARBON_ASSIGN_OR_RETURN(auto tmp_dir, Filesystem::MakeTmpDir());
 
-  return runner.BuildTargetResourceDir(features, runtimes, tmp_dir.abs_path());
+  return runner.BuildTargetResourceDir(features, runtimes, tmp_dir.abs_path(),
+                                       *driver_env.thread_pool);
 }
 
 }  // namespace Carbon

+ 239 - 79
toolchain/driver/clang_runner.cpp

@@ -18,14 +18,21 @@
 
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
+#include "clang/CodeGen/ObjectFilePCHContainerWriter.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/TextDiagnosticBuffer.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Frontend/Utils.h"
+#include "clang/FrontendTool/Utils.h"
+#include "clang/Serialization/ObjectFilePCHContainerReader.h"
 #include "common/filesystem.h"
 #include "common/vlog.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/LLVMContext.h"
@@ -36,31 +43,17 @@
 #include "llvm/Support/LLVMDriver.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/TargetParser/Host.h"
 #include "toolchain/base/runtime_sources.h"
 
-// Defined in:
-// https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp
-//
-// While not in a header, this is the API used by llvm-driver.cpp for
-// busyboxing.
-//
-// NOLINTNEXTLINE(readability-identifier-naming)
-auto clang_main(int Argc, char** Argv, const llvm::ToolContext& ToolContext)
-    -> int;
-
 namespace Carbon {
 
 ClangRunner::ClangRunner(const InstallPaths* install_paths,
-                         Runtimes::Cache* runtimes_cache,
                          llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-                         llvm::raw_ostream* vlog_stream,
-                         bool build_runtimes_on_demand)
-    : ToolRunnerBase(install_paths, vlog_stream),
-      runtimes_cache_(runtimes_cache),
-      fs_(std::move(fs)),
-      diagnostic_ids_(new clang::DiagnosticIDs()),
-      build_runtimes_on_demand_(build_runtimes_on_demand) {}
+                         llvm::raw_ostream* vlog_stream)
+    : ToolRunnerBase(install_paths, vlog_stream), fs_(std::move(fs)) {}
 
 // Searches an argument list to a Clang execution to determine the expected
 // target string, suitable for use with `llvm::Triple`.
@@ -133,31 +126,41 @@ static auto IsNonLinkCommand(llvm::ArrayRef<llvm::StringRef> args) -> bool {
   });
 }
 
-auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args,
-                      Runtimes* prebuilt_runtimes) -> ErrorOr<bool> {
+auto ClangRunner::RunWithPrebuiltRuntimes(llvm::ArrayRef<llvm::StringRef> args,
+                                          Runtimes& prebuilt_runtimes)
+    -> ErrorOr<bool> {
   // Check the args to see if we have a known target-independent command. If so,
   // directly dispatch it to avoid the cost of building the target resource
   // directory.
   // TODO: Maybe handle response file expansion similar to the Clang CLI?
-  if (args.empty() || args[0].starts_with("-cc1") || IsNonLinkCommand(args) ||
-      (!build_runtimes_on_demand_ && !prebuilt_runtimes)) {
-    return RunTargetIndependentCommand(args);
+  if (args.empty() || args[0].starts_with("-cc1") || IsNonLinkCommand(args)) {
+    return RunWithNoRuntimes(args);
   }
 
   std::string target = ComputeClangTarget(args);
 
-  // If we have pre-built runtimes, use them rather than building on demand.
-  if (prebuilt_runtimes) {
-    CARBON_ASSIGN_OR_RETURN(std::filesystem::path prebuilt_resource_dir_path,
-                            prebuilt_runtimes->Get(Runtimes::ClangResourceDir));
-    return RunInternal(args, target, prebuilt_resource_dir_path.native());
+  CARBON_ASSIGN_OR_RETURN(std::filesystem::path prebuilt_resource_dir_path,
+                          prebuilt_runtimes.Get(Runtimes::ClangResourceDir));
+  return RunInternal(args, target, prebuilt_resource_dir_path.native());
+}
+
+auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args,
+                      Runtimes::Cache& runtimes_cache,
+                      llvm::ThreadPoolInterface& runtimes_build_thread_pool)
+    -> ErrorOr<bool> {
+  // Check the args to see if we have a known target-independent command. If so,
+  // directly dispatch it to avoid the cost of building the target resource
+  // directory.
+  // TODO: Maybe handle response file expansion similar to the Clang CLI?
+  if (args.empty() || args[0].starts_with("-cc1") || IsNonLinkCommand(args)) {
+    return RunWithNoRuntimes(args);
   }
-  CARBON_CHECK(build_runtimes_on_demand_);
 
-  // Otherwise, we need to build a target resource directory.
+  std::string target = ComputeClangTarget(args);
+
   CARBON_VLOG("Building target resource dir...\n");
   Runtimes::Cache::Features features = {.target = target};
-  CARBON_ASSIGN_OR_RETURN(Runtimes runtimes, runtimes_cache_->Lookup(features));
+  CARBON_ASSIGN_OR_RETURN(Runtimes runtimes, runtimes_cache.Lookup(features));
 
   // We need to build the Clang resource directory for these runtimes. This
   // requires a temporary directory as well as the destination directory for
@@ -165,11 +168,14 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args,
   // not once we are running Clang with the built runtime.
   std::filesystem::path resource_dir_path;
   {
+    // Build the temporary directory needed for the runtimes build.
     CARBON_ASSIGN_OR_RETURN(Filesystem::RemovingDir tmp_dir,
                             Filesystem::MakeTmpDir());
+
     CARBON_ASSIGN_OR_RETURN(
         resource_dir_path,
-        BuildTargetResourceDir(features, runtimes, tmp_dir.abs_path()));
+        BuildTargetResourceDir(features, runtimes, tmp_dir.abs_path(),
+                               runtimes_build_thread_pool));
   }
 
   // Note that this function always successfully runs `clang` and returns a bool
@@ -179,15 +185,16 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args,
   return RunInternal(args, target, resource_dir_path.native());
 }
 
-auto ClangRunner::RunTargetIndependentCommand(
-    llvm::ArrayRef<llvm::StringRef> args) -> bool {
+auto ClangRunner::RunWithNoRuntimes(llvm::ArrayRef<llvm::StringRef> args)
+    -> bool {
   std::string target = ComputeClangTarget(args);
   return RunInternal(args, target, std::nullopt);
 }
 
 auto ClangRunner::BuildTargetResourceDir(
     const Runtimes::Cache::Features& features, Runtimes& runtimes,
-    const std::filesystem::path& tmp_path) -> ErrorOr<std::filesystem::path> {
+    const std::filesystem::path& tmp_path, llvm::ThreadPoolInterface& threads)
+    -> ErrorOr<std::filesystem::path> {
   // Disable any leaking of memory while building the target resource dir, and
   // restore the previous setting at the end.
   auto restore_leak_flag = llvm::make_scope_exit(
@@ -223,21 +230,171 @@ auto ClangRunner::BuildTargetResourceDir(
     return Error("TODO: Windows runtimes are untested and not yet supported.");
   }
 
+  llvm::ThreadPoolTaskGroup task_group(threads);
+
   // For Linux targets, the system libc (typically glibc) doesn't necessarily
   // provide the CRT begin/end files, and so we need to build them.
   if (target_triple.isOSLinux()) {
-    BuildCrtFile(target, RuntimeSources::CrtBegin,
-                 builder.path() / lib_path / "clang_rt.crtbegin.o");
-    BuildCrtFile(target, RuntimeSources::CrtEnd,
-                 builder.path() / lib_path / "clang_rt.crtend.o");
+    task_group.async(
+        [this, target,
+         path = builder.path() / lib_path / "clang_rt.crtbegin.o"] {
+          BuildCrtFile(target, RuntimeSources::CrtBegin, path);
+        });
+    task_group.async(
+        [this, target, path = builder.path() / lib_path / "clang_rt.crtend.o"] {
+          BuildCrtFile(target, RuntimeSources::CrtEnd, path);
+        });
   }
 
   CARBON_RETURN_IF_ERROR(
-      BuildBuiltinsLib(target, target_triple, tmp_path, lib_dir));
+      BuildBuiltinsLib(target, target_triple, tmp_path, lib_dir, threads));
+
+  // Now wait for all the queued builds to complete before we commit the
+  // runtimes into the cache.
+  task_group.wait();
 
   return std::move(builder).Commit();
 }
 
+auto ClangRunner::RunCC1(llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
+  llvm::BumpPtrAllocator allocator;
+  llvm::cl::ExpansionContext expansion_context(
+      allocator, llvm::cl::TokenizeGNUCommandLine);
+  if (llvm::Error error = expansion_context.expandResponseFiles(cc1_args)) {
+    llvm::errs() << toString(std::move(error)) << '\n';
+    return 1;
+  }
+  CARBON_CHECK(cc1_args[1] == llvm::StringRef("-cc1"));
+
+  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diag_ids =
+      clang::DiagnosticIDs::create();
+
+  // Register the support for object-file-wrapped Clang modules.
+  auto pch_ops = std::make_shared<clang::PCHContainerOperations>();
+  pch_ops->registerWriter(
+      std::make_unique<clang::ObjectFilePCHContainerWriter>());
+  pch_ops->registerReader(
+      std::make_unique<clang::ObjectFilePCHContainerReader>());
+
+  // Buffer diagnostics from argument parsing so that we can output them using a
+  // well formed diagnostic object.
+  clang::DiagnosticOptions diag_opts;
+  clang::TextDiagnosticBuffer diag_buffer;
+  clang::DiagnosticsEngine diags(diag_ids, diag_opts, &diag_buffer,
+                                 /*ShouldOwnClient=*/false);
+
+  // Setup round-trip remarks for the DiagnosticsEngine used in CreateFromArgs.
+  if (llvm::find(cc1_args, llvm::StringRef("-Rround-trip-cc1-args")) !=
+      cc1_args.end()) {
+    diags.setSeverity(clang::diag::remark_cc1_round_trip_generated,
+                      clang::diag::Severity::Remark, {});
+  }
+
+  auto invocation = std::make_shared<clang::CompilerInvocation>();
+  bool success = clang::CompilerInvocation::CreateFromArgs(
+      *invocation, llvm::ArrayRef(cc1_args).slice(1), diags, cc1_args[0]);
+
+  // Heap allocate the compiler instance so that if we disable freeing we can
+  // discard the pointer without destroying or deallocating it.
+  auto clang_instance = std::make_unique<clang::CompilerInstance>(
+      std::move(invocation), std::move(pch_ops));
+
+  // Override the disabling of free when we don't want to leak memory.
+  if (!enable_leaking_) {
+    clang_instance->getFrontendOpts().DisableFree = false;
+    clang_instance->getCodeGenOpts().DisableFree = false;
+  }
+
+  if (!clang_instance->getFrontendOpts().TimeTracePath.empty()) {
+    llvm::timeTraceProfilerInitialize(
+        clang_instance->getFrontendOpts().TimeTraceGranularity, cc1_args[0],
+        clang_instance->getFrontendOpts().TimeTraceVerbose);
+  }
+
+  // TODO: These options should take priority over the actual compilation.
+  // However, their implementation is currently not accessible from a library.
+  // We should factor the implementation into a reusable location and then use
+  // that here.
+  CARBON_CHECK(!clang_instance->getFrontendOpts().PrintSupportedCPUs &&
+               !clang_instance->getFrontendOpts().PrintSupportedExtensions &&
+               !clang_instance->getFrontendOpts().PrintEnabledExtensions);
+
+  // Infer the builtin include path if unspecified.
+  if (clang_instance->getHeaderSearchOpts().UseBuiltinIncludes &&
+      clang_instance->getHeaderSearchOpts().ResourceDir.empty()) {
+    clang_instance->getHeaderSearchOpts().ResourceDir =
+        installation_->clang_resource_path();
+  }
+
+  // Create the actual diagnostics engine.
+  clang_instance->createDiagnostics(*fs_);
+  if (!clang_instance->hasDiagnostics()) {
+    return EXIT_FAILURE;
+  }
+
+  // Now flush the buffered diagnostics into the Clang instance's diagnostic
+  // engine. If we've already hit an error, we can exit early once that's done.
+  diag_buffer.FlushDiagnostics(clang_instance->getDiagnostics());
+  if (!success) {
+    clang_instance->getDiagnosticClient().finish();
+    return EXIT_FAILURE;
+  }
+
+  // Execute the frontend actions.
+  {
+    llvm::TimeTraceScope time_scope("ExecuteCompiler");
+    bool time_passes = clang_instance->getCodeGenOpts().TimePasses;
+    if (time_passes) {
+      clang_instance->createFrontendTimer();
+    }
+    llvm::TimeRegion timer(time_passes ? &clang_instance->getFrontendTimer()
+                                       : nullptr);
+    success = clang::ExecuteCompilerInvocation(clang_instance.get());
+  }
+
+  // If any timers were active but haven't been destroyed yet, print their
+  // results now.  This happens in -disable-free mode.
+  std::unique_ptr<llvm::raw_ostream> io_file = llvm::CreateInfoOutputFile();
+  if (clang_instance->getCodeGenOpts().TimePassesJson) {
+    *io_file << "{\n";
+    llvm::TimerGroup::printAllJSONValues(*io_file, "");
+    *io_file << "\n}\n";
+  } else if (!clang_instance->getCodeGenOpts().TimePassesStatsFile) {
+    llvm::TimerGroup::printAll(*io_file);
+  }
+  llvm::TimerGroup::clearAll();
+
+  if (llvm::timeTraceProfilerEnabled()) {
+    // It is possible that the compiler instance doesn't own a file manager here
+    // if we're compiling a module unit, since the file manager is owned by the
+    // AST when we're compiling a module unit. So the file manager may be
+    // invalid here.
+    //
+    // It should be fine to create file manager here since the file system
+    // options are stored in the compiler invocation and we can recreate the VFS
+    // from the compiler invocation.
+    if (!clang_instance->hasFileManager()) {
+      clang_instance->createFileManager(fs_);
+    }
+
+    if (auto profiler_output = clang_instance->createOutputFile(
+            clang_instance->getFrontendOpts().TimeTracePath, /*Binary=*/false,
+            /*RemoveFileOnSignal=*/false,
+            /*useTemporary=*/false)) {
+      llvm::timeTraceProfilerWrite(*profiler_output);
+      profiler_output.reset();
+      llvm::timeTraceProfilerCleanup();
+      clang_instance->clearOutputFiles(false);
+    }
+  }
+
+  // When running with -disable-free, don't do any destruction or shutdown.
+  if (clang_instance->getFrontendOpts().DisableFree) {
+    llvm::BuryPointer(std::move(clang_instance));
+  }
+  return success ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
 auto ClangRunner::RunInternal(
     llvm::ArrayRef<llvm::StringRef> args, llvm::StringRef target,
     std::optional<llvm::StringRef> target_resource_dir_path) -> bool {
@@ -250,12 +407,8 @@ auto ClangRunner::RunInternal(
 
   // Handle special dispatch for CC1 commands as they don't use the driver.
   if (!args.empty() && args[0].starts_with("-cc1")) {
-    CARBON_VLOG("Calling clang_main for cc1...");
-    // cstr_args[0] will be the `clang_path` so we don't need the prepend arg.
-    llvm::ToolContext tool_context = {
-        .Path = cstr_args[0], .PrependArg = "clang", .NeedsPrependArg = false};
-    int exit_code = clang_main(
-        cstr_args.size(), const_cast<char**>(cstr_args.data()), tool_context);
+    CARBON_VLOG("Calling Clang's CC1...");
+    int exit_code = RunCC1(cstr_args);
     // TODO: Should this be forwarding the full exit code?
     return exit_code == 0;
   }
@@ -273,8 +426,10 @@ auto ClangRunner::RunInternal(
   clang::TextDiagnosticPrinter diagnostic_client(llvm::errs(),
                                                  *diagnostic_options);
 
-  clang::DiagnosticsEngine diagnostics(diagnostic_ids_, *diagnostic_options,
-                                       &diagnostic_client,
+  // Note that the `DiagnosticsEngine` takes ownership (via a ref count) of the
+  // DiagnosticIDs, unlike the other parameters.
+  clang::DiagnosticsEngine diagnostics(clang::DiagnosticIDs::create(),
+                                       *diagnostic_options, &diagnostic_client,
                                        /*ShouldOwnClient=*/false);
   clang::ProcessWarningOptions(diagnostics, *diagnostic_options, *fs_);
 
@@ -323,18 +478,8 @@ auto ClangRunner::RunInternal(
   //
   // Also note that we only do `-disable-free` filtering in the in-process
   // execution here, as subprocesses leaking memory won't impact this process.
-  auto cc1_main = [enable_leaking = enable_leaking_](
-                      llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
-    if (!enable_leaking) {
-      // Last-flag wins, so this forcibly re-enables freeing memory.
-      cc1_args.push_back("-no-disable-free");
-    }
-
-    // cc1_args[0] will be the `clang_path` so we don't need the prepend arg.
-    llvm::ToolContext tool_context = {
-        .Path = cc1_args[0], .PrependArg = "clang", .NeedsPrependArg = false};
-    return clang_main(cc1_args.size(), const_cast<char**>(cc1_args.data()),
-                      tool_context);
+  auto cc1_main = [this](llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
+    return RunCC1(cc1_args);
   };
   driver.CC1Main = cc1_main;
 
@@ -384,7 +529,7 @@ auto ClangRunner::BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file,
   CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path);
 
   std::string target_arg = llvm::formatv("--target={0}", target).str();
-  CARBON_CHECK(RunTargetIndependentCommand({
+  CARBON_CHECK(RunWithNoRuntimes({
       "-no-canonical-prefixes",
       target_arg,
       "-DCRT_HAS_INITFINI_ARRAY",
@@ -463,7 +608,7 @@ auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target,
   CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path);
 
   std::string target_arg = llvm::formatv("--target={0}", target).str();
-  CARBON_CHECK(RunTargetIndependentCommand({
+  CARBON_CHECK(RunWithNoRuntimes({
       "-no-canonical-prefixes",
       target_arg,
       "-O3",
@@ -484,7 +629,8 @@ auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target,
 auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target,
                                    const llvm::Triple& target_triple,
                                    const std::filesystem::path& tmp_path,
-                                   Filesystem::DirRef lib_dir)
+                                   Filesystem::DirRef lib_dir,
+                                   llvm::ThreadPoolInterface& threads)
     -> ErrorOr<Success> {
   llvm::SmallVector<llvm::StringRef> src_files =
       CollectBuiltinsSrcFiles(target_triple);
@@ -492,25 +638,29 @@ auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target,
   CARBON_ASSIGN_OR_RETURN(Filesystem::Dir tmp_dir,
                           Filesystem::Cwd().OpenDir(tmp_path));
 
-  llvm::SmallVector<llvm::NewArchiveMember> objs;
-  objs.reserve(src_files.size());
-  for (llvm::StringRef src_file : src_files) {
+  // `NewArchiveMember` isn't default constructible unfortunately, so we first
+  // build the objects using an optional wrapper.
+  llvm::SmallVector<std::optional<llvm::NewArchiveMember>> objs;
+  objs.resize(src_files.size());
+  llvm::ThreadPoolTaskGroup member_group(threads);
+  for (auto [src_file, obj] : llvm::zip_equal(src_files, objs)) {
     // Create any subdirectories needed for this file.
     std::filesystem::path src_path = src_file.str();
     if (src_path.has_parent_path()) {
       CARBON_RETURN_IF_ERROR(tmp_dir.CreateDirectories(src_path.parent_path()));
     }
 
-    std::filesystem::path obj_path = tmp_path / std::string_view(src_file);
-    obj_path += ".o";
-    BuildBuiltinsFile(target, src_file, obj_path);
-
-    llvm::Expected<llvm::NewArchiveMember> obj =
-        llvm::NewArchiveMember::getFile(obj_path.native(),
-                                        /*Deterministic=*/true);
-    CARBON_CHECK(obj, "TODO: Diagnose this: {0}",
-                 llvm::fmt_consume(obj.takeError()));
-    objs.push_back(std::move(*obj));
+    member_group.async([this, target, src_file, &obj, &tmp_path] {
+      std::filesystem::path obj_path = tmp_path / std::string_view(src_file);
+      obj_path += ".o";
+      BuildBuiltinsFile(target, src_file, obj_path);
+
+      auto obj_result = llvm::NewArchiveMember::getFile(obj_path.native(),
+                                                        /*Deterministic=*/true);
+      CARBON_CHECK(obj_result, "TODO: Diagnose this: {0}",
+                   llvm::fmt_consume(obj_result.takeError()));
+      obj = std::move(*obj_result);
+    });
   }
 
   // Now build an archive out of the `.o` files for the builtins. Note that we
@@ -520,11 +670,22 @@ auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target,
   CARBON_ASSIGN_OR_RETURN(
       Filesystem::WriteFile builtins_a_file,
       lib_dir.OpenWriteOnly(builtins_a_path, Filesystem::CreateAlways));
+
+  // Wait for all the object compiles to complete, and then move the objects out
+  // of their optional wrappers to match the API required by the archive writer.
+  member_group.wait();
+  llvm::SmallVector<llvm::NewArchiveMember> unwrapped_objs;
+  unwrapped_objs.reserve(objs.size());
+  for (auto& obj : objs) {
+    unwrapped_objs.push_back(*std::move(obj));
+  }
+  objs.clear();
+
+  // Write the actual archive.
   {
     llvm::raw_fd_ostream builtins_a_os = builtins_a_file.WriteStream();
-
     llvm::Error archive_err = llvm::writeArchiveToStream(
-        builtins_a_os, objs, llvm::SymtabWritingMode::NormalSymtab,
+        builtins_a_os, unwrapped_objs, llvm::SymtabWritingMode::NormalSymtab,
         target_triple.isOSDarwin() ? llvm::object::Archive::K_DARWIN
                                    : llvm::object::Archive::K_GNU,
         /*Deterministic=*/true, /*Thin=*/false);
@@ -534,7 +695,6 @@ auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target,
     }
   }
   CARBON_RETURN_IF_ERROR(std::move(builtins_a_file).Close());
-
   return Success();
 }
 

+ 48 - 25
toolchain/driver/clang_runner.h

@@ -12,6 +12,7 @@
 #include "common/ostream.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/Triple.h"
 #include "toolchain/driver/runtimes_cache.h"
@@ -25,11 +26,15 @@ namespace Carbon {
 // incorporating custom command line flags from user invocations that we don't
 // parse, but will pass transparently along to Clang itself.
 //
+// This class is thread safe, allowing multiple threads to share a single runner
+// and concurrently invoke Clang.
+//
 // This doesn't literally use a subprocess to invoke Clang; it instead tries to
 // directly use the Clang command line driver library. We also work to simplify
 // how that driver operates and invoke it in an opinionated way to get the best
 // behavior for our expected use cases in the Carbon driver:
 //
+// - Ensure thread-safe invocation of Clang to enable concurrent usage.
 // - Minimize canonicalization of file names to try to preserve the paths as
 //   users type them.
 // - Minimize the use of subprocess invocations which are expensive on some
@@ -42,43 +47,57 @@ namespace Carbon {
 // standard output and standard error, and otherwise can only read and write
 // files based on their names described in the arguments. It doesn't provide any
 // higher-level abstraction such as streams for inputs or outputs.
+//
+// TODO: Switch the diagnostic machinery to buffer and do locked output so that
+// concurrent invocations of Clang don't intermingle their diagnostic output.
+//
+// TODO: If support for thread-local overrides of `llvm::errs` and `llvm::outs`
+// becomes available upstream, also buffer and synchronize those streams to
+// further improve the behavior of concurrent invocations.
 class ClangRunner : ToolRunnerBase {
  public:
-  // Build a Clang runner that uses the provided `exe_name` and `err_stream`.
+  // Build a Clang runner that uses the provided installation and filesystem.
   //
-  // If `verbose` is passed as true, will enable verbose logging to the
-  // `err_stream` both from the runner and Clang itself.
+  // Optionally accepts a `vlog_stream` to enable verbose logging from Carbon to
+  // that stream. The verbose output from Clang goes to stderr regardless.
   ClangRunner(const InstallPaths* install_paths,
-              Runtimes::Cache* on_demand_runtimes_cache,
               llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-              llvm::raw_ostream* vlog_stream = nullptr,
-              bool build_runtimes_on_demand = false);
+              llvm::raw_ostream* vlog_stream = nullptr);
 
-  // Run Clang with the provided arguments.
+  // Run Clang with the provided arguments and a runtime cache for on-demand
+  // runtime building.
   //
   // This works to support all of the Clang commandline, including commands that
   // use target-dependent resources like linking. When it detects such commands,
-  // it will either use the provided target resource-dir path, or if building
-  // runtimes on demand is enabled it will build the needed resource-dir.
+  // it will use runtimes from the provided cache. If not available in the
+  // cache, it will build the necessary runtimes using the provided thread pool
+  // both to use and incorporate into the cache.
   //
   // Returns an error only if unable to successfully run Clang with the
   // arguments. If able to run Clang, no error is returned; instead, a bool
   // indicating whether that Clang invocation succeeded is returned.
-  //
-  // TODO: Eventually, this will need to accept an abstraction that can
-  // represent multiple different pre-built runtimes.
   auto Run(llvm::ArrayRef<llvm::StringRef> args,
-           Runtimes* prebuilt_runtimes = nullptr) -> ErrorOr<bool>;
+           Runtimes::Cache& runtimes_cache,
+           llvm::ThreadPoolInterface& runtimes_build_thread_pool)
+      -> ErrorOr<bool>;
+
+  // Run Clang with the provided arguments and prebuilt runtimes.
+  //
+  // Similar to `Run`, but requires and uses pre-built runtimes rather than a
+  // cache or building them on demand.
+  auto RunWithPrebuiltRuntimes(llvm::ArrayRef<llvm::StringRef> args,
+                               Runtimes& prebuilt_runtimes) -> ErrorOr<bool>;
 
-  // Run Clang with the provided arguments and without any target-dependent
-  // resources.
+  // Run Clang with the provided arguments and without any target runtimes.
   //
   // This method can be used to avoid building target-dependent resources when
   // unnecessary, but not all Clang command lines will work correctly.
   // Specifically, compile-only commands will typically work, while linking will
   // not.
-  auto RunTargetIndependentCommand(llvm::ArrayRef<llvm::StringRef> args)
-      -> bool;
+  //
+  // This function simply returns true or false depending on whether Clang runs
+  // successfully, as it should display any needed error messages.
+  auto RunWithNoRuntimes(llvm::ArrayRef<llvm::StringRef> args) -> bool;
 
   // Builds the target-specific resource directory for Clang.
   //
@@ -88,7 +107,8 @@ class ClangRunner : ToolRunnerBase {
   // return the path.
   auto BuildTargetResourceDir(const Runtimes::Cache::Features& features,
                               Runtimes& runtimes,
-                              const std::filesystem::path& tmp_path)
+                              const std::filesystem::path& tmp_path,
+                              llvm::ThreadPoolInterface& threads)
       -> ErrorOr<std::filesystem::path>;
 
   // Enable leaking memory.
@@ -103,6 +123,14 @@ class ClangRunner : ToolRunnerBase {
   auto EnableLeakingMemory() -> void { enable_leaking_ = true; }
 
  private:
+  // Emulates `cc1_main` but in a way that doesn't assume it is running in the
+  // main thread and can more easily fit into library calls to do compiles.
+  //
+  // TODO: Much of the logic here should be factored out of the CC1
+  // implementation in Clang's driver and into a reusable part of its libraries.
+  // That should allow reducing the code here to a minimal amount.
+  auto RunCC1(llvm::SmallVectorImpl<const char*>& cc1_args) -> int;
+
   // Handles building the Clang driver and passing the arguments down to it.
   auto RunInternal(llvm::ArrayRef<llvm::StringRef> args, llvm::StringRef target,
                    std::optional<llvm::StringRef> target_resource_dir_path)
@@ -125,16 +153,11 @@ class ClangRunner : ToolRunnerBase {
   auto BuildBuiltinsLib(llvm::StringRef target,
                         const llvm::Triple& target_triple,
                         const std::filesystem::path& tmp_path,
-                        Filesystem::DirRef lib_dir) -> ErrorOr<Success>;
-
-  Runtimes::Cache* runtimes_cache_;
+                        Filesystem::DirRef lib_dir,
+                        llvm::ThreadPoolInterface& threads) -> ErrorOr<Success>;
 
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs_;
-  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagnostic_ids_;
-
-  std::optional<std::filesystem::path> prebuilt_runtimes_path_;
 
-  bool build_runtimes_on_demand_;
   bool enable_leaking_ = false;
 };
 

+ 15 - 16
toolchain/driver/clang_runner_test.cpp

@@ -70,13 +70,12 @@ class ClangRunnerTest : public ::testing::Test {
 
 TEST_F(ClangRunnerTest, Version) {
   RawStringOstream test_os;
-  ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &test_os);
+  ClangRunner runner(&install_paths_, vfs_, &test_os);
 
   std::string out;
   std::string err;
-  EXPECT_TRUE(Testing::CallWithCapturedOutput(out, err, [&] {
-    return runner.RunTargetIndependentCommand({"--version"});
-  }));
+  EXPECT_TRUE(Testing::CallWithCapturedOutput(
+      out, err, [&] { return runner.RunWithNoRuntimes({"--version"}); }));
   // The arguments to Clang should be part of the verbose log.
   EXPECT_THAT(test_os.TakeStr(), HasSubstr("--version"));
 
@@ -101,13 +100,13 @@ TEST_F(ClangRunnerTest, DashC) {
   std::filesystem::path test_output = *Testing::WriteTestFile("test.o", "");
 
   RawStringOstream verbose_out;
-  ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &verbose_out);
+  ClangRunner runner(&install_paths_, vfs_, &verbose_out);
   std::string out;
   std::string err;
   EXPECT_TRUE(Testing::CallWithCapturedOutput(
       out, err,
       [&] {
-        return runner.RunTargetIndependentCommand(
+        return runner.RunWithNoRuntimes(
             {"-c", test_file.string(), "-o", test_output.string()});
       }))
       << "Verbose output from runner:\n"
@@ -130,13 +129,13 @@ TEST_F(ClangRunnerTest, BuitinHeaders) {
   std::filesystem::path test_output = *Testing::WriteTestFile("test.o", "");
 
   RawStringOstream verbose_out;
-  ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &verbose_out);
+  ClangRunner runner(&install_paths_, vfs_, &verbose_out);
   std::string out;
   std::string err;
   EXPECT_TRUE(Testing::CallWithCapturedOutput(
       out, err,
       [&] {
-        return runner.RunTargetIndependentCommand(
+        return runner.RunWithNoRuntimes(
             {"-c", test_file.string(), "-o", test_output.string()});
       }))
       << "Verbose output from runner:\n"
@@ -157,13 +156,13 @@ TEST_F(ClangRunnerTest, CompileMultipleFiles) {
     std::filesystem::path output = *Testing::WriteTestFile(output_file, "");
 
     RawStringOstream verbose_out;
-    ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &verbose_out);
+    ClangRunner runner(&install_paths_, vfs_, &verbose_out);
     std::string out;
     std::string err;
     EXPECT_TRUE(Testing::CallWithCapturedOutput(
         out, err,
         [&] {
-          return runner.RunTargetIndependentCommand(
+          return runner.RunWithNoRuntimes(
               {"-c", file.string(), "-o", output.string()});
         }))
         << "Verbose output from runner:\n"
@@ -180,8 +179,7 @@ TEST_F(ClangRunnerTest, CompileMultipleFiles) {
 }
 
 TEST_F(ClangRunnerTest, BuildResourceDir) {
-  ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &llvm::errs(),
-                     /*build_runtimes_on_demand=*/true);
+  ClangRunner runner(&install_paths_, vfs_, &llvm::errs());
 
   // Note that we can't test arbitrary targets here as we need to be able to
   // compile the builtin functions for the target. We use the default target as
@@ -191,8 +189,9 @@ TEST_F(ClangRunnerTest, BuildResourceDir) {
   Runtimes::Cache::Features features = {.target = target};
   auto runtimes = *runtimes_cache_.Lookup(features);
   auto tmp_dir = *Filesystem::MakeTmpDir();
-  auto build_result =
-      runner.BuildTargetResourceDir(features, runtimes, tmp_dir.abs_path());
+  llvm::DefaultThreadPool threads(llvm::optimal_concurrency());
+  auto build_result = runner.BuildTargetResourceDir(
+      features, runtimes, tmp_dir.abs_path(), threads);
   ASSERT_TRUE(build_result.ok()) << build_result.error();
   std::filesystem::path resource_dir_path = std::move(*build_result);
 
@@ -270,7 +269,7 @@ TEST_F(ClangRunnerTest, LinkCommandEcho) {
   std::filesystem::path bar_file = *Testing::WriteTestFile("bar.o", "");
 
   RawStringOstream verbose_out;
-  ClangRunner runner(&install_paths_, &runtimes_cache_, vfs_, &verbose_out);
+  ClangRunner runner(&install_paths_, vfs_, &verbose_out);
   std::string out;
   std::string err;
   EXPECT_TRUE(Testing::CallWithCapturedOutput(
@@ -280,7 +279,7 @@ TEST_F(ClangRunnerTest, LinkCommandEcho) {
         // we're just getting the echo-ed output back. For this to actually
         // link, we'd need to have the target-dependent resources, but those are
         // expensive to build so we only want to test them once (above).
-        return runner.RunTargetIndependentCommand(
+        return runner.RunWithNoRuntimes(
             {"-###", "-o", "binary", foo_file.string(), bar_file.string()});
       }))
       << "Verbose output from runner:\n"

+ 12 - 7
toolchain/driver/clang_subcommand.cpp

@@ -69,10 +69,8 @@ ClangSubcommand::ClangSubcommand() : DriverSubcommand(SubcommandInfo) {}
 // add more.
 // https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp
 auto ClangSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
-  ClangRunner runner(
-      driver_env.installation, &driver_env.runtimes_cache, driver_env.fs,
-      driver_env.vlog_stream,
-      /*build_runtimes_on_demand=*/options_.build_runtimes_on_demand);
+  ClangRunner runner(driver_env.installation, driver_env.fs,
+                     driver_env.vlog_stream);
 
   // Don't run Clang when fuzzing; it is known to be unreliable under fuzzing
   // due to many unfixed issues.
@@ -85,9 +83,16 @@ auto ClangSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
     runner.EnableLeakingMemory();
   }
 
-  ErrorOr<bool> run_result = runner.Run(
-      options_.args,
-      driver_env.prebuilt_runtimes ? &*driver_env.prebuilt_runtimes : nullptr);
+  ErrorOr<bool> run_result = false;
+  if (driver_env.prebuilt_runtimes) {
+    run_result = runner.RunWithPrebuiltRuntimes(options_.args,
+                                                *driver_env.prebuilt_runtimes);
+  } else if (options_.build_runtimes_on_demand) {
+    run_result = runner.Run(options_.args, driver_env.runtimes_cache,
+                            *driver_env.thread_pool);
+  } else {
+    run_result = runner.RunWithNoRuntimes(options_.args);
+  }
   if (!run_result.ok()) {
     // This is not a Clang failure, but a failure to even run Clang, so we need
     // to diagnose it here.

+ 28 - 0
toolchain/driver/driver.cpp

@@ -32,6 +32,7 @@ struct Options {
   bool verbose = false;
   bool fuzzing = false;
   bool include_diagnostic_kind = false;
+  bool threads = true;
 
   llvm::StringRef runtimes_cache_path;
   llvm::StringRef prebuilt_runtimes_path;
@@ -124,6 +125,25 @@ applies to each message that forms a diagnostic, not just the primary message.
       },
       [&](auto& arg_b) { arg_b.Set(&include_diagnostic_kind); });
 
+  b.AddFlag(
+      {
+          .name = "threads",
+          .help = R"""(
+Controls whether threads are used to build runtimes.
+
+When enabled (the default), Carbon will try to build runtime libraries using
+threads to parallelize the operation. How many threads is controlled
+automatically by the system.
+
+Disabling threads ensures a single threaded build of the runtimes which can help
+when there are errors or other output.
+)""",
+      },
+      [&](auto& arg_b) {
+        arg_b.Default(true);
+        arg_b.Set(&threads);
+      });
+
   runtimes.AddTo(b, &selected_subcommand);
   clang.AddTo(b, &selected_subcommand);
   compile.AddTo(b, &selected_subcommand);
@@ -208,6 +228,14 @@ auto Driver::RunCommand(llvm::ArrayRef<llvm::StringRef> args) -> DriverResult {
     driver_env_.fuzzing = true;
   }
 
+  llvm::SingleThreadExecutor single_thread({.ThreadsRequested = 1});
+  std::optional<llvm::DefaultThreadPool> threads;
+  driver_env_.thread_pool = &single_thread;
+  if (options.threads) {
+    threads.emplace(llvm::optimal_concurrency());
+    driver_env_.thread_pool = &*threads;
+  }
+
   CARBON_CHECK(options.selected_subcommand != nullptr);
   return options.selected_subcommand->Run(driver_env_);
 }

+ 5 - 0
toolchain/driver/driver_env.h

@@ -9,6 +9,8 @@
 #include <utility>
 
 #include "common/ostream.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
 #include "toolchain/driver/runtimes_cache.h"
@@ -62,6 +64,9 @@ struct DriverEnv {
   // A diagnostic emitter that has no locations.
   Diagnostics::NoLocEmitter emitter;
 
+  // Thread pool available for use when concurrency is needed.
+  llvm::ThreadPoolInterface* thread_pool;
+
   // For CARBON_VLOG.
   llvm::raw_pwrite_stream* vlog_stream = nullptr;
 

+ 4 - 3
toolchain/driver/link_subcommand.cpp

@@ -118,9 +118,10 @@ auto LinkSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
   clang_args.append(options_.object_filenames.begin(),
                     options_.object_filenames.end());
 
-  ClangRunner runner(driver_env.installation, &driver_env.runtimes_cache,
-                     driver_env.fs, driver_env.vlog_stream);
-  ErrorOr<bool> run_result = runner.Run(clang_args);
+  ClangRunner runner(driver_env.installation, driver_env.fs,
+                     driver_env.vlog_stream);
+  ErrorOr<bool> run_result = runner.Run(clang_args, driver_env.runtimes_cache,
+                                        *driver_env.thread_pool);
   if (!run_result.ok()) {
     // This is not a Clang failure, but a failure to even run Clang, so we need
     // to diagnose it here.

+ 9 - 18
toolchain/driver/lld_runner_test.cpp

@@ -85,22 +85,17 @@ static auto CompileTwoSources(const InstallPaths& install_paths,
   // First compile the two source files to `.o` files with Clang.
   RawStringOstream verbose_out;
   auto vfs = llvm::vfs::getRealFileSystem();
-  ClangRunner clang(&install_paths, /*on_demand_runtimes_cache=*/nullptr, vfs,
-                    &verbose_out);
+  ClangRunner clang(&install_paths, vfs, &verbose_out);
   std::string target_arg = llvm::formatv("--target={0}", target).str();
   std::string out;
   std::string err;
   CARBON_CHECK(Testing::CallWithCapturedOutput(
                    out, err,
                    [&] {
-                     auto run_result = clang.Run({target_arg, "-fPIE", "-c",
-                                                  test_a_file.string(), "-o",
-                                                  test_a_output.string()});
-                     if (!run_result.ok()) {
-                       err = run_result.error().message();
-                       return false;
-                     }
-                     return *run_result;
+                     auto run_result = clang.RunWithNoRuntimes(
+                         {target_arg, "-fPIE", "-c", test_a_file.string(), "-o",
+                          test_a_output.string()});
+                     return run_result;
                    }),
                "Verbose output from runner:\n{0}\nStderr:\n{1}\n",
                verbose_out.TakeStr(), err);
@@ -109,14 +104,10 @@ static auto CompileTwoSources(const InstallPaths& install_paths,
   CARBON_CHECK(Testing::CallWithCapturedOutput(
                    out, err,
                    [&] {
-                     auto run_result = clang.Run({target_arg, "-fPIE", "-c",
-                                                  test_b_file.string(), "-o",
-                                                  test_b_output.string()});
-                     if (!run_result.ok()) {
-                       err = run_result.error().message();
-                       return false;
-                     }
-                     return *run_result;
+                     auto run_result = clang.RunWithNoRuntimes(
+                         {target_arg, "-fPIE", "-c", test_b_file.string(), "-o",
+                          test_b_output.string()});
+                     return run_result;
                    }),
                "Verbose output from runner:\n{0}\nStderr:\n{1}\n",
                verbose_out.TakeStr(), err);