Просмотр исходного кода

Introduce a Bazel runtimes building system (#6548)

This allows us to re-use the on-demand runtimes building, but in
a framework that is (much) more Bazel compatible:

- It creates a Bazel rule to generate the runtimes tree
- The generated runtimes tree is adjusted to integrate with Bazel's
  output tracking and caching infrastructure so it doesn't need to be
  rebuilt when a cached set of runtimes is available
- The build occurs during the build phase and the action informs Bazel
  about the CPU usage to give Bazel a chance to not run other parts of
  the build when there are no execution resources available
- The binary is factored into a stand-alone program for the Clang
  runtimes, which depends on a minimal amount of Carbon and notably
  avoids the busybox or installation. This should cause almost all
  builds to get a cache hit here unless Clang itself is updated.

Some refactoring of the codegen options was done to support this. I've
tried to factor some of the code between this and the `build-runtimes`
subcommand, but it was challenging to do more without adding substantial
complexity or dependencies on more Carbon infrastructure than is
necessary. I think the result is tolerable, but open to suggestions
here if folks see specific changes that would improve things.

---------

Co-authored-by: Richard Smith <richard@metafoo.co.uk>
Chandler Carruth 3 месяцев назад
Родитель
Сommit
be4a95aef3

+ 2 - 0
.bazelrc

@@ -35,9 +35,11 @@ common:non-fatal-checks --per_file_copt=common/check_internal.cpp@-DCARBON_NON_F
 # enable use of the target config here to make our build and tests more
 # efficient, see the documentation in //bazel/carbon_rules/BUILD for details.
 common --flag_alias=use_target_config_carbon_rules=//bazel/carbon_rules:use_target_config_carbon_rules
+common --flag_alias=use_target_config_runtimes_builder=//toolchain/driver:use_target_config_runtimes_builder
 # Bazel doesn't track what commands the flag_alias is valid for, so we can't use
 # common here.
 build --use_target_config_carbon_rules
+build --use_target_config_runtimes_builder
 
 # Default to using a disk cache to minimize re-building LLVM and Clang which we
 # try to avoid updating too frequently to minimize rebuild cost. The location

+ 76 - 3
toolchain/driver/BUILD

@@ -2,10 +2,12 @@
 # Exceptions. See /LICENSE for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
 load("@rules_shell//shell:sh_test.bzl", "sh_test")
 load("//bazel/cc_rules:defs.bzl", "cc_binary", "cc_library", "cc_test")
 load("//bazel/cc_toolchains:defs.bzl", "cc_env")
 load("//testing/fuzzing:rules.bzl", "cc_fuzz_test")
+load(":prebuilt_runtimes.bzl", "prebuilt_runtimes")
 
 package(default_visibility = ["//visibility:public"])
 
@@ -86,7 +88,10 @@ cc_test(
     name = "clang_runtimes_test",
     size = "medium",
     srcs = ["clang_runtimes_test.cpp"],
-    data = ["//toolchain/install:install_data"],
+    data = [
+        ":prebuilt_runtimes",
+        "//toolchain/install:install_data",
+    ],
     deps = [
         ":clang_runner",
         ":llvm_runner",
@@ -101,6 +106,7 @@ cc_test(
         "//testing/base:gtest_main",
         "//toolchain/base:install_paths",
         "//toolchain/base:llvm_tools",
+        "@bazel_tools//tools/cpp/runfiles",
         "@googletest//:gtest",
         "@llvm-project//llvm:Object",
         "@llvm-project//llvm:Support",
@@ -137,6 +143,21 @@ sh_test(
     env = cc_env(),
 )
 
+# Codegen option parsing, split out of `:driver` so that it can also be used by
+# `:bazel_build_clang_runtimes` without depending on the rest of the driver.
+cc_library(
+    name = "codegen_options",
+    srcs = [
+        "codegen_options.cpp",
+    ],
+    hdrs = [
+        "codegen_options.h",
+    ],
+    deps = [
+        "//common:command_line",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//llvm:TargetParser",
+    ],
+)
+
 cc_library(
     name = "driver",
     srcs = [
@@ -144,8 +165,6 @@ cc_library(
         "build_runtimes_subcommand.h",
         "clang_subcommand.cpp",
         "clang_subcommand.h",
-        "codegen_options.cpp",
-        "codegen_options.h",
         "compile_subcommand.cpp",
         "compile_subcommand.h",
         "driver.cpp",
@@ -172,6 +191,7 @@ cc_library(
     textual_hdrs = ["flags.def"],
     deps = [
         ":clang_runner",
+        ":codegen_options",
         ":lld_runner",
         ":llvm_runner",
         ":runtimes_cache",
@@ -371,3 +391,56 @@ cc_library(
         "@llvm-project//llvm:Support",
     ],
 )
+
+# Stand-alone tool that builds the Clang runtimes for use from Bazel rules. It
+# uses the driver-less install data and the `clang` binary as runtime data.
+cc_binary(
+    name = "bazel_build_clang_runtimes",
+    srcs = ["bazel_build_clang_runtimes.cpp"],
+    data = [
+        "//toolchain/install:install_data.no_driver",
+        "@llvm-project//clang:clang",
+    ],
+    deps = [
+        ":clang_runner",
+        ":codegen_options",
+        ":runtimes_cache",
+        "//common:all_llvm_targets",
+        "//common:bazel_working_dir",
+        "//common:check",
+        "//common:command_line",
+        "//common:error",
+        "//common:exe_path",
+        "//common:filesystem",
+        "//common:init_llvm",
+        "//common:raw_string_ostream",
+        "//common:version",
+        "//toolchain/base:install_paths",
+        "@bazel_tools//tools/cpp/runfiles",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//llvm:TargetParser",
+    ],
+)
+
+# Flag controlling whether the tools used by the `prebuilt_runtimes` rules are
+# built in the target config instead of the exec config. Defaults to `False`.
+#
+# Using the exec config is more correct and will also optimize the tools used to
+# build the runtimes, potentially making them run faster. However, it will
+# likely double the number of compiles needed to build everything necessary in
+# that configuration. As a consequence, setting this flag is useful in
+# development and CI when the target config is compatible with the exec config.
+bool_flag(
+    name = "use_target_config_runtimes_builder",
+    build_setting_default = False,
+)
+
+# Matches when the `use_target_config_runtimes_builder` flag is set to `True`.
+config_setting(
+    name = "use_target_config_runtimes_builder_config",
+    flag_values = {":use_target_config_runtimes_builder": "True"},
+)
+
+# Prebuilt runtimes built without an explicit `target`; used as test data by
+# `:clang_runtimes_test`.
+#
+# TODO: Correctly set the `target` argument here based on the Bazel target
+# platform. Without this, we will generate invalid prebuilt runtimes when cross
+# compiling.
+prebuilt_runtimes(
+    name = "prebuilt_runtimes",
+)

+ 266 - 0
toolchain/driver/bazel_build_clang_runtimes.cpp

@@ -0,0 +1,266 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <string>
+#include <system_error>
+#include <utility>
+
+#include "common/bazel_working_dir.h"
+#include "common/check.h"
+#include "common/command_line.h"
+#include "common/error.h"
+#include "common/exe_path.h"
+#include "common/filesystem.h"
+#include "common/init_llvm.h"
+#include "common/raw_string_ostream.h"
+#include "common/version.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "toolchain/base/install_paths.h"
+#include "toolchain/driver/clang_runner.h"
+#include "toolchain/driver/clang_runtimes.h"
+#include "toolchain/driver/codegen_options.h"
+#include "toolchain/driver/runtimes_cache.h"
+#include "tools/cpp/runfiles/runfiles.h"
+
+namespace Carbon {
+
+namespace {
+// Command line options for the `bazel_build_clang_runtimes` tool.
+struct Options {
+  // Static description of the command: its name, version, and help text.
+  static const CommandLine::CommandInfo Info;
+
+  // Registers the flags and positional argument of the tool on `b`, binding
+  // each one to the matching field below.
+  auto Build(CommandLine::CommandBuilder& b) -> void;
+
+  // `--verbose`: send verbose logging to stderr.
+  bool verbose = false;
+  // `--force`: remove any existing runtimes from the output before building.
+  bool force = false;
+  // `--threads`: build using a thread pool rather than a single thread.
+  bool threads = true;
+
+  // The required `output-directory` positional argument.
+  // NOTE(review): this is a non-owning view; presumably it refers to `argv`
+  // storage -- confirm it outlives every use of these options.
+  llvm::StringRef directory;
+  // Shared code generation options; this tool only reads the target from them.
+  CodegenOptions codegen_options;
+};
+}  // namespace
+
+// The command description (name, version, and help text) used when parsing the
+// command line of this tool.
+//
+// Note that this is not constexpr so that it can include information generated
+// in separate translation units and potentially overridden at link time in the
+// version string.
+const CommandLine::CommandInfo Options::Info = {
+    .name = "bazel_build_clang_runtimes",
+    .version = Version::ToolchainInfo,
+    .help = R"""(
+A dedicated tool for use with Bazel to build Carbon's _Clang_ runtimes.
+
+This works similarly to the Carbon `build-runtimes` subcommand with some key
+differences:
+
+1) It only builds the Clang runtimes, not any Carbon-specific runtimes. This is
+   important due to the next point...
+
+2) It is a stand-alone command with minimal dependencies on Carbon to allow
+   Bazel to cache the Clang-based runtimes across most changes to the Carbon
+   toolchain when the LLVM version stays the same.
+
+3) It removes any symlinks in the built runtimes tree into other parts of the
+   installation that are not generated by this command. This allows a Bazel rule
+   to re-create those using Bazel-specific logic that connects those parts of
+   the runtimes tree to their respective inputs.
+)""",
+};
+
+// Registers all flags and the positional argument of this tool on `b`, binding
+// each parsed value to the corresponding member of this `Options`.
+auto Options::Build(CommandLine::CommandBuilder& b) -> void {
+  b.AddFlag(
+      {
+          .name = "verbose",
+          .short_name = "v",
+          .help = "Enable verbose logging to the stderr stream.",
+      },
+      [this](auto& flag) { flag.Set(&verbose); });
+
+  b.AddFlag(
+      {
+          .name = "force",
+          .help = R"""(
+Force re-creating the provided output path from scratch
+
+This will **remove** the provided output path and re-create it from scratch.
+)""",
+      },
+      [this](auto& flag) { flag.Set(&force); });
+
+  b.AddFlag(
+      {
+          .name = "threads",
+          .help = R"""(
+Controls whether threads are used to build runtimes.
+
+When enabled (the default), Carbon will try to build runtime libraries using
+threads to parallelize the operation. How many threads is controlled
+automatically by the system.
+
+Disabling threads ensures a single threaded build of the runtimes which can help
+when there are errors or other output.
+)""",
+      },
+      [this](auto& flag) {
+        flag.Default(true);
+        flag.Set(&threads);
+      });
+
+  // The shared code generation flags are registered between the tool-specific
+  // flags and the positional argument.
+  codegen_options.Build(b);
+
+  b.AddStringPositionalArg(
+      {
+          .name = "output-directory",
+          .help = R"""(
+The directory to populate with runtime libraries suitable for the selected code
+generation options.
+)""",
+      },
+      [this](auto& positional) {
+        positional.Required(true);
+        positional.Set(&directory);
+      });
+
+  // The command's action is empty: the caller acts on the parsed options.
+  b.Do([] {});
+}
+
+// Builds the `InstallPaths` for this tool from its Bazel runfiles, after
+// checking that `exe_path` can be accessed as an executable.
+static auto MakeInstallPaths(const std::filesystem::path& exe_path)
+    -> InstallPaths {
+  auto is_executable =
+      Filesystem::Cwd().Access(exe_path, Filesystem::AccessCheckFlags::Execute);
+  CARBON_CHECK(*is_executable, "Invoked with a non-executable `argv[0]`: {0}",
+               exe_path);
+  return InstallPaths::MakeForBazelRunfiles(exe_path.native());
+}
+
+// Locates the `clang` binary in the Bazel runfiles tree of `exe_path`.
+//
+// Every failure here is `CARBON_CHECK`-ed rather than returned, with a message
+// identifying which step failed.
+static auto GetClangPath(const std::filesystem::path& exe_path)
+    -> std::filesystem::path {
+  using bazel::tools::cpp::runfiles::Runfiles;
+  std::string runfiles_error;
+  std::unique_ptr<Runfiles> runfiles(
+      Runfiles::Create(exe_path.native(), &runfiles_error));
+  CARBON_CHECK(runfiles != nullptr, "Failed to find runfiles tree: {0}",
+               runfiles_error);
+  std::filesystem::path clang_path =
+      runfiles->Rlocation("llvm-project/clang/clang");
+  CARBON_CHECK(!clang_path.empty(),
+               "Unable to locate `clang` in the runfiles tree");
+  CARBON_CHECK(*Filesystem::Cwd().Access(clang_path),
+               "Runfiles `clang` path is not accessible: {0}", clang_path);
+  return clang_path;
+}
+
+// Parses the command line (everything after `argv[0]`) into an `Options`.
+//
+// Propagates any error from `CommandLine::Parse`. When the parse result is
+// `MetaSuccess`, the process exits immediately with a success status instead of
+// returning.
+static auto ParseOptions(int argc, char** argv) -> ErrorOr<Options> {
+  // Collect every argument after the program name.
+  llvm::OwningArrayRef<llvm::StringRef> args(argc - 1);
+  for (size_t i = 0; i < args.size(); ++i) {
+    args[i] = argv[i + 1];
+  }
+
+  Options options;
+  auto build_options = [&](CommandLine::CommandBuilder& b) {
+    options.Build(b);
+  };
+  CARBON_ASSIGN_OR_RETURN(
+      auto result,
+      CommandLine::Parse(args, llvm::outs(), Options::Info, build_options));
+  if (result != CommandLine::ParseResult::MetaSuccess) {
+    return options;
+  }
+
+  // Exit immediately with success if this was just a meta invocation.
+#if defined(__APPLE__)
+  // No `std::quick_exit` on macOS, despite the standard including it.
+  _Exit(0);
+#else
+  std::quick_exit(0);
+#endif
+}
+
+// The actual `main` implementation. Can return an exit code or an `Error`
+// (which causes EXIT_FAILURE).
+//
+// Note that this is primarily an internal utility for use with a specific set
+// of Bazel rules, and so many errors are directly `CARBON_CHECK`-ed instead of
+// propagated. Only basic command line errors are propagated using the `Error`
+// side of the return. Other errors in the execution environment are `CHECK`-ed
+// to provide useful backtraces when debugging.
+static auto Main(int argc, char** argv) -> ErrorOr<int> {
+  InitLLVM init_llvm(argc, argv);
+  if (argc < 1) {
+    return Error("Invoked without command line arguments");
+  }
+
+  // NOTE(review): `SetWorkingDirForBazelRun` presumably adjusts the working
+  // directory under `bazel run` and returns an executable path that remains
+  // valid afterwards -- confirm against `common/bazel_working_dir.h`.
+  std::filesystem::path exe_path = argv[0];
+  exe_path = SetWorkingDirForBazelRun(exe_path);
+
+  const auto install_paths = MakeInstallPaths(exe_path);
+
+  CARBON_ASSIGN_OR_RETURN(Options options, ParseOptions(argc, argv));
+
+  // Run Clang using the `clang` binary found in the runfiles tree rather than
+  // the default path from the installation.
+  std::filesystem::path clang_path = GetClangPath(exe_path);
+  auto fs = llvm::vfs::getRealFileSystem();
+  llvm::raw_ostream* vlog_stream = options.verbose ? &llvm::errs() : nullptr;
+  ClangRunner runner(&install_paths, fs, vlog_stream, std::move(clang_path));
+
+  // Only the target is taken from the code generation options.
+  Runtimes::Cache::Features features = {
+      .target = options.codegen_options.target.str()};
+
+  // Default to a single-threaded executor, and switch to a full thread pool
+  // when threads are enabled.
+  llvm::SingleThreadExecutor single_thread({.ThreadsRequested = 1});
+  std::optional<llvm::DefaultThreadPool> threads;
+  llvm::ThreadPoolInterface* thread_pool = &single_thread;
+  if (options.threads) {
+    threads.emplace(llvm::optimal_concurrency());
+    thread_pool = &*threads;
+  }
+
+  auto runtimes = *Runtimes::Make(options.directory.str(), vlog_stream);
+
+  if (options.force) {
+    // Remove existing runtimes to force a rebuild.
+    runtimes.Remove(Runtimes::ClangResourceDir).Check();
+    runtimes.Remove(Runtimes::LibUnwind).Check();
+    runtimes.Remove(Runtimes::Libcxx).Check();
+  }
+
+  // Create all three builders before waiting on any of them.
+  // NOTE(review): presumably this lets their work overlap on the thread pool
+  // -- confirm against the builder implementations.
+  ClangResourceDirBuilder resource_dir_builder(
+      &runner, thread_pool, llvm::Triple(features.target), &runtimes);
+  ClangArchiveRuntimesBuilder<Runtimes::LibUnwind> lib_unwind_builder(
+      &runner, thread_pool, llvm::Triple(features.target), &runtimes);
+  ClangArchiveRuntimesBuilder<Runtimes::Libcxx> libcxx_builder(
+      &runner, thread_pool, llvm::Triple(features.target), &runtimes);
+
+  // Wait for each build; only the resource directory's path is needed below.
+  std::filesystem::path resource_dir_path =
+      *std::move(resource_dir_builder).Wait();
+  std::move(lib_unwind_builder).Wait().Check();
+  std::move(libcxx_builder).Wait().Check();
+
+  // Now remove the `include` symlink from the resource_dir. We'll re-create
+  // this tree in the Bazel rule, as the symlink currently is an absolute
+  // (non-hermetic) path. We want Bazel to manage this directory with links to
+  // the actual input files.
+  Filesystem::Dir resource_dir = *Filesystem::Cwd().OpenDir(resource_dir_path);
+  resource_dir.Unlink("include").Check();
+
+  return EXIT_SUCCESS;
+}
+
+}  // namespace Carbon
+
+auto main(int argc, char** argv) -> int {
+  auto result = Carbon::Main(argc, argv);
+  if (result.ok()) {
+    return *result;
+  } else {
+    llvm::errs() << "error: " << result.error() << "\n";
+    return EXIT_FAILURE;
+  }
+}

+ 18 - 14
toolchain/driver/clang_runner.cpp

@@ -69,10 +69,15 @@ auto clang_main(int Argc, char** Argv, const llvm::ToolContext& ToolContext)
 
 namespace Carbon {
 
-ClangRunner::ClangRunner(const InstallPaths* install_paths,
-                         llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-                         llvm::raw_ostream* vlog_stream)
-    : ToolRunnerBase(install_paths, vlog_stream), fs_(std::move(fs)) {}
+// Uses `override_clang_path` as the Clang executable path when it is provided,
+// and otherwise falls back to the installation's Clang path.
+ClangRunner::ClangRunner(
+    const InstallPaths* install_paths,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+    llvm::raw_ostream* vlog_stream,
+    std::optional<std::filesystem::path> override_clang_path)
+    : ToolRunnerBase(install_paths, vlog_stream),
+      fs_(std::move(fs)),
+      clang_path_(override_clang_path ? *std::move(override_clang_path)
+                                      : installation_->clang_path()) {}
 
 // Searches an argument list to a Clang execution to determine the expected
 // target string, suitable for use with `llvm::Triple`.
@@ -211,8 +216,6 @@ auto ClangRunner::RunInternal(
     llvm::ArrayRef<llvm::StringRef> args, llvm::StringRef target,
     std::optional<llvm::StringRef> target_resource_dir_path,
     bool enable_leaking) -> bool {
-  std::string clang_path = installation_->clang_path();
-
   // Rebuild the args as C-string args.
   llvm::OwningArrayRef<char> cstr_arg_storage;
 
@@ -220,7 +223,7 @@ auto ClangRunner::RunInternal(
   // we don't synthesize any default arguments there.
   if (!args.empty() && args[0].starts_with("-cc1")) {
     llvm::SmallVector<const char*, 64> cstr_args =
-        BuildCStrArgs(clang_path, args, cstr_arg_storage);
+        BuildCStrArgs(clang_path_.native(), args, cstr_arg_storage);
     if (args[0] == "-cc1") {
       CARBON_VLOG("Dispatching `-cc1` command line...");
       int exit_code =
@@ -259,7 +262,7 @@ auto ClangRunner::RunInternal(
 
   // Rebuild the args as C-string args.
   llvm::SmallVector<const char*, 64> cstr_args =
-      BuildCStrArgs(clang_path, prefix_args, args, cstr_arg_storage);
+      BuildCStrArgs(clang_path_.native(), prefix_args, args, cstr_arg_storage);
 
   CARBON_VLOG("Running Clang driver with the following arguments:\n");
   for (const char* cstr_arg : llvm::ArrayRef(cstr_args)) {
@@ -286,8 +289,9 @@ auto ClangRunner::RunInternal(
 
   // Note that we configure the driver's *default* target here, not the expected
   // target as that will be parsed out of the command line below.
-  clang::driver::Driver driver(clang_path, llvm::sys::getDefaultTargetTriple(),
-                               diagnostics, "clang LLVM compiler", fs_);
+  clang::driver::Driver driver(clang_path_.native(),
+                               llvm::sys::getDefaultTargetTriple(), diagnostics,
+                               "clang LLVM compiler", fs_);
 
   llvm::Triple target_triple(target);
 
@@ -305,10 +309,10 @@ auto ClangRunner::RunInternal(
   }
 
   // If we have a target-specific resource directory, set it as the default
-  // here.
-  if (target_resource_dir_path) {
-    driver.ResourceDir = target_resource_dir_path->str();
-  }
+  // here, otherwise use the installation's resource directory.
+  driver.ResourceDir = target_resource_dir_path
+                           ? target_resource_dir_path->str()
+                           : installation_->clang_resource_path().native();
 
   // Configure the install directory to find other tools and data files.
   //

+ 4 - 1
toolchain/driver/clang_runner.h

@@ -64,7 +64,8 @@ class ClangRunner : ToolRunnerBase {
   // that stream. The verbose output from Clang goes to stderr regardless.
   ClangRunner(const InstallPaths* install_paths,
               llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
-              llvm::raw_ostream* vlog_stream = nullptr);
+              llvm::raw_ostream* vlog_stream = nullptr,
+              std::optional<std::filesystem::path> override_clang_path = {});
 
   // Run Clang with the provided arguments and a runtime cache for on-demand
   // runtime building.
@@ -123,6 +124,8 @@ class ClangRunner : ToolRunnerBase {
                         llvm::ThreadPoolTaskGroup& threads) -> ErrorOr<Success>;
 
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs_;
+
+  std::filesystem::path clang_path_;
 };
 
 }  // namespace Carbon

+ 0 - 6
toolchain/driver/clang_runtimes.cpp

@@ -527,12 +527,6 @@ auto ClangResourceDirBuilder::Setup() -> void {
     result_ = std::move(result).error();
     return;
   }
-  if (auto result = runtimes_builder_->dir().Symlink(
-          "share", install_resource_path / "share");
-      !result.ok()) {
-    result_ = std::move(result).error();
-    return;
-  }
 
   // Create the target's `lib` directory.
   auto lib_dir_result = runtimes_builder_->dir().CreateDirectories(lib_path_);

+ 142 - 112
toolchain/driver/clang_runtimes_test.cpp

@@ -8,6 +8,7 @@
 #include <gtest/gtest.h>
 
 #include <filesystem>
+#include <memory>
 #include <string>
 #include <utility>
 
@@ -29,10 +30,12 @@
 #include "toolchain/driver/clang_runner.h"
 #include "toolchain/driver/llvm_runner.h"
 #include "toolchain/driver/runtimes_cache.h"
+#include "tools/cpp/runfiles/runfiles.h"
 
 namespace Carbon {
 namespace {
 
+using ::bazel::tools::cpp::runfiles::Runfiles;
 using ::testing::Each;
 using ::testing::Eq;
 using ::testing::HasSubstr;
@@ -70,6 +73,12 @@ MATCHER(IsBasename, "") {
 
 class ClangRuntimesTest : public ::testing::Test {
  public:
+  // Creates the Bazel runfiles handle for this test binary, failing a check
+  // with the reported error if it cannot be created.
+  ClangRuntimesTest() {
+    std::string create_error;
+    test_runfiles_ = std::unique_ptr<Runfiles>(
+        Runfiles::Create(exe_path_, &create_error));
+    CARBON_CHECK(test_runfiles_ != nullptr, "{0}", create_error);
+  }
+
   // Helper to get the `llvm-nm` listing of defined symbols for an archive.
   //
   // TODO: It would be nice to use a library API and matchers instead of
@@ -84,7 +93,7 @@ class ClangRuntimesTest : public ::testing::Test {
           LLVMTool::Nm, {"--format=just-symbols", "--defined-only", "--quiet",
                          archive.native()});
     });
-    CARBON_CHECK(result, "Unable to run `llvm-nm`:\n{1}", err);
+    CARBON_CHECK(result, "Unable to run `llvm-nm`:\n{0}", err);
 
     return out;
   }
@@ -122,8 +131,110 @@ class ClangRuntimesTest : public ::testing::Test {
     return result;
   }
 
-  InstallPaths install_paths_ =
-      InstallPaths::MakeForBazelRunfiles(Testing::GetExePath());
+  // Checks the contents of a built Clang resource directory at
+  // `resource_dir_path`: the CRT begin/end objects on Linux targets, and the
+  // builtins archive on all targets.
+  auto TestResourceDir(std::filesystem::path resource_dir_path) -> void {
+    // For Linux we can directly check the CRT begin/end object files.
+    if (target_triple_.isOSLinux()) {
+      std::filesystem::path crt_begin_path =
+          resource_dir_path / "lib" / target_ / "clang_rt.crtbegin.o";
+      ASSERT_TRUE(std::filesystem::is_regular_file(crt_begin_path));
+      // NOTE(review): `begin_result` is dereferenced below without first
+      // checking that `createObjectFile` succeeded -- consider asserting on it.
+      auto begin_result =
+          llvm::object::ObjectFile::createObjectFile(crt_begin_path.native());
+      llvm::object::ObjectFile& crtbegin = *begin_result->getBinary();
+      EXPECT_TRUE(crtbegin.isELF());
+      EXPECT_TRUE(crtbegin.isObject());
+      EXPECT_THAT(crtbegin.getArch(), Eq(target_triple_.getArch()));
+
+      llvm::SmallVector<llvm::object::SymbolRef> symbols(crtbegin.symbols());
+      // The first symbol should come from the source file.
+      EXPECT_THAT(*symbols.front().getName(), Eq("crtbegin.c"));
+
+      // Check for representative symbols of `crtbegin.o` -- we always use
+      // `.init_array` in our runtimes build so we have predictable functions.
+      EXPECT_THAT(symbols, IsSupersetOf({TextSymbolNamed("__do_init"),
+                                         TextSymbolNamed("__do_fini")}));
+
+      std::filesystem::path crt_end_path =
+          resource_dir_path / "lib" / target_ / "clang_rt.crtend.o";
+      ASSERT_TRUE(std::filesystem::is_regular_file(crt_end_path));
+      // NOTE(review): as above, `end_result` is dereferenced without checking
+      // that `createObjectFile` succeeded.
+      auto end_result =
+          llvm::object::ObjectFile::createObjectFile(crt_end_path.native());
+      llvm::object::ObjectFile& crtend = *end_result->getBinary();
+      EXPECT_TRUE(crtend.isELF());
+      EXPECT_TRUE(crtend.isObject());
+      EXPECT_THAT(crtend.getArch(), Eq(target_triple_.getArch()));
+
+      // Just check the source file symbol, not much of interest in the end.
+      llvm::object::SymbolRef crtend_front_symbol = *crtend.symbol_begin();
+      EXPECT_THAT(*crtend_front_symbol.getName(), Eq("crtend.c"));
+    }
+
+    // Across all targets, check that the builtins archive exists, and contains
+    // a relevant symbol by running the `llvm-nm` tool over it. Using `nm`
+    // rather than directly inspecting the objects is a bit awkward, but lets us
+    // easily ignore the wrapping in an archive file.
+    std::filesystem::path builtins_path =
+        resource_dir_path / "lib" / target_ / "libclang_rt.builtins.a";
+    std::string builtins_symbols = NmListDefinedSymbols(builtins_path);
+
+    // Check that we found a definition of `__mulodi4`, a builtin function
+    // provided by Compiler-RT.
+    ExpectSymbol(builtins_symbols, "__mulodi4");
+
+    // Check that we don't include the `chkstk` builtins outside of Windows.
+    if (!target_triple_.isOSWindows()) {
+      EXPECT_THAT(builtins_symbols, Not(HasSubstr("chkstk")));
+    }
+
+    // Check that member names don't contain full paths, as that is the
+    // canonical format produced by `ar`.
+    auto member_names = ListArchiveMemberNames(builtins_path);
+    EXPECT_THAT(member_names, Each(IsBasename()));
+  }
+
+  // Checks that the libunwind archive at `libunwind_path` defines a
+  // representative set of symbols and uses basename-only member names.
+  auto TestLibunwind(std::filesystem::path libunwind_path) -> void {
+    std::string libunwind_symbols = NmListDefinedSymbols(libunwind_path);
+
+    // Check a few of the main exported symbols here. The set here is somewhat
+    // arbitrary, but chosen to be among the more stable names and have at least
+    // one from most of the object files that should be linked into the archive.
+    ExpectSymbol(libunwind_symbols, "_Unwind_Resume");
+    ExpectSymbol(libunwind_symbols, "_Unwind_Backtrace");
+    ExpectSymbol(libunwind_symbols, "__unw_getcontext");
+    ExpectSymbol(libunwind_symbols, "__unw_get_proc_info");
+
+    // Check that member names don't contain full paths, as that is the
+    // canonical format produced by `ar`.
+    auto member_names = ListArchiveMemberNames(libunwind_path);
+    EXPECT_THAT(member_names, Each(IsBasename()));
+  }
+
+  // Checks that the libc++ archive at `libcxx_path` defines representative
+  // libc++ and libc++abi symbols and uses basename-only member names.
+  auto TestLibcxx(std::filesystem::path libcxx_path) -> void {
+    std::string libcxx_symbols = NmListDefinedSymbols(libcxx_path);
+
+    // First check a few fundamental symbols from libc++.a, including symbols
+    // both within the ABI namespace and outside of it.
+    ExpectSymbol(libcxx_symbols, "_ZNKSt12bad_any_cast4whatEv");
+    ExpectSymbol(libcxx_symbols, "_ZNSt2_C8to_charsEPcS0_d");
+    ExpectSymbol(libcxx_symbols, "_ZSt17current_exceptionv");
+    ExpectSymbol(libcxx_symbols, "_ZNKSt2_C10filesystem4path10__filenameEv");
+
+    // Check that several of the libc++abi object files are also included in the
+    // archive.
+    ExpectSymbol(libcxx_symbols, "__cxa_bad_cast");
+    ExpectSymbol(libcxx_symbols, "__cxa_new_handler");
+    ExpectSymbol(libcxx_symbols, "__cxa_demangle");
+    ExpectSymbol(libcxx_symbols, "__cxa_get_globals");
+    ExpectSymbol(libcxx_symbols, "_ZSt9terminatev");
+
+    // Check that member names don't contain full paths, as that is the
+    // canonical format produced by `ar`.
+    auto member_names = ListArchiveMemberNames(libcxx_path);
+    EXPECT_THAT(member_names, Each(IsBasename()));
+  }
+
+  std::string exe_path_ = Testing::GetExePath().str();
+  std::unique_ptr<Runfiles> test_runfiles_;
+  InstallPaths install_paths_ = InstallPaths::MakeForBazelRunfiles(exe_path_);
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> vfs_ =
       llvm::vfs::getRealFileSystem();
   // Note that for debugging, you can pass `llvm::errs()` as the vlog stream,
@@ -152,65 +263,7 @@ TEST_F(ClangRuntimesTest, ResourceDir) {
                                                target_triple_, &runtimes_);
   auto build_result = std::move(resource_dir_builder).Wait();
   ASSERT_TRUE(build_result.ok()) << build_result.error();
-  std::filesystem::path resource_dir_path = std::move(*build_result);
-
-  // For Linux we can directly check the CRT begin/end object files.
-  if (target_triple_.isOSLinux()) {
-    std::filesystem::path crt_begin_path =
-        resource_dir_path / "lib" / target_ / "clang_rt.crtbegin.o";
-    ASSERT_TRUE(std::filesystem::is_regular_file(crt_begin_path));
-    auto begin_result =
-        llvm::object::ObjectFile::createObjectFile(crt_begin_path.native());
-    llvm::object::ObjectFile& crtbegin = *begin_result->getBinary();
-    EXPECT_TRUE(crtbegin.isELF());
-    EXPECT_TRUE(crtbegin.isObject());
-    EXPECT_THAT(crtbegin.getArch(), Eq(target_triple_.getArch()));
-
-    llvm::SmallVector<llvm::object::SymbolRef> symbols(crtbegin.symbols());
-    // The first symbol should come from the source file.
-    EXPECT_THAT(*symbols.front().getName(), Eq("crtbegin.c"));
-
-    // Check for representative symbols of `crtbegin.o` -- we always use
-    // `.init_array` in our runtimes build so we have predictable functions.
-    EXPECT_THAT(symbols, IsSupersetOf({TextSymbolNamed("__do_init"),
-                                       TextSymbolNamed("__do_fini")}));
-
-    std::filesystem::path crt_end_path =
-        resource_dir_path / "lib" / target_ / "clang_rt.crtend.o";
-    ASSERT_TRUE(std::filesystem::is_regular_file(crt_end_path));
-    auto end_result =
-        llvm::object::ObjectFile::createObjectFile(crt_end_path.native());
-    llvm::object::ObjectFile& crtend = *end_result->getBinary();
-    EXPECT_TRUE(crtend.isELF());
-    EXPECT_TRUE(crtend.isObject());
-    EXPECT_THAT(crtend.getArch(), Eq(target_triple_.getArch()));
-
-    // Just check the source file symbol, not much of interest in the end.
-    llvm::object::SymbolRef crtend_front_symbol = *crtend.symbol_begin();
-    EXPECT_THAT(*crtend_front_symbol.getName(), Eq("crtend.c"));
-  }
-
-  // Across all targets, check that the builtins archive exists, and contains a
-  // relevant symbol by running the `llvm-nm` tool over it. Using `nm` rather
-  // than directly inspecting the objects is a bit awkward, but lets us easily
-  // ignore the wrapping in an archive file.
-  std::filesystem::path builtins_path =
-      resource_dir_path / "lib" / target_ / "libclang_rt.builtins.a";
-  std::string builtins_symbols = NmListDefinedSymbols(builtins_path);
-
-  // Check that we found a definition of `__mulodi4`, a builtin function
-  // provided by Compiler-RT.
-  ExpectSymbol(builtins_symbols, "__mulodi4");
-
-  // Check that we don't include the `chkstk` builtins outside of Windows.
-  if (!target_triple_.isOSWindows()) {
-    EXPECT_THAT(builtins_symbols, Not(HasSubstr("chkstk")));
-  }
-
-  // Check that member names don't contain full paths, as that is the
-  // canonical format produced by `ar`.
-  auto member_names = ListArchiveMemberNames(builtins_path);
-  EXPECT_THAT(member_names, Each(IsBasename()));
+  TestResourceDir(std::move(*build_result));
 }
 
 TEST_F(ClangRuntimesTest, Libunwind) {
@@ -219,63 +272,40 @@ TEST_F(ClangRuntimesTest, Libunwind) {
   auto build_result = std::move(libunwind_builder).Wait();
   ASSERT_TRUE(build_result.ok()) << build_result.error();
   std::filesystem::path runtimes_path = std::move(*build_result);
-
-  std::filesystem::path libunwind_path = runtimes_path / "lib/libunwind.a";
-  std::string libunwind_symbols = NmListDefinedSymbols(libunwind_path);
-
-  // Check a few of the main exported symbols here. The set here is somewhat
-  // arbitrary, but chosen to be among the more stable names and have at least
-  // one from most of the object files that should be linked into the archive.
-  ExpectSymbol(libunwind_symbols, "_Unwind_Resume");
-  ExpectSymbol(libunwind_symbols, "_Unwind_Backtrace");
-  ExpectSymbol(libunwind_symbols, "__unw_getcontext");
-  ExpectSymbol(libunwind_symbols, "__unw_get_proc_info");
-
-  // Check that member names don't contain full paths, as that is the
-  // canonical format produced by `ar`.
-  auto member_names = ListArchiveMemberNames(libunwind_path);
-  EXPECT_THAT(member_names, Each(IsBasename()));
+  TestLibunwind(runtimes_path / "lib/libunwind.a");
 }
 
-TEST_F(ClangRuntimesTest, Libcxx) {
-#if __has_feature(address_sanitizer)
-  // ASan causes Clang and LLVM to be _egregiously_ inefficient at compiling
-  // libc++, taking 5x - 10x longer than without ASan. Rough estimate is that it
-  // would take 5-10 minutes on GitHub's Linux runner. Given the limited utility
-  // of this test coverage, skip it in that configuration. This also misses
-  // assert-coverage for building libc++, but we don't really expect issues
-  // there. Misconfiguration and other common issues should still be covered in
-  // fully optimized builds at much lower cost.
-  GTEST_SKIP() << "Skipping build of libc++ with an ASan-itized Clang";
-#endif
-
+// ASan causes Clang and LLVM to be _egregiously_ inefficient at compiling
+// libc++, taking 5x - 10x longer than without ASan. Rough estimate is that it
+// would take 5-10 minutes on GitHub's Linux runner.
+//
+// We test libc++ in the prebuilt runtimes below in a more cache friendly and
+// sustainable way. Given that, we disable this test by default but include it
+// for debugging purposes.
+TEST_F(ClangRuntimesTest, DISABLED_Libcxx) {
   LibcxxBuilder libcxx_builder(&runner_, &threads_, target_triple_, &runtimes_);
   auto build_result = std::move(libcxx_builder).Wait();
   ASSERT_TRUE(build_result.ok()) << build_result.error();
   std::filesystem::path runtimes_path = std::move(*build_result);
+  TestLibcxx(runtimes_path / "lib/libc++.a");
+}
+
+TEST_F(ClangRuntimesTest, PrebuiltResourceDir) {
+  std::filesystem::path prebuilt_runtimes_path = test_runfiles_->Rlocation(
+      "carbon/toolchain/driver/prebuilt_runtimes_tree");
+  TestResourceDir(prebuilt_runtimes_path / "clang_resource_dir");
+}
+
+TEST_F(ClangRuntimesTest, PrebuiltLibunwind) {
+  std::filesystem::path prebuilt_runtimes_path = test_runfiles_->Rlocation(
+      "carbon/toolchain/driver/prebuilt_runtimes_tree");
+  TestLibunwind(prebuilt_runtimes_path / "libunwind/lib/libunwind.a");
+}
 
-  std::filesystem::path libcxx_path = runtimes_path / "lib/libc++.a";
-  std::string libcxx_symbols = NmListDefinedSymbols(libcxx_path);
-
-  // First check a few fundamental symbols from libc++.a, including symbols both
-  // within the ABI namespace and outside of it.
-  ExpectSymbol(libcxx_symbols, "_ZNKSt12bad_any_cast4whatEv");
-  ExpectSymbol(libcxx_symbols, "_ZNSt2_C8to_charsEPcS0_d");
-  ExpectSymbol(libcxx_symbols, "_ZSt17current_exceptionv");
-  ExpectSymbol(libcxx_symbols, "_ZNKSt2_C10filesystem4path10__filenameEv");
-
-  // Check that several of the libc++abi object files are also included in the
-  // archive.
-  ExpectSymbol(libcxx_symbols, "__cxa_bad_cast");
-  ExpectSymbol(libcxx_symbols, "__cxa_new_handler");
-  ExpectSymbol(libcxx_symbols, "__cxa_demangle");
-  ExpectSymbol(libcxx_symbols, "__cxa_get_globals");
-  ExpectSymbol(libcxx_symbols, "_ZSt9terminatev");
-
-  // Check that member names don't contain full paths, as that is the
-  // canonical format produced by `ar`.
-  auto member_names = ListArchiveMemberNames(libcxx_path);
-  EXPECT_THAT(member_names, Each(IsBasename()));
+TEST_F(ClangRuntimesTest, PrebuiltLibcxx) {
+  std::filesystem::path prebuilt_runtimes_path = test_runfiles_->Rlocation(
+      "carbon/toolchain/driver/prebuilt_runtimes_tree");
+  TestLibcxx(prebuilt_runtimes_path / "libcxx/lib/libc++.a");
 }
 
 }  // namespace

+ 177 - 0
toolchain/driver/prebuilt_runtimes.bzl

@@ -0,0 +1,177 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Rules for pre-building a runtimes tree."""
+
+def _prebuilt_clang_runtimes_impl(ctx):
+    """Implements the rule that builds the Clang runtimes tree.
+
+    Registers a single action that runs the runtimes builder with the three
+    declared `lib` directories as its outputs, and symlinks each of Clang's
+    builtin headers into `clang_resource_dir/include` below the tree.
+
+    Args:
+      ctx: The rule context.
+
+    Returns:
+      A list containing a `DefaultInfo` whose files and runfiles are the
+      declared output directories plus the symlinked headers.
+    """
+
+    # Use the exec-configuration builder when it is set, and otherwise fall
+    # back to the target-configuration builder.
+    runtimes_builder = ctx.executable.internal_exec_runtimes_builder
+    if runtimes_builder == None:
+        runtimes_builder = ctx.executable.internal_target_runtimes_builder
+
+    # Declare the output directories created when building the runtimes. We
+    # can't declare just the top-level directory as we need to overlay an
+    # `include` tree into the `clang_resource_dir`, but we try to use
+    # directories to minimize writing out the complete list of files in each
+    # runtime.
+    output_dirs = [
+        ctx.actions.declare_directory(ctx.attr.runtimes_path + "/clang_resource_dir/lib"),
+        ctx.actions.declare_directory(ctx.attr.runtimes_path + "/libunwind/lib"),
+        ctx.actions.declare_directory(ctx.attr.runtimes_path + "/libcxx/lib"),
+    ]
+
+    # Build up the arguments to use with the runtimes build command.
+    args = ctx.actions.args()
+    args.add("--force")
+
+    # Provide an explicit target if requested.
+    if ctx.attr.target:
+        args.add(ctx.attr.target, format = "--target=%s")
+
+    # Compute the path to the root of the runtimes tree we're trying to build.
+    # We work backwards from the declared `clang_resource_dir/lib` entry.
+    root_path_arg = output_dirs[0].path
+    if not root_path_arg.endswith("/clang_resource_dir/lib"):
+        fail("Unexpected path structure: " + root_path_arg)
+    root_path_arg = root_path_arg[:-len("/clang_resource_dir/lib")]
+    args.add(root_path_arg)
+
+    # Run the runtimes building tool with the arguments.
+    ctx.actions.run(
+        outputs = output_dirs,
+        executable = runtimes_builder,
+        arguments = [args],
+        mnemonic = "BuildRuntimes",
+        progress_message = "Building runtimes target %{label}",
+
+        # Building runtimes will use all the available CPUs. We can't directly
+        # model this in Bazel, so we use a somewhat arbitrary but large number
+        # of cores here to indicate that this is a _very_ expensive action. This
+        # should minimize the risk of other actions running in parallel in
+        # constrained environments and timing out.
+        #
+        # NOTE(review): confirm that Bazel honors a `cpu:N` entry in
+        # `execution_requirements` for non-test actions; the `resource_set`
+        # parameter of `ctx.actions.run` may be the intended hook for this.
+        execution_requirements = {"cpu:64": ""},
+    )
+
+    # Now overlay Clang's builtin headers onto the `clang_resource_dir` as we
+    # can't have separate library search and header search paths for that
+    # specific runtime, and we want Bazel to be aware of the origin of these
+    # headers rather than creating copies when building the runtimes.
+    target_include_root = ctx.attr.runtimes_path + "/clang_resource_dir/include"
+
+    # We need to compute the relative path of each header file within the
+    # resource directory's `include` directory. We do this by stripping off
+    # their prefix.
+    #
+    # TODO: It would be nice to find a cleaner way to do this that avoids
+    # hard-coding both the repository name's spelling and the rule layout.
+    headers_prefix = "/external/+llvm_project+llvm-project/clang/staging/include/"
+
+    # Walk all the headers and symlink them into the runtimes tree below the
+    # target root. Collect the results for use in establishing dependencies.
+    input_headers = ctx.attr._builtin_headers.files.to_list()
+    output_headers = []
+    for f in input_headers:
+        # Ensure the file actually lives under the expected path. The leading
+        # `f.root.path` portion is dropped first so that the remainder can be
+        # compared against `headers_prefix`.
+        path = f.path[len(f.root.path):]
+        if not path.startswith(headers_prefix):
+            fail("Header file '{}' is not under the expected prefix '{}'".format(
+                path,
+                headers_prefix,
+            ))
+
+        # Keep only the part of the path below the headers' `include` root.
+        path = path[len(headers_prefix):]
+
+        # Declare the output file preserving the relative structure. Bazel
+        # automatically creates any intermediate directories (e.g.
+        # `sanitizer/`).
+        out_file = ctx.actions.declare_file("{}/{}".format(target_include_root, path))
+
+        ctx.actions.symlink(output = out_file, target_file = f)
+        output_headers.append(out_file)
+
+    return [
+        DefaultInfo(
+            # Build the actual runtimes tree when the target is built.
+            files = depset(output_dirs + output_headers),
+            # Make the resulting tree available in the Bazel runfiles tree.
+            runfiles = ctx.runfiles(files = output_dirs + output_headers),
+        ),
+    ]
+
+# The rule implementing the Clang runtimes build. This is kept in its own rule
+# and uses separate tools so that it can isolate its dependencies and thus how
+# often it has to be re-built as much as possible.
+#
+# Exactly which of the two `internal_*_runtimes_builder` attributes is set is
+# decided by the `prebuilt_runtimes` macro; the implementation uses the
+# exec-configured one when it is set and the target-configured one otherwise.
+_prebuilt_clang_runtimes_internal = rule(
+    implementation = _prebuilt_clang_runtimes_impl,
+    attrs = {
+        # These are technically builtin attributes, but we provide them via a
+        # macro in order to use select to configure them based on a flag.
+        #
+        # The runtimes builder executable, built in the exec configuration.
+        "internal_exec_runtimes_builder": attr.label(
+            allow_single_file = True,
+            executable = True,
+            cfg = "exec",
+        ),
+        # The runtimes builder executable, built in the target configuration.
+        "internal_target_runtimes_builder": attr.label(
+            allow_single_file = True,
+            executable = True,
+            cfg = "target",
+        ),
+        "runtimes_path": attr.string(
+            doc = "The path for the root of the runtimes",
+            mandatory = True,
+        ),
+        "target": attr.string(
+            doc = "Optional target for the built runtimes",
+        ),
+        # Clang's builtin headers; the implementation symlinks each of these
+        # into `clang_resource_dir/include` below `runtimes_path`.
+        "_builtin_headers": attr.label(
+            default = Label("//toolchain/install:clang_headers"),
+        ),
+    },
+)
+
+def prebuilt_runtimes(name, target = None, tags = []):
+    """Build a runtimes tree.
+
+    The runtimes will be built into the directory `name + "_tree"`, and
+    collected into a filegroup with the provided name for use in rules accessing
+    these runtimes. The Clang runtimes themselves are built by an internal
+    target named `name + "_clang"`, which is the filegroup's only source.
+
+    Args:
+      name: The name of the runtimes build target.
+      target: Optional `--target` flag value to use when building the runtimes.
+      tags: Tags to apply to the rule.
+    """
+    runtimes_path = name + "_tree"
+
+    _prebuilt_clang_runtimes_internal(
+        name = name + "_clang",
+        runtimes_path = runtimes_path,
+        target = target,
+        tags = tags,
+
+        # Synthesize mirrored `select`-filled attributes here so that they can
+        # have different internal properties (that can't be `select`-ed) and we
+        # can select between the attributes instead. Exactly one of the two is
+        # set to the builder label; the other is `None`.
+        internal_exec_runtimes_builder = select({
+            "//toolchain/driver:use_target_config_runtimes_builder_config": None,
+            "//conditions:default": "//toolchain/driver:bazel_build_clang_runtimes",
+        }),
+        internal_target_runtimes_builder = select({
+            "//toolchain/driver:use_target_config_runtimes_builder_config": "//toolchain/driver:bazel_build_clang_runtimes",
+            "//conditions:default": None,
+        }),
+    )
+
+    # TODO: Add building of the Carbon runtimes here using a parallel rule to
+    # the above, but adjusted to use the Carbon driver itself as there will be
+    # no dependency reduction to be gained with a dedicated tool. It should
+    # reuse `runtimes_path` so that we get a unified runtimes tree for
+    # downstream use.
+
+    # Assemble the various runtimes into a single filegroup for easy
+    # dependencies.
+    native.filegroup(
+        name = name,
+        srcs = [
+            ":" + name + "_clang",
+        ],
+    )