Jelajahi Sumber

Add utilities for managing a toolchain install, and install and use LLD. (#3993)

The install directory contains the BUILD logic for creating an
installable tree of data files and executables for the toolchain, and
a library to facilitate toolchain code accessing the paths to their data
within this installation.

Then adds an installation of LLD in a synthetic LLVM installation, and
teaches the Clang runner to configure this and use it for linking
instead of the system linker.

Currently, the install paths only really manage access to the LLVM
binaries installed and used by the Clang runner for linking, but
eventually other data files like the prelude and runtime libraries will
be fleshed out as well. There are TODOs for moving more things over here
such as the prelude.

One interesting aspect of this is where to put helpers like parts of
LLVM in our install. This PR suggests nesting those files under
`lib/carbon`. While using a `lib` subdirectory isn't a perfect fit for
the FHS (Filesystem Hierarchy Standard), having a single location where
private data is collected is significantly superior to spreading them
across the system. This also matches similar patterns used by Clang
itself and several other language toolchains and standard libraries.

The install directory also provides a natural place for us to build out
packaging rules to create installable packages in various formats, but
that remains future work.

---------

Co-authored-by: Jon Ross-Perkins <jperkins@google.com>
Chandler Carruth 1 tahun lalu
induk
melakukan
d3a5b0eee7

+ 9 - 7
bazel/carbon_rules/defs.bzl

@@ -22,6 +22,9 @@ def carbon_binary(name, srcs):
         #
         # TODO: This is a hack; replace with something better once the toolchain
         # supports doing so.
+        #
+        # TODO: Switch to the `prefix_root` based rule similar to linking when
+        # the prelude moves there.
         out = src + ".o"
         srcs_reordered = [s for s in srcs if s != src] + [src]
         run_binary(
@@ -40,14 +43,13 @@ def carbon_binary(name, srcs):
     #
     # TODO: This will need to be revisited eventually.
     objs = [s + ".o" for s in srcs]
-    run_binary(
+    native.genrule(
         name = name + ".link",
-        tool = "//toolchain/driver:carbon",
-        args = (["link"] +
-                ["$(location %s)" % s for s in objs] +
-                ["--output=$(location %s)" % name]),
+        tools = [
+            "//toolchain/install:prefix_root/bin/carbon",
+            "//toolchain/install:install_data",
+        ],
+        cmd = "$(execpath //toolchain/install:prefix_root/bin/carbon) link --output=$@ $(SRCS)",
         srcs = objs,
         outs = [name],
-        # `link` has a dependency on ld, which should be in /usr/bin.
-        env = {"PATH": "/usr/bin"},
     )

+ 11 - 1
bazel/check_deps/check_non_test_cc_deps.py

@@ -42,7 +42,13 @@ for dep in deps:
 
         # Other packages in the LLVM project shouldn't be accidentally used
         # in Carbon. We can expand the above list if use cases emerge.
-        if package not in ("llvm", "lld", "clang", "clang-tools-extra/clangd"):
+        if package not in (
+            "llvm",
+            "lld",
+            "clang",
+            "clang-tools-extra/clangd",
+            "libunwind",
+        ):
             sys.exit(
                 "ERROR: unexpected dependency into the LLVM project: %s" % dep
             )
@@ -69,6 +75,10 @@ for dep in deps:
     if repo_base == "@@rules_cc" and rule == ":link_extra_lib":
         continue
 
+    # A utility library provided by Bazel that is under a compatible license.
+    if repo_base == "@@bazel_tools" and rule == "tools/cpp/runfiles:runfiles":
+        continue
+
     # These are stubs wrapping system libraries for LLVM. They aren't
     # distributed and so should be fine.
     if repo_base in (

+ 2 - 1
explorer/file_test.cpp

@@ -18,7 +18,8 @@ namespace {
 
 class ExplorerFileTest : public FileTestBase {
  public:
-  explicit ExplorerFileTest(llvm::StringRef test_name)
+  explicit ExplorerFileTest(llvm::StringRef /*exe_path*/,
+                            llvm::StringRef test_name)
       : FileTestBase(test_name),
         prelude_line_re_(R"(prelude.carbon:(\d+))"),
         timing_re_(R"((Time elapsed in \w+: )\d+(ms))") {

+ 1 - 0
testing/file_test/BUILD

@@ -33,6 +33,7 @@ cc_library(
         ":autoupdate",
         "//common:check",
         "//common:error",
+        "//common:exe_path",
         "//common:init_llvm",
         "//common:ostream",
         "@abseil-cpp//absl/flags:flag",

+ 14 - 8
testing/file_test/file_test_base.cpp

@@ -16,6 +16,7 @@
 #include "absl/flags/parse.h"
 #include "common/check.h"
 #include "common/error.h"
+#include "common/exe_path.h"
 #include "common/init_llvm.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
@@ -826,6 +827,8 @@ static auto Main(int argc, char** argv) -> int {
     return EXIT_FAILURE;
   }
 
+  std::string exe_path = FindExecutablePath(argv[0]);
+
   // Tests might try to read from stdin. Ensure those reads fail by closing
   // stdin and reopening it as /dev/null. Note that STDIN_FILENO doesn't exist
   // on Windows, but POSIX requires it to be 0.
@@ -854,8 +857,9 @@ static auto Main(int argc, char** argv) -> int {
     std::mutex errs_mutex;
 
     for (const auto& test_name : tests) {
-      pool.async([&test_factory, &errs_mutex, test_name] {
-        std::unique_ptr<FileTestBase> test(test_factory.factory_fn(test_name));
+      pool.async([&test_factory, &errs_mutex, &exe_path, test_name] {
+        std::unique_ptr<FileTestBase> test(
+            test_factory.factory_fn(exe_path, test_name));
         auto result = test->Autoupdate();
 
         // Guard access to llvm::errs, which is not thread-safe.
@@ -872,7 +876,8 @@ static auto Main(int argc, char** argv) -> int {
     return EXIT_SUCCESS;
   } else if (absl::GetFlag(FLAGS_dump_output)) {
     for (const auto& test_name : tests) {
-      std::unique_ptr<FileTestBase> test(test_factory.factory_fn(test_name));
+      std::unique_ptr<FileTestBase> test(
+          test_factory.factory_fn(exe_path, test_name));
       auto result = test->DumpOutput();
       if (!result.ok()) {
         llvm::errs() << "\n" << result.error().message() << "\n";
@@ -882,11 +887,12 @@ static auto Main(int argc, char** argv) -> int {
     return EXIT_SUCCESS;
   } else {
     for (llvm::StringRef test_name : tests) {
-      testing::RegisterTest(test_factory.name, test_name.data(), nullptr,
-                            test_name.data(), __FILE__, __LINE__,
-                            [&test_factory, test_name = test_name]() {
-                              return test_factory.factory_fn(test_name);
-                            });
+      testing::RegisterTest(
+          test_factory.name, test_name.data(), nullptr, test_name.data(),
+          __FILE__, __LINE__,
+          [&test_factory, &exe_path, test_name = test_name]() {
+            return test_factory.factory_fn(exe_path, test_name);
+          });
     }
     return RUN_ALL_TESTS();
   }

+ 8 - 4
testing/file_test/file_test_base.h

@@ -184,7 +184,9 @@ struct FileTestFactory {
   const char* name;
 
   // A factory function for tests.
-  std::function<FileTestBase*(llvm::StringRef path)> factory_fn;
+  std::function<FileTestBase*(llvm::StringRef exe_path,
+                              llvm::StringRef test_name)>
+      factory_fn;
 };
 
 // Must be implemented by the individual file_test to initialize tests.
@@ -198,9 +200,11 @@ struct FileTestFactory {
 extern auto GetFileTestFactory() -> FileTestFactory;
 
 // Provides a standard GetFileTestFactory implementation.
-#define CARBON_FILE_TEST_FACTORY(Name)                                   \
-  auto GetFileTestFactory() -> FileTestFactory {                         \
-    return {#Name, [](llvm::StringRef path) { return new Name(path); }}; \
+#define CARBON_FILE_TEST_FACTORY(Name)                                       \
+  auto GetFileTestFactory() -> FileTestFactory {                             \
+    return {#Name, [](llvm::StringRef exe_path, llvm::StringRef test_name) { \
+              return new Name(exe_path, test_name);                          \
+            }};                                                              \
   }
 
 }  // namespace Carbon::Testing

+ 2 - 1
testing/file_test/file_test_base_test.cpp

@@ -16,7 +16,8 @@ namespace {
 
 class FileTestBaseTest : public FileTestBase {
  public:
-  using FileTestBase::FileTestBase;
+  FileTestBaseTest(llvm::StringRef /*exe_path*/, llvm::StringRef test_name)
+      : FileTestBase(test_name) {}
 
   auto Run(const llvm::SmallVector<llvm::StringRef>& test_args,
            llvm::vfs::InMemoryFileSystem& fs, llvm::raw_pwrite_stream& stdout,

+ 3 - 1
toolchain/check/check_fuzzer.cpp

@@ -26,8 +26,10 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
       llvm::MemoryBuffer::getMemBuffer(data_ref, /*BufferName=*/TestFileName,
                                        /*RequiresNullTerminator=*/false)));
 
+  // TODO: We should try to thread the executable path into here.
+  const auto install_paths = InstallPaths::Make("");
   llvm::raw_null_ostream null_ostream;
-  Driver driver(fs, "", null_ostream, null_ostream);
+  Driver driver(fs, &install_paths, "", null_ostream, null_ostream);
 
   // TODO: Get checking to a point where it can handle invalid parse trees
   // without crashing.

+ 12 - 1
toolchain/driver/BUILD

@@ -17,10 +17,14 @@ cc_library(
     name = "clang_runner",
     srcs = ["clang_runner.cpp"],
     hdrs = ["clang_runner.h"],
+    data = [
+        "//toolchain/install:llvm_link_data",
+    ],
     deps = [
         "//common:command_line",
         "//common:ostream",
         "//common:vlog",
+        "//toolchain/install:install_paths",
         "@llvm-project//clang:basic",
         "@llvm-project//clang:driver",
         "@llvm-project//clang:frontend",
@@ -52,7 +56,10 @@ cc_library(
     name = "driver",
     srcs = ["driver.cpp"],
     hdrs = ["driver.h"],
-    data = ["//core:prelude"],
+    data = [
+        "//core:prelude",
+        "//toolchain/install:install_lib_data",
+    ],
     textual_hdrs = ["flags.def"],
     deps = [
         ":clang_runner",
@@ -63,6 +70,7 @@ cc_library(
         "//toolchain/codegen",
         "//toolchain/diagnostics:diagnostic_emitter",
         "//toolchain/diagnostics:sorting_diagnostic_consumer",
+        "//toolchain/install:install_paths",
         "//toolchain/lex",
         "//toolchain/lower",
         "//toolchain/parse",
@@ -86,6 +94,7 @@ cc_test(
         "//testing/base:gtest_main",
         "//testing/base:test_raw_ostream",
         "//toolchain/diagnostics:diagnostic_emitter",
+        "//toolchain/install:install_paths",
         "//toolchain/lex:tokenized_buffer_test_helpers",
         "//toolchain/testing:yaml_test_helpers",
         "@googletest//:gtest",
@@ -102,6 +111,7 @@ cc_fuzz_test(
     deps = [
         ":driver",
         "//testing/base:test_raw_ostream",
+        "//toolchain/install:install_paths",
         "@llvm-project//llvm:Support",
     ],
 )
@@ -116,6 +126,7 @@ cc_binary(
         "//common:bazel_working_dir",
         "//common:exe_path",
         "//common:init_llvm",
+        "//toolchain/install:install_paths",
         "@llvm-project//llvm:Support",
     ],
 )

+ 17 - 19
toolchain/driver/clang_runner.cpp

@@ -30,22 +30,9 @@
 
 namespace Carbon {
 
-static auto GetExecutablePath(llvm::StringRef exe_name) -> std::string {
-  // If the `exe_name` isn't already a valid path, look it up.
-  if (!llvm::sys::fs::exists(exe_name)) {
-    if (llvm::ErrorOr<std::string> path_result =
-            llvm::sys::findProgramByName(exe_name)) {
-      return *path_result;
-    }
-  }
-
-  return exe_name.str();
-}
-
-ClangRunner::ClangRunner(llvm::StringRef exe_name, llvm::StringRef target,
-                         llvm::raw_ostream* vlog_stream)
-    : exe_name_(exe_name),
-      exe_path_(GetExecutablePath(exe_name)),
+ClangRunner::ClangRunner(const InstallPaths* install_paths,
+                         llvm::StringRef target, llvm::raw_ostream* vlog_stream)
+    : installation_(install_paths),
       target_(target),
       vlog_stream_(vlog_stream),
       diagnostic_ids_(new clang::DiagnosticIDs()) {}
@@ -70,7 +57,10 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
   // Render the arguments into null-terminated C-strings for use by the Clang
   // driver. Command lines can get quite long in build systems so this tries to
   // minimize the memory allocation overhead.
-  std::array<llvm::StringRef, 1> exe_arg = {exe_name_};
+
+  // Start with a dummy executable name. We'll manually set the install
+  // directory below.
+  std::array<llvm::StringRef, 1> exe_arg = {"clang-runner"};
   auto args_range =
       llvm::concat<const llvm::StringRef>(exe_arg, maybe_v_arg, args);
   int total_size = 0;
@@ -91,7 +81,7 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
     cstr_arg_storage[i] = '\0';
     ++i;
   }
-  for (const char* cstr_arg : llvm::ArrayRef(cstr_args).drop_front()) {
+  for (const char* cstr_arg : llvm::ArrayRef(cstr_args)) {
     CARBON_VLOG() << "    '" << cstr_arg << "'\n";
   }
 
@@ -113,7 +103,15 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
       /*ShouldOwnClient=*/false);
   clang::ProcessWarningOptions(diagnostics, *diagnostic_options);
 
-  clang::driver::Driver driver(exe_path_, target_, diagnostics);
+  clang::driver::Driver driver("clang-runner", target_, diagnostics);
+
+  // Configure the install directory to find other tools and data files.
+  //
+  // We directly override the detected directory as we use a synthetic path
+  // above. This makes it appear that our binary was in the installed binaries
+  // directory, and allows finding tools relative to it.
+  driver.Dir = installation_->llvm_install_bin();
+  CARBON_VLOG() << "Setting bin directory to: " << driver.Dir << "\n";
 
   // TODO: Directly run in-process rather than using a subprocess. This is both
   // more efficient and makes debugging (much) easier. Needs code like:

+ 4 - 3
toolchain/driver/clang_runner.h

@@ -9,6 +9,7 @@
 #include "common/ostream.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "toolchain/install/install_paths.h"
 
 namespace Carbon {
 
@@ -40,15 +41,15 @@ class ClangRunner {
   //
   // If `verbose` is passed as true, will enable verbose logging to the
   // `err_stream` both from the runner and Clang itself.
-  ClangRunner(llvm::StringRef exe_name, llvm::StringRef target,
+  ClangRunner(const InstallPaths* install_paths, llvm::StringRef target,
               llvm::raw_ostream* vlog_stream = nullptr);
 
   // Run Clang with the provided arguments.
   auto Run(llvm::ArrayRef<llvm::StringRef> args) -> bool;
 
  private:
-  llvm::StringRef exe_name_;
-  std::string exe_path_;
+  const InstallPaths* installation_;
+
   llvm::StringRef target_;
   llvm::raw_ostream* vlog_stream_;
 

+ 10 - 4
toolchain/driver/clang_runner_test.cpp

@@ -18,6 +18,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Host.h"
+#include "testing/base/gtest_main.h"
 #include "testing/base/test_raw_ostream.h"
 
 namespace Carbon {
@@ -54,8 +55,10 @@ static auto RunWithCapturedOutput(std::string& out, std::string& err,
 
 TEST(ClangRunnerTest, Version) {
   TestRawOstream test_os;
+  const auto install_paths =
+      InstallPaths::MakeForBazelRunfiles(Testing::GetTestExePath());
   std::string target = llvm::sys::getDefaultTargetTriple();
-  ClangRunner runner("./toolchain/driver/run_clang_test", target, &test_os);
+  ClangRunner runner(&install_paths, target, &test_os);
 
   std::string out;
   std::string err;
@@ -72,8 +75,9 @@ TEST(ClangRunnerTest, Version) {
   EXPECT_THAT(out, HasSubstr("clang version"));
   // The target should match what we provided.
   EXPECT_THAT(out, HasSubstr((llvm::Twine("Target: ") + target).str()));
-  // The installation should come from the above path of the test binary.
-  EXPECT_THAT(out, HasSubstr("InstalledDir: ./toolchain/driver"));
+  // Clang's install should be our private LLVM install bin directory.
+  EXPECT_THAT(out, HasSubstr(std::string("InstalledDir: ") +
+                             install_paths.llvm_install_bin()));
 }
 
 // Utility to write a test file. We don't need the full power provided here yet,
@@ -120,10 +124,12 @@ TEST(ClangRunnerTest, LinkCommandEcho) {
   std::filesystem::path foo_file = WriteTestFile("foo.o", "");
   std::filesystem::path bar_file = WriteTestFile("bar.o", "");
 
+  const auto install_paths =
+      InstallPaths::MakeForBazelRunfiles(Testing::GetTestExePath());
   std::string verbose_out;
   llvm::raw_string_ostream verbose_os(verbose_out);
   std::string target = llvm::sys::getDefaultTargetTriple();
-  ClangRunner runner("./toolchain/driver/run_clang_test", target, &verbose_os);
+  ClangRunner runner(&install_paths, target, &verbose_os);
   std::string out;
   std::string err;
   EXPECT_TRUE(RunWithCapturedOutput(out, err,

+ 4 - 1
toolchain/driver/driver.cpp

@@ -943,6 +943,9 @@ auto Driver::Link(const LinkOptions& options,
   // We link using a C++ mode of the driver.
   clang_args.push_back("--driver-mode=g++");
 
+  // Use LLD, which we provide in our install directory, for linking.
+  clang_args.push_back("-fuse-ld=lld");
+
   // Add OS-specific flags based on the target.
   AddOSFlags(codegen_options.target, clang_args);
 
@@ -951,7 +954,7 @@ auto Driver::Link(const LinkOptions& options,
   clang_args.append(options.object_filenames.begin(),
                     options.object_filenames.end());
 
-  ClangRunner runner("FIXME", codegen_options.target, vlog_stream_);
+  ClangRunner runner(installation_, codegen_options.target, vlog_stream_);
   return {.success = runner.Run(clang_args)};
 }
 

+ 8 - 2
toolchain/driver/driver.h

@@ -10,6 +10,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
+#include "toolchain/install/install_paths.h"
 
 namespace Carbon {
 
@@ -32,10 +33,11 @@ class Driver {
 
   // Constructs a driver with any error or informational output directed to a
   // specified stream.
-  Driver(llvm::vfs::FileSystem& fs, llvm::StringRef data_dir,
-         llvm::raw_pwrite_stream& output_stream,
+  Driver(llvm::vfs::FileSystem& fs, const InstallPaths* installation,
+         llvm::StringRef data_dir, llvm::raw_pwrite_stream& output_stream,
          llvm::raw_pwrite_stream& error_stream)
       : fs_(fs),
+        installation_(installation),
         data_dir_(data_dir),
         output_stream_(output_stream),
         error_stream_(error_stream) {}
@@ -82,7 +84,11 @@ class Driver {
   // The filesystem for source code.
   llvm::vfs::FileSystem& fs_;
 
+  // Helper to locate the toolchain installation's files.
+  const InstallPaths* installation_;
+
   // The path within fs for data files.
+  // TODO: Replace with use of `installation_` once everything is moved over.
   std::string data_dir_;
 
   // Standard output; stdout.

+ 4 - 1
toolchain/driver/driver_fuzzer.cpp

@@ -10,6 +10,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "testing/base/test_raw_ostream.h"
 #include "toolchain/driver/driver.h"
+#include "toolchain/install/install_paths.h"
 
 namespace Carbon::Testing {
 
@@ -66,9 +67,11 @@ extern "C" auto LLVMFuzzerTestOneInput(const unsigned char* data, size_t size)
   }
 
   llvm::vfs::InMemoryFileSystem fs;
+  // TODO: We should try to thread the executable path into here.
+  const auto install_paths = InstallPaths::Make("");
   TestRawOstream error_stream;
   llvm::raw_null_ostream dest;
-  Driver d(fs, "", dest, error_stream);
+  Driver d(fs, &install_paths, "", dest, error_stream);
   if (!d.RunCommand(args).success) {
     if (error_stream.TakeStr().find("ERROR:") == std::string::npos) {
       llvm::errs() << "No error message on a failure!\n";

+ 6 - 1
toolchain/driver/driver_main.cpp

@@ -11,6 +11,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Path.h"
 #include "toolchain/driver/driver.h"
+#include "toolchain/install/install_paths.h"
 
 auto main(int argc, char** argv) -> int {
   Carbon::InitLLVM init_llvm(argc, argv);
@@ -26,12 +27,16 @@ auto main(int argc, char** argv) -> int {
   llvm::SmallVector<llvm::StringRef> args(argv + 1, argv + argc);
   auto fs = llvm::vfs::getRealFileSystem();
 
+  const auto install_paths = Carbon::InstallPaths::MakeExeRelative(exe_path);
+
   // Construct the data directory relative to the executable location.
+  // TODO: Will be removed when everything moves to the install_paths.
   llvm::SmallString<256> data_dir(llvm::sys::path::parent_path(exe_path));
   llvm::sys::path::append(data_dir, llvm::sys::path::Style::posix,
                           "carbon.runfiles/_main/");
 
-  Carbon::Driver driver(*fs, data_dir, llvm::outs(), llvm::errs());
+  Carbon::Driver driver(*fs, &install_paths, data_dir, llvm::outs(),
+                        llvm::errs());
   bool success = driver.RunCommand(args).success;
   return success ? EXIT_SUCCESS : EXIT_FAILURE;
 }

+ 7 - 1
toolchain/driver/driver_test.cpp

@@ -14,6 +14,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "testing/base/gtest_main.h"
 #include "testing/base/test_raw_ostream.h"
 #include "toolchain/testing/yaml_test_helpers.h"
 
@@ -40,7 +41,11 @@ static auto ReadFile(std::filesystem::path path) -> std::string {
 
 class DriverTest : public testing::Test {
  protected:
-  DriverTest() : driver_(fs_, "", test_output_stream_, test_error_stream_) {
+  DriverTest()
+      : installation_(
+            InstallPaths::MakeForBazelRunfiles(Testing::GetTestExePath())),
+        driver_(fs_, &installation_, "", test_output_stream_,
+                test_error_stream_) {
     char* tmpdir_env = getenv("TEST_TMPDIR");
     CARBON_CHECK(tmpdir_env != nullptr);
     test_tmpdir_ = tmpdir_env;
@@ -87,6 +92,7 @@ class DriverTest : public testing::Test {
   }
 
   llvm::vfs::InMemoryFileSystem fs_;
+  const InstallPaths installation_;
   TestRawOstream test_output_stream_;
   TestRawOstream test_error_stream_;
 

+ 141 - 0
toolchain/install/BUILD

@@ -0,0 +1,141 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("@bazel_skylib//rules:write_file.bzl", "write_file")
+load("@llvm-project//llvm:binary_alias.bzl", "binary_alias")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
+load("symlink_filegroup.bzl", "symlink_filegroup")
+
+package(default_visibility = ["//visibility:public"])
+
+# Build rules supporting the install data tree for the Carbon toolchain.
+#
+# This populates a synthetic Carbon toolchain installation under the
+# `prefix_root` directory. For details on its layout, see `install_paths.h`.
+
+# A marker of a valid Carbon install. All filegroups in the install should
+# include this one.
+write_file(
+    name = "install_marker",
+    out = "prefix_root/lib/carbon/carbon_install.txt",
+    content = [
+        "// Part of the Carbon Language project, under the Apache License v2.0 with LLVM",
+        "// Exceptions. See /LICENSE for license information.",
+        "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception",
+        "",
+        "This marks a valid Carbon install tree.",
+    ],
+)
+
+# Copy Clang and LLVM toolchain files into a synthetic LLVM installation under
+# `prefix_root/lib/carbon/llvm`. This lets parts of Clang that expect to find an
+# LLVM installation at relative paths work correctly, without exposing these
+# files in an installed 'bin' directory that might get added to a user's PATH.
+binary_alias(
+    name = "prefix_root/lib/carbon/llvm/bin/lld",
+    binary = "@llvm-project//lld:lld",
+)
+
+lld_bin_names = [
+    "ld.lld",
+    "ld64.lld",
+    "lld-link",
+    "wasm-ld",
+]
+
+[
+    binary_alias(
+        name = "prefix_root/lib/carbon/llvm/bin/" + bin_name,
+        binary = "@llvm-project//lld:lld",
+    )
+    for bin_name in lld_bin_names
+]
+
+filegroup(
+    name = "llvm_link_data",
+    srcs = [
+        "prefix_root/lib/carbon/llvm/bin/lld",
+        ":install_marker",
+    ] + [
+        "prefix_root/lib/carbon/llvm/bin/" + bin_name
+        for bin_name in lld_bin_names
+    ],
+)
+
+# All of the install data except for the user-facing binaries. This is typically
+# a reasonable data dependency for libraries and the user-facing binaries
+# without creating a cycle.
+filegroup(
+    name = "install_lib_data",
+    srcs = [
+        ":install_marker",
+        ":llvm_link_data",
+    ],
+)
+
+binary_alias(
+    name = "prefix_root/bin/carbon",
+    binary = "//toolchain/driver:carbon",
+)
+
+filegroup(
+    name = "install_data",
+    srcs = [
+        "prefix_root/bin/carbon",
+        ":install_lib_data",
+        ":install_marker",
+    ],
+)
+
+# A library for computing install paths for the toolchain. Note that this
+# library does *not* include the data itself, as that would form a dependency
+# cycle. Each part of the toolchain should add the narrow data file groups to
+# their data dependencies, and then use this library to locate them.
+cc_library(
+    name = "install_paths",
+    srcs = ["install_paths.cpp"],
+    hdrs = ["install_paths.h"],
+    deps = [
+        "//common:check",
+        "//common:error",
+        "@bazel_tools//tools/cpp/runfiles",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+# Build up some trees of data to use in testing our install detection.
+symlink_filegroup(
+    name = "test_installed_data",
+    testonly = 1,
+    srcs = ["//toolchain/install:install_data"],
+    out_prefix = "test_installed_root/",
+    remove_prefix = "toolchain/install/prefix_root/",
+)
+
+cc_binary(
+    name = "test_binary",
+    testonly = 1,
+    srcs = ["test_binary.cpp"],
+    data = [":install_data"],
+)
+
+cc_test(
+    name = "install_paths_test",
+    size = "small",
+    srcs = ["install_paths_test.cpp"],
+    data = [
+        ":install_data",
+        ":test_binary",
+        ":test_installed_data",
+    ],
+    deps = [
+        ":install_paths",
+        "//common:check",
+        "//common:ostream",
+        "//testing/base:gtest_main",
+        "@bazel_tools//tools/cpp/runfiles",
+        "@googletest//:gtest",
+        "@llvm-project//llvm:Support",
+    ],
+)

+ 109 - 0
toolchain/install/install_paths.cpp

@@ -0,0 +1,109 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/install/install_paths.h"
+
+#include <memory>
+
+#include "common/check.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "tools/cpp/runfiles/runfiles.h"
+
+namespace Carbon {
+
+// The location within our Bazel output tree of the prefix_root.
+static constexpr llvm::StringLiteral PrefixRoot =
+    "_main/toolchain/install/prefix_root/";
+
+// Path within an install prefix for our marker of a valid install.
+static constexpr llvm::StringLiteral MarkerPath =
+    "lib/carbon/carbon_install.txt";
+
+auto InstallPaths::MakeExeRelative(llvm::StringRef exe_path) -> InstallPaths {
+  InstallPaths paths;
+
+  // Map from the executable path to an install prefix path. This first
+  // requires that a file actually exists at the executable path.
+  if (!llvm::sys::fs::exists(exe_path)) {
+    paths.SetError(llvm::Twine("No file at executable path: ") + exe_path);
+    return paths;
+  }
+  paths = InstallPaths(exe_path);
+
+  // TODO: Detect a Windows executable path and use custom logic to map to the
+  // correct install prefix for that platform.
+
+  // We assume an executable will be in a `bin` directory and this is a
+  // FHS-like install prefix. We remove the filename and walk up to find the
+  // expected install prefix.
+  llvm::sys::path::remove_filename(paths.prefix_);
+  llvm::sys::path::append(paths.prefix_, llvm::sys::path::Style::posix, "../");
+
+  paths.CheckMarkerFile();
+  return paths;
+}
+
+auto InstallPaths::MakeForBazelRunfiles(llvm::StringRef exe_path)
+    -> InstallPaths {
+  using bazel::tools::cpp::runfiles::Runfiles;
+  std::string runtimes_error;
+  std::unique_ptr<Runfiles> runfiles(
+      Runfiles::Create(exe_path.str(), &runtimes_error));
+  CARBON_CHECK(runfiles != nullptr)
+      << "Failed to find runtimes tree: " << runtimes_error;
+
+  std::string relative_marker_path = (PrefixRoot.str() + MarkerPath).str();
+  std::string runtimes_marker_path = runfiles->Rlocation(relative_marker_path);
+
+  // Start from the marker, remove that filename, and walk up to find the
+  // install prefix.
+  InstallPaths paths(runtimes_marker_path);
+  llvm::sys::path::remove_filename(paths.prefix_);
+  llvm::sys::path::append(paths.prefix_, llvm::sys::path::Style::posix,
+                          "../../");
+
+  paths.CheckMarkerFile();
+  CARBON_CHECK(!paths.error()) << *paths.error();
+  return paths;
+}
+
+auto InstallPaths::Make(llvm::StringRef install_prefix) -> InstallPaths {
+  InstallPaths paths(install_prefix);
+  paths.CheckMarkerFile();
+  return paths;
+}
+
+auto InstallPaths::SetError(llvm::Twine message) -> void {
+  // Use an empty prefix on error, as that should use the working directory,
+  // which is the least likely to be problematic.
+  prefix_ = "";
+  error_ = {message.str()};
+}
+
+auto InstallPaths::CheckMarkerFile() -> void {
+  llvm::SmallString<256> path(prefix_);
+  llvm::sys::path::append(path, llvm::sys::path::Style::posix, MarkerPath);
+  if (!llvm::sys::fs::exists(path)) {
+    SetError(llvm::Twine("No install marker at path: ") + path);
+  }
+}
+
+auto InstallPaths::driver() const -> std::string {
+  llvm::SmallString<256> path(prefix_);
+  // TODO: Adjust this to work equally well on Windows.
+  llvm::sys::path::append(path, llvm::sys::path::Style::posix, "bin/carbon");
+  return path.str().str();
+}
+
+auto InstallPaths::llvm_install_bin() const -> std::string {
+  llvm::SmallString<256> path(prefix_);
+  // TODO: Adjust this to work equally well on Windows.
+  llvm::sys::path::append(path, llvm::sys::path::Style::posix,
+                          "lib/carbon/llvm/bin/");
+  return path.str().str();
+}
+
+}  // namespace Carbon

+ 124 - 0
toolchain/install/install_paths.h

@@ -0,0 +1,124 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_INSTALL_INSTALL_PATHS_H_
+#define CARBON_TOOLCHAIN_INSTALL_INSTALL_PATHS_H_
+
+#include <optional>
+#include <string>
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+
+namespace Carbon {
+
+// Locates the toolchain installation and provides paths to various components.
+//
+// The Carbon toolchain expects to be installed into some install prefix. For
+// example, this is expected to be similar to the CMake install prefix:
+//
+// - `C:/Program Files/Carbon` or similar on Windows.
+// - `/usr` or `/usr/local` on Linux and most BSDs.
+// - `/opt/homebrew` or similar on macOS with Homebrew.
+// - `bazel-bin/some/bazel/target.runfiles/_main/toolchain/install/prefix_root`
+//   for unit tests and just-built binaries during development.
+//
+// See https://cmake.org/cmake/help/latest/variable/CMAKE_INSTALL_PREFIX.html
+// for more details. While we don't build the toolchain with CMake, we expect
+// our installation to behave in a similar and compatible way.
+//
+// There are multiple ways of locating an install's prefix:
+//   - MakeExeRelative for command line tools in an install.
+//   - MakeForBazelRunfiles for locating through Bazel's runfile tree.
+//   - Make for an explicit path, for example in tests.
+//
+// When locating an install, we verify it by
+// looking for the `carbon_install.txt` marker file at a specific location
+// below. When errors occur, the install prefix is made empty, and error() can
+// be used for diagnostics; InstallPaths remains minimally functional.
+//
+// Within this prefix, we expect a hierarchy on Unix-y platforms:
+//
+// - `prefix_root/bin/carbon` - the main CLI driver
+// - `prefix_root/lib/carbon/carbon_install.txt` - a marker for the install
+// - `prefix_root/lib/carbon/...` - private data & binaries
+//
+// This is loosely based on the FHS (Filesystem Hierarchy Standard).
+//
+// An instance of this class provides methods that query for specific paths
+// within the install. Note that we want to abstract away any platform
+// differences in the installation layout, and so while there are some broad
+// paths available here, like the `prefix` method, those should primarily be
+// used for logging or debugging. When a specific part of the install is needed,
+// a dedicated accessor should be added that computes the path for that
+// component.
+//
+// Path accessor methods on the class return `llvm::StringRef` for any paths
+// that are stored in the class, and a `std::string` for any that are computed
+// on demand.
+//
+// TODO: Need to check the installation structure of LLVM on Windows and figure
+// out what Carbon's should be within a Windows prefix and how much of the
+// structure we can share with the Unix-y layout of the prefix.
+class InstallPaths {
+ public:
+  // Provide the current executable's path to detect the correct installation
+  // prefix path. This assumes the toolchain to be in its installed layout.
+  //
+  // If detection fails, this reverts to using the current working directory as
+  // the install prefix, and the error detected can be checked with `error()`.
+  static auto MakeExeRelative(llvm::StringRef exe_path) -> InstallPaths;
+
+  // Provide the current executable's path, and use that to detect a Bazel or
+  // Bazel-compatible runfiles install prefix path. This should only be used
+  // where it is reasonable to rely on this rather than a fixed install location
+  // such as for internal development purposes or other Bazel users of the
+  // Carbon library.
+  //
+  // This method of construction also ensures the result is valid. If detection
+  // fails for any reason, it will `CARBON_CHECK` fail with the error message.
+  static auto MakeForBazelRunfiles(llvm::StringRef exe_path) -> InstallPaths;
+
+  // Provide an explicit install paths prefix. This is useful for testing or for
+  // using Carbon in an environment with an unusual path to the installed files.
+  //
+  // The prefix is still checked for the install marker file; if it is missing,
+  // the prefix is reset to empty and `error()` describes the problem.
+  static auto Make(llvm::StringRef install_prefix) -> InstallPaths;
+
+  // Check for an error detecting the install paths correctly.
+  //
+  // A nullopt return means no errors encountered and the paths should work
+  // correctly.
+  //
+  // A string return means there was an error, and details of the error are
+  // in the `StringRef` for inclusion in any user report. The `StringRef` refers
+  // to storage owned by this object.
+  [[nodiscard]] auto error() const -> std::optional<llvm::StringRef> {
+    return error_;
+  }
+
+  // The computed installation prefix. This should correspond to the
+  // `prefix_root` directory in Bazel's output, or to some prefix the toolchain
+  // is installed into on a system such as `/usr/local` or `/home/$USER`.
+  //
+  // In the event of an error, this will be the empty string.
+  auto prefix() const -> llvm::StringRef { return prefix_; }
+
+  // The path to the main CLI driver: `bin/carbon` below the prefix.
+  auto driver() const -> std::string;
+
+  // The path to the directory holding the installed LLVM binaries:
+  // `lib/carbon/llvm/bin/` below the prefix.
+  auto llvm_install_bin() const -> std::string;
+
+ private:
+  // Builds an instance with an empty prefix that is already in the error
+  // state.
+  InstallPaths() : error_("No prefix provided!") {}
+
+  // Builds an instance for `prefix` without validating it; callers are
+  // expected to follow up with `CheckMarkerFile`.
+  explicit InstallPaths(llvm::StringRef prefix) : prefix_(prefix) {}
+
+  // Set an error message on the install paths and reset the prefix to empty,
+  // which should use the current working directory.
+  auto SetError(llvm::Twine message) -> void;
+
+  // Check that the install paths have a marker file at the expected location,
+  // and if not calls `SetError` with the relevant error message.
+  auto CheckMarkerFile() -> void;
+
+  llvm::SmallString<256> prefix_;
+  std::optional<std::string> error_;
+};
+
+}  // namespace Carbon
+
+#endif  // CARBON_TOOLCHAIN_INSTALL_INSTALL_PATHS_H_

+ 130 - 0
toolchain/install/install_paths_test.cpp

@@ -0,0 +1,130 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/install/install_paths.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "common/check.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
+#include "testing/base/gtest_main.h"
+#include "tools/cpp/runfiles/runfiles.h"
+
+namespace Carbon {
+namespace {
+
+using ::bazel::tools::cpp::runfiles::Runfiles;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Optional;
+using ::testing::StartsWith;
+
+// Fixture that opens the test executable's own runfiles tree and provides a
+// shared checker for a detected `InstallPaths`.
+class InstallPathsTest : public ::testing::Test {
+ protected:
+  InstallPathsTest() {
+    std::string error;
+    test_runfiles_.reset(
+        Runfiles::Create(Testing::GetTestExePath().str(), &error));
+    CARBON_CHECK(test_runfiles_ != nullptr) << error;
+  }
+
+  // Test the given, already-constructed install `paths`. Will check that the
+  // install prefix exists and is a directory, that each path accessor returns
+  // a path starting with that prefix, and that each such path points to the
+  // right kind of file or directory.
+  auto TestInstallPaths(const InstallPaths& paths) -> void {
+    // `formatv` uses `{N}` replacement fields, not printf-style specifiers.
+    SCOPED_TRACE(llvm::formatv("Install prefix: '{0}'", paths.prefix()));
+
+    // Grab the prefix into a string to make it easier to use in the test.
+    std::string prefix = paths.prefix().str();
+    EXPECT_TRUE(llvm::sys::fs::exists(prefix));
+    EXPECT_TRUE(llvm::sys::fs::is_directory(prefix));
+
+    // Now check that all the expected parts of the toolchain's install are in
+    // fact found using the API.
+    std::string driver_path = paths.driver();
+    ASSERT_THAT(driver_path, StartsWith(prefix));
+    EXPECT_TRUE(llvm::sys::fs::exists(driver_path)) << "path: " << driver_path;
+    EXPECT_TRUE(llvm::sys::fs::can_execute(driver_path))
+        << "path: " << driver_path;
+
+    std::string llvm_bin_path = paths.llvm_install_bin();
+    ASSERT_THAT(llvm_bin_path, StartsWith(prefix));
+    EXPECT_TRUE(llvm::sys::fs::exists(llvm_bin_path))
+        << "path: " << llvm_bin_path;
+    EXPECT_TRUE(llvm::sys::fs::is_directory(llvm_bin_path))
+        << "path: " << llvm_bin_path;
+
+    // Every LLD entry point is expected to be present and executable in the
+    // LLVM bin directory.
+    for (llvm::StringRef llvm_bin :
+         {"lld", "ld.lld", "ld64.lld", "lld-link", "wasm-ld"}) {
+      llvm::SmallString<128> bin_path;
+      bin_path.assign(llvm_bin_path);
+      llvm::sys::path::append(bin_path, llvm_bin);
+
+      EXPECT_TRUE(llvm::sys::fs::exists(bin_path)) << "path: " << bin_path;
+      EXPECT_TRUE(llvm::sys::fs::can_execute(bin_path)) << "path: " << bin_path;
+    }
+  }
+
+  // Runfiles of the test executable itself, used to locate installed files.
+  std::unique_ptr<Runfiles> test_runfiles_;
+};
+
+TEST_F(InstallPathsTest, PrefixRootDriver) {
+  // Resolve the driver binary inside the installed tree and detect the install
+  // relative to that executable.
+  std::string driver_in_prefix = test_runfiles_->Rlocation(
+      "_main/toolchain/install/prefix_root/bin/carbon");
+
+  auto install = InstallPaths::MakeExeRelative(driver_in_prefix);
+  ASSERT_THAT(install.error(), Eq(std::nullopt)) << *install.error();
+  TestInstallPaths(install);
+}
+
+TEST_F(InstallPathsTest, PrefixRootExplicit) {
+  // Find the marker file through runfiles, then strip its known relative
+  // location to recover the prefix directory to pass in explicitly.
+  std::string marker_file = test_runfiles_->Rlocation(
+      "_main/toolchain/install/prefix_root/lib/carbon/carbon_install.txt");
+
+  llvm::StringRef explicit_prefix = marker_file;
+  CARBON_CHECK(explicit_prefix.consume_back("lib/carbon/carbon_install.txt"))
+      << "Unexpected suffix of the marker path: " << marker_file;
+
+  auto install = InstallPaths::Make(explicit_prefix);
+  ASSERT_THAT(install.error(), Eq(std::nullopt)) << *install.error();
+  TestInstallPaths(install);
+}
+
+TEST_F(InstallPathsTest, TestRunfiles) {
+  // Detect the install through the runfiles of this test executable.
+  auto install = InstallPaths::MakeForBazelRunfiles(Testing::GetTestExePath());
+  ASSERT_THAT(install.error(), Eq(std::nullopt)) << *install.error();
+  TestInstallPaths(install);
+}
+
+TEST_F(InstallPathsTest, BinaryRunfiles) {
+  // Detect the install through the runfiles of a separate helper binary
+  // rather than those of the test itself.
+  std::string helper_binary =
+      test_runfiles_->Rlocation("_main/toolchain/install/test_binary");
+  CARBON_CHECK(llvm::sys::fs::can_execute(helper_binary)) << helper_binary;
+
+  auto install = InstallPaths::MakeForBazelRunfiles(helper_binary);
+  ASSERT_THAT(install.error(), Eq(std::nullopt)) << *install.error();
+  TestInstallPaths(install);
+}
+
+TEST_F(InstallPathsTest, Errors) {
+  // A bogus explicit prefix must report an error naming the path and fall
+  // back to an empty prefix.
+  auto explicit_paths = InstallPaths::Make("foo/bar/baz");
+  EXPECT_THAT(explicit_paths.error(), Optional(HasSubstr("foo/bar/baz")));
+  EXPECT_THAT(explicit_paths.prefix(), Eq(""));
+
+  // The same holds when the bogus path is given as the executable path.
+  auto exe_relative_paths = InstallPaths::MakeExeRelative("foo/bar/baz");
+  EXPECT_THAT(exe_relative_paths.error(), Optional(HasSubstr("foo/bar/baz")));
+  EXPECT_THAT(exe_relative_paths.prefix(), Eq(""));
+
+  // Note that we can't test the runfiles code path from within a test because
+  // it succeeds some of the time even with a bogus executable name.
+}
+
+}  // namespace
+}  // namespace Carbon

+ 36 - 0
toolchain/install/symlink_filegroup.bzl

@@ -0,0 +1,36 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Rule for symlinking an entire filegroup, preserving its structure."""
+
+def _symlink_filegroup_impl(ctx):
+    """Declares one symlink per source file and returns them as outputs.
+
+    Each output path is `out_prefix` followed by the source's short path with
+    `remove_prefix` stripped from its front.
+    """
+    out_prefix = ctx.attr.out_prefix
+    strip_prefix = ctx.attr.remove_prefix
+
+    symlinks = []
+    for src in ctx.files.srcs:
+        link = ctx.actions.declare_file(
+            out_prefix + src.short_path.removeprefix(strip_prefix),
+        )
+        ctx.actions.symlink(output = link, target_file = src)
+        symlinks.append(link)
+
+    # Sanity check that every source produced exactly one output.
+    if len(symlinks) != len(ctx.files.srcs):
+        fail("Output count mismatch!")
+
+    return [
+        DefaultInfo(
+            files = depset(symlinks),
+            runfiles = ctx.runfiles(files = symlinks),
+        ),
+    ]
+
+symlink_filegroup = rule(
+    implementation = _symlink_filegroup_impl,
+    doc = "Symlinks every file in `srcs` to an output whose path is " +
+          "`out_prefix` plus the file's short path with `remove_prefix` " +
+          "stripped from the front.",
+    attrs = {
+        "out_prefix": attr.string(
+            mandatory = True,
+            doc = "String prepended to each output's path.",
+        ),
+        "remove_prefix": attr.string(
+            default = "",
+            doc = "Prefix stripped from each source's short path, when " +
+                  "present, before `out_prefix` is prepended.",
+        ),
+        "srcs": attr.label_list(
+            mandatory = True,
+            doc = "Targets whose files are symlinked.",
+        ),
+    },
+)

+ 5 - 0
toolchain/install/test_binary.cpp

@@ -0,0 +1,5 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Trivial executable: does no work and exits with status 0.
+auto main() -> int {
+  return 0;
+}

+ 4 - 1
toolchain/sem_ir/yaml_test.cpp

@@ -8,6 +8,7 @@
 #include "common/ostream.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include "testing/base/gtest_main.h"
 #include "testing/base/test_raw_ostream.h"
 #include "toolchain/driver/driver.h"
 #include "toolchain/testing/yaml_test_helpers.h"
@@ -34,8 +35,10 @@ TEST(SemIRTest, YAML) {
   CARBON_CHECK(fs.addFile(
       "test.carbon", /*ModificationTime=*/0,
       llvm::MemoryBuffer::getMemBuffer("fn F() { var x: () = (); return; }")));
+  const auto install_paths =
+      InstallPaths::MakeForBazelRunfiles(Testing::GetTestExePath());
   TestRawOstream print_stream;
-  Driver d(fs, "", print_stream, llvm::errs());
+  Driver d(fs, &install_paths, "", print_stream, llvm::errs());
   auto run_result =
       d.RunCommand({"compile", "--no-prelude-import", "--phase=check",
                     "--dump-raw-sem-ir", "test.carbon"});

+ 7 - 3
toolchain/testing/file_test.cpp

@@ -23,8 +23,11 @@ namespace {
 // phase subdirectories.
 class ToolchainFileTest : public FileTestBase {
  public:
-  explicit ToolchainFileTest(llvm::StringRef test_name)
-      : FileTestBase(test_name), component_(GetComponent(test_name)) {}
+  explicit ToolchainFileTest(llvm::StringRef exe_path,
+                             llvm::StringRef test_name)
+      : FileTestBase(test_name),
+        component_(GetComponent(test_name)),
+        installation_(InstallPaths::MakeForBazelRunfiles(exe_path)) {}
 
   auto Run(const llvm::SmallVector<llvm::StringRef>& test_args,
            llvm::vfs::InMemoryFileSystem& fs, llvm::raw_pwrite_stream& stdout,
@@ -39,7 +42,7 @@ class ToolchainFileTest : public FileTestBase {
       CARBON_RETURN_IF_ERROR(AddFile(fs, file));
     }
 
-    Driver driver(fs, data_dir, stdout, stderr);
+    Driver driver(fs, &installation_, data_dir, stdout, stderr);
     auto driver_result = driver.RunCommand(test_args);
 
     RunResult result{
@@ -152,6 +155,7 @@ class ToolchainFileTest : public FileTestBase {
   }
 
   const llvm::StringRef component_;
+  const InstallPaths installation_;
 };
 
 }  // namespace