Просмотр исходного кода

Implement a basic busybox for carbon/clang. (#4406)

For reference, we're going down the busyboxing route because Carbon
depends on Clang, and we want both to be available as binaries.
Busyboxing allows this while avoiding duplicating symbols between
multiple binaries.

I'm removing the `cc_binary` for `driver:carbon` because I want to avoid
a significant increase in binary outputs; `bazel run //toolchain` still
works great.

This still doesn't have great test coverage (but non-zero:
`//examples:sieve` still builds/runs, for example). The problem is that
we want to avoid subprocessing for performance, but this mainly deals
with subprocessing. I'm still thinking about good approaches for that,
since we'll probably want more significant testing for `clang`
interaction... the solution might involve busyboxing `file_test` too.

Note: development on this ran into the argv issue being fixed in #4405.
Jon Ross-Perkins 1 год назад
Родитель
Commit
957599b2ab

+ 1 - 1
bazel/check_deps/BUILD

@@ -18,7 +18,7 @@ filegroup(
         "//language_server",
         "//migrate_cpp:rewriter",
         "//migrate_cpp/cpp_refactoring",
-        "//toolchain/driver:carbon",
+        "//toolchain/install:carbon-busybox",
         "//utils/treesitter",
     ],
     tags = ["manual"],

+ 0 - 19
toolchain/driver/BUILD

@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
-load("//bazel/cc_toolchains:defs.bzl", "cc_env")
 load("//testing/fuzzing:rules.bzl", "cc_fuzz_test")
 
 package(default_visibility = ["//visibility:public"])
@@ -165,21 +164,3 @@ cc_fuzz_test(
         "@llvm-project//llvm:Support",
     ],
 )
-
-# This target doesn't include prelude libraries. To get a target that has the
-# prelude available, use //toolchain.
-cc_binary(
-    name = "carbon",
-    srcs = ["driver_main.cpp"],
-    env = cc_env(),
-    deps = [
-        ":driver",
-        "//common:all_llvm_targets",
-        "//common:bazel_working_dir",
-        "//common:exe_path",
-        "//common:init_llvm",
-        "//common:version_stamp",
-        "//toolchain/install:install_paths",
-        "@llvm-project//llvm:Support",
-    ],
-)

+ 23 - 15
toolchain/driver/clang_runner.cpp

@@ -68,9 +68,10 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
   // driver. Command lines can get quite long in build systems so this tries to
   // minimize the memory allocation overhead.
 
-  // Start with a dummy executable name. We'll manually set the install
-  // directory below.
-  std::array<llvm::StringRef, 1> exe_arg = {"clang-runner"};
+  // Provide the wrapped `clang` path in order to support subprocessing. We also
+  // set the install directory below.
+  std::string clang_path = installation_->clang_path();
+  std::array<llvm::StringRef, 1> exe_arg = {clang_path};
   auto args_range =
       llvm::concat<const llvm::StringRef>(exe_arg, maybe_v_arg, args);
   int total_size = 0;
@@ -95,6 +96,17 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
     CARBON_VLOG("    '{0}'\n", cstr_arg);
   }
 
+  if (!args.empty() && args[0].starts_with("-cc1")) {
+    CARBON_VLOG("Calling clang_main for cc1...");
+    // cstr_args[0] will be the `clang_path` so we don't need the prepend arg.
+    llvm::ToolContext tool_context = {
+        .Path = cstr_args[0], .PrependArg = "clang", .NeedsPrependArg = false};
+    int exit_code = clang_main(
+        cstr_args.size(), const_cast<char**>(cstr_args.data()), tool_context);
+    // TODO: Should this be forwarding the full exit code?
+    return exit_code == 0;
+  }
+
   CARBON_VLOG("Preparing Clang driver...\n");
 
   // Create the diagnostic options and parse arguments controlling them out of
@@ -113,7 +125,7 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
       /*ShouldOwnClient=*/false);
   clang::ProcessWarningOptions(diagnostics, *diagnostic_options);
 
-  clang::driver::Driver driver("clang-runner", target_, diagnostics);
+  clang::driver::Driver driver(clang_path, target_, diagnostics);
 
   // Configure the install directory to find other tools and data files.
   //
@@ -128,18 +140,14 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
   // still subprocess. See `InProcess` comment at:
   // https://github.com/llvm/llvm-project/blob/86ce8e4504c06ecc3cc42f002ad4eb05cac10925/clang/lib/Driver/Job.cpp#L411-L413
   //
-  // TODO: It would be nice to find a way to set up the driver's understanding
-  // of the executable name in a way that causes the multiple `cc1` invocations
-  // to actually result in `carbon clang -- ...` invocations (even if as
-  // subprocesses). This may dovetail with having symlinks that redirect to a
-  // busybox of LLD as well, and having even the subprocesses consistently run
-  // the Carbon install toolchain and not a system toolchain whenever possible.
-  driver.CC1Main = [](llvm::SmallVectorImpl<const char*>& argv) -> int {
-    // TODO: Try to use a better path for argv[0] (maybe in the LLVM install
-    // paths). This works for now.
+  // Note the subprocessing will effectively call `clang -cc1`, which turns into
+  // `carbon-busybox clang -cc1`, which results in an equivalent `clang_main`
+  // call.
+  driver.CC1Main = [](llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
+    // cc1_args[0] will be the `clang_path` so we don't need the prepend arg.
     llvm::ToolContext tool_context = {
-        .Path = argv[0], .PrependArg = "clang", .NeedsPrependArg = true};
-    return clang_main(argv.size(), const_cast<char**>(argv.data()),
+        .Path = cc1_args[0], .PrependArg = "clang", .NeedsPrependArg = false};
+    return clang_main(cc1_args.size(), const_cast<char**>(cc1_args.data()),
                       tool_context);
   };
 

+ 5 - 0
toolchain/driver/driver.cpp

@@ -88,6 +88,11 @@ auto Options::Build(CommandLine::CommandBuilder& b) -> void {
 }
 
 auto Driver::RunCommand(llvm::ArrayRef<llvm::StringRef> args) -> DriverResult {
+  if (driver_env_.installation->error()) {
+    llvm::errs() << "error: " << *driver_env_.installation->error() << "\n";
+    return {.success = false};
+  }
+
   Options options;
 
   CommandLine::ParseResult result = CommandLine::Parse(

+ 0 - 38
toolchain/driver/driver_main.cpp

@@ -1,38 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include <cstdlib>
-
-#include "common/bazel_working_dir.h"
-#include "common/exe_path.h"
-#include "common/init_llvm.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "toolchain/driver/driver.h"
-#include "toolchain/install/install_paths.h"
-
-auto main(int argc, char** argv) -> int {
-  Carbon::InitLLVM init_llvm(argc, argv);
-
-  if (argc < 1) {
-    return EXIT_FAILURE;
-  }
-
-  // Resolve paths before calling SetWorkingDirForBazel.
-  std::string exe_path = Carbon::FindExecutablePath(argv[0]);
-  const auto install_paths = Carbon::InstallPaths::MakeExeRelative(exe_path);
-  if (install_paths.error()) {
-    llvm::errs() << "error: " << *install_paths.error();
-    return EXIT_FAILURE;
-  }
-
-  Carbon::SetWorkingDirForBazel();
-
-  llvm::SmallVector<llvm::StringRef> args(argv + 1, argv + argc);
-  auto fs = llvm::vfs::getRealFileSystem();
-
-  Carbon::Driver driver(*fs, &install_paths, llvm::outs(), llvm::errs());
-  bool success = driver.RunCommand(args).success;
-  return success ? EXIT_SUCCESS : EXIT_FAILURE;
-}

+ 35 - 3
toolchain/install/BUILD

@@ -70,6 +70,25 @@ cc_library(
     ],
 )
 
+# This target doesn't include prelude libraries. To get a target that has the
+# prelude available, use //toolchain.
+cc_binary(
+    name = "carbon-busybox",
+    srcs = ["busybox_main.cpp"],
+    env = cc_env(),
+    deps = [
+        ":install_paths",
+        "//common:all_llvm_targets",
+        "//common:bazel_working_dir",
+        "//common:error",
+        "//common:exe_path",
+        "//common:init_llvm",
+        "//common:version_stamp",
+        "//toolchain/driver",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 lld_aliases = [
     "ld.lld",
     "ld64.lld",
@@ -88,15 +107,20 @@ lld_aliases = [
 # based on the FHS (Filesystem Hierarchy Standard).
 install_dirs = {
     "bin": [
-        install_target(
+        install_symlink(
             "carbon",
-            "//toolchain/driver:carbon",
-            executable = True,
+            "../lib/carbon/carbon-busybox",
             is_driver = True,
         ),
     ],
     "lib/carbon": [
         install_target("carbon_install.txt", "carbon_install.txt"),
+        install_target(
+            "carbon-busybox",
+            ":carbon-busybox",
+            executable = True,
+            is_driver = True,
+        ),
         install_filegroup("core", "//core:prelude"),
     ],
     "lib/carbon/llvm/bin": [
@@ -105,6 +129,11 @@ install_dirs = {
             "@llvm-project//lld:lld",
             executable = True,
         ),
+        install_symlink(
+            "clang",
+            "../../carbon-busybox",
+            is_driver = True,
+        ),
     ] + [install_symlink(name, "lld") for name in lld_aliases],
 }
 
@@ -123,6 +152,9 @@ pkg_naming_variables(
 # We build both a compressed and uncompressed tar file with the same code here.
 # This lets us use the tar file in testing as it is fast to create, but ship the
 # compressed version as a release.
+#
+# For manual tests, the tar rules are `carbon_toolchain_tar_rule` and
+# `carbon_toolchain_tar_gz_rule`.
 pkg_tar_and_test(
     srcs = [":pkg_data"],
     name_base = "carbon_toolchain",

+ 93 - 0
toolchain/install/busybox_main.cpp

@@ -0,0 +1,93 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <filesystem>
+
+#include "common/bazel_working_dir.h"
+#include "common/error.h"
+#include "common/exe_path.h"
+#include "common/init_llvm.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/LLVMDriver.h"
+#include "toolchain/driver/driver.h"
+#include "toolchain/install/install_paths.h"
+
+namespace Carbon {
+
+namespace {
+struct BusyboxInfo {
+  // The path to `carbon-busybox`.
+  std::filesystem::path bin_path;
+  // The mode, such as `carbon` or `clang`.
+  std::optional<std::string> mode;
+};
+}  // namespace
+
+// Returns the busybox information, given argv[0]. This primarily handles
+// resolving symlinks that point at the busybox.
+static auto GetBusyboxInfo(llvm::StringRef argv0) -> ErrorOr<BusyboxInfo> {
+  BusyboxInfo info = BusyboxInfo{argv0.str(), std::nullopt};
+  while (true) {
+    std::string filename = info.bin_path.filename();
+    if (filename == "carbon-busybox") {
+      return info;
+    }
+    std::error_code ec;
+    auto symlink_target = std::filesystem::read_symlink(info.bin_path, ec);
+    if (ec) {
+      return ErrorBuilder()
+             << "expected carbon-busybox symlink at `" << info.bin_path << "`";
+    }
+    info.mode = filename;
+    info.bin_path = symlink_target;
+  }
+}
+
+// The actual `main` implementation. Can return an exit code or an `Error`
+// (which causes EXIT_FAILURE).
+static auto Main(int argc, char** argv) -> ErrorOr<int> {
+  Carbon::InitLLVM init_llvm(argc, argv);
+
+  // Start by resolving any symlinks.
+  CARBON_ASSIGN_OR_RETURN(auto busybox_info, Carbon::GetBusyboxInfo(argv[0]));
+
+  auto fs = llvm::vfs::getRealFileSystem();
+
+  // Resolve paths before calling SetWorkingDirForBazel.
+  std::string exe_path =
+      Carbon::FindExecutablePath(busybox_info.bin_path.string());
+  const auto install_paths = Carbon::InstallPaths::MakeExeRelative(exe_path);
+  if (install_paths.error()) {
+    return Error(*install_paths.error());
+  }
+
+  Carbon::SetWorkingDirForBazel();
+
+  llvm::SmallVector<llvm::StringRef> args;
+  args.reserve(argc + 1);
+  if (busybox_info.mode && busybox_info.mode != "carbon") {
+    args.append({*busybox_info.mode, "--"});
+  }
+  args.append(argv + 1, argv + argc);
+
+  Carbon::Driver driver(*fs, &install_paths, llvm::outs(), llvm::errs());
+  bool success = driver.RunCommand(args).success;
+  return success ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+}  // namespace Carbon
+
+auto main(int argc, char** argv) -> int {
+  auto result = Carbon::Main(argc, argv);
+  if (result.ok()) {
+    return *result;
+  } else {
+    llvm::errs() << "error: " << result.error() << "\n";
+    return EXIT_FAILURE;
+  }
+}

+ 18 - 3
toolchain/install/install_filegroups.bzl

@@ -22,7 +22,7 @@ def install_filegroup(name, filegroup_target):
         "name": name,
     }
 
-def install_symlink(name, symlink_to):
+def install_symlink(name, symlink_to, is_driver = False):
     """Adds a symlink for install.
 
     Used in the `install_dirs` dict.
@@ -30,9 +30,11 @@ def install_symlink(name, symlink_to):
     Args:
       name: The filename to use.
       symlink_to: A relative path for the symlink.
+      is_driver: False if it should be included in the `no_driver_name`
+        filegroup.
     """
     return {
-        "is_driver": False,
+        "is_driver": is_driver,
         "name": name,
         "symlink": symlink_to,
     }
@@ -117,10 +119,23 @@ def make_install_filegroups(name, no_driver_name, pkg_name, install_dirs, prefix
                 )
             elif "symlink" in entry:
                 symlink_to = "{0}/{1}/{2}".format(prefix, dir, entry["symlink"])
+
+                # For bazel, we need to resolve relative symlinks.
+                if "../" in symlink_to:
+                    parts = symlink_to.split("/")
+                    result = []
+                    for part in parts:
+                        if part == "..":
+                            result = result[:-1]
+                        else:
+                            result.append(part)
+                    symlink_to = "/".join(result)
                 symlink_file(
                     name = prefixed_path,
-                    symlink_label = symlink_to,
+                    symlink_binary = symlink_to,
                 )
+
+                # For the distributed package, we retain relative symlinks.
                 pkg_mklink(
                     name = pkg_path,
                     link_name = path,

+ 10 - 1
toolchain/install/install_paths.cpp

@@ -42,7 +42,8 @@ auto InstallPaths::MakeExeRelative(llvm::StringRef exe_path) -> InstallPaths {
   // FHS-like install prefix. We remove the filename and walk up to find the
   // expected install prefix.
   llvm::sys::path::remove_filename(paths.prefix_);
-  llvm::sys::path::append(paths.prefix_, llvm::sys::path::Style::posix, "../");
+  llvm::sys::path::append(paths.prefix_, llvm::sys::path::Style::posix,
+                          "../../");
 
   if (auto error = llvm::sys::fs::make_absolute(paths.prefix_)) {
     paths.SetError(error.message());
@@ -162,4 +163,12 @@ auto InstallPaths::llvm_install_bin() const -> std::string {
   return path.str().str();
 }
 
+auto InstallPaths::clang_path() const -> std::string {
+  llvm::SmallString<256> path(prefix_);
+  // TODO: Adjust this to work equally well on Windows.
+  llvm::sys::path::append(path, llvm::sys::path::Style::posix,
+                          "lib/carbon/llvm/bin/clang");
+  return path.str().str();
+}
+
 }  // namespace Carbon

+ 3 - 0
toolchain/install/install_paths.h

@@ -84,6 +84,9 @@ class InstallPaths {
   // The directory containing LLVM install binaries. Computed on demand.
   auto llvm_install_bin() const -> std::string;
 
+  // The path to `clang`.
+  auto clang_path() const -> std::string;
+
  private:
   friend class InstallPathsTestPeer;
 

+ 2 - 2
toolchain/install/install_paths_test.cpp

@@ -89,9 +89,9 @@ class InstallPathsTest : public ::testing::Test {
   std::unique_ptr<Runfiles> test_runfiles_;
 };
 
-TEST_F(InstallPathsTest, PrefixRootDriver) {
+TEST_F(InstallPathsTest, PrefixRootBusybox) {
   std::string installed_driver_path = test_runfiles_->Rlocation(
-      "carbon/toolchain/install/prefix_root/bin/carbon");
+      "carbon/toolchain/install/prefix_root/lib/carbon/carbon-busybox");
 
   auto paths = InstallPaths::MakeExeRelative(installed_driver_path);
   ASSERT_THAT(paths.error(), Eq(std::nullopt)) << *paths.error();