فهرست منبع

Make the Carbon toolchain a viable Bazel module exposing `cc_toolchain`s (#6652)

This lets you point Bazel at an installed toolchain or download one of
our release archives. When you do, it will configure itself as a C++
Bazel toolchain. This toolchain works reasonably well, but doesn't cache
the C++ runtimes, and so linking is inefficient. The next step will be
to pivot the runtimes from the implicitly on-demand (which can't cache
when using a sandboxed build system like Bazel) to _explicit_ on-demand
runtimes directly with Bazel support.

I've included an example Bazel project that uses this and provides a
bunch of documentation and an example script that should let folks try
this out easily.

---------

Co-authored-by: Geoff Romer <gromer@google.com>
Co-authored-by: David Blaikie <dblaikie@gmail.com>
Chandler Carruth 3 ماه پیش
والد
کامیت
b0ffed7c3e

+ 3 - 0
.bazelignore

@@ -10,3 +10,6 @@ github_tools
 
 # Used as part of repo patching.
 third_party/boost_unordered
+
+# Example Bazel project.
+examples/bazel

+ 5 - 0
.gitignore

@@ -9,6 +9,11 @@
 /github_tools/bazel-*
 /github_tools/MODULE.bazel.lock
 
+# We also have example Bazel projects that shouldn't have their implementation
+# details committed.
+/examples/**/bazel-*
+/examples/**/MODULE.bazel.lock
+
 # Directories created by python.
 **/__pycache__/
 

+ 17 - 0
bazel/cc_toolchains/BUILD

@@ -46,3 +46,20 @@ config_setting(
     name = "fastbuild",
     values = {"compilation_mode": "fastbuild"},
 )
+
+# Groups the `cc_toolchain` Starlark sources in this package under a single
+# label so they can be referenced together.
+filegroup(
+    name = "installed_cc_toolchain_starlark",
+    srcs = [
+        "cc_toolchain_actions.bzl",
+        "cc_toolchain_base_features.bzl",
+        "cc_toolchain_config_features.bzl",
+        "cc_toolchain_cpp_features.bzl",
+        "cc_toolchain_debugging.bzl",
+        "cc_toolchain_features.bzl",
+        "cc_toolchain_linking.bzl",
+        "cc_toolchain_modules.bzl",
+        "cc_toolchain_optimization.bzl",
+        "cc_toolchain_sanitizer_features.bzl",
+        "cc_toolchain_tools.bzl",
+    ],
+)

+ 21 - 0
examples/bazel/BUILD

@@ -0,0 +1,21 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Note that this is not a BUILD file that is part of the larger Carbon project, and
+# is not built by running `bazel build //examples/bazel/...`. This is its own
+# _distinct_ example Bazel project rooted at `examples/bazel`. You will need to
+# `cd` into this directory to interact with it.
+#
+# For more details about how to manually interact with this example, please see
+# the adjacent `MODULE.bazel` file.
+
+load("@rules_cc//cc:defs.bzl", "cc_binary")
+
+cc_binary(
+    name = "example",
+    srcs = [
+        "example.cpp",
+        "example.h",
+    ],
+)

+ 56 - 0
examples/bazel/MODULE.bazel

@@ -0,0 +1,56 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Example Bazel module that builds C++ with the Carbon toolchain.
+
+Note that this is at the root of an example Bazel project, not part of the
+larger Carbon Bazel project. To interact with this example, you'll want to
+`cd examples/bazel` so that you can operate Bazel entirely within this example
+(sub-)project.
+
+Once interacting with this subdirectory's project, you can use this and build
+with the Carbon toolchain in a few different ways:
+
+1) Override the module with a local installation on the command line:
+   `bazel build --override_module=carbon_toolchain=/path/to/carbon_toolchain/installation/lib/carbon`
+
+2) Encode a local override into this file:
+   ```
+   local_path_override(
+       module_name = "carbon_toolchain",
+       path = "/path/to/carbon_toolchain/installation/lib/carbon",
+   )
+   ```
+
+3) Encode an archive override into this file:
+   ```
+   version = "0.0.0-0.nightly.YYYY.MM.DD"
+   archive_override(
+       module_name = "carbon_toolchain",
+       strip_prefix = "carbon_toolchain-{0}/lib/carbon".format(version),
+       urls = ["https://github.com/carbon-language/carbon-lang/releases/download/v{0}/carbon_toolchain-{0}.tar.gz".format(version)],
+   )
+   ```
+
+   Note: Initially, Bazel will warn about the lack of an `integrity` field,
+   and will print the value it found by downloading the archive which you can
+   verify on GitHub before encoding.
+
+You can use the provided script `update_module_to_nightly.py` to generate and
+add an `archive_override` of the current nightly release, including integrity
+from GitHub.
+
+Once the Carbon toolchain has established releases in the Bazel central
+registry, this file will work with an updated version out of the box.
+"""
+
+module(name = "example")
+
+bazel_dep(name = "rules_cc", version = "0.2.14")
+
+# Declare the `carbon_toolchain` module. This is needed even if it will be
+# overridden with a local path or archive.
+bazel_dep(name = "carbon_toolchain", version = "0.0.0")
+
+register_toolchains("@carbon_toolchain//:all")

+ 16 - 0
examples/bazel/example.cpp

@@ -0,0 +1,16 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "example.h"
+
+#include <stdlib.h>
+
+#include <iostream>
+
+// Writes the greeting, followed by a newline, to standard output.
+auto HelloWorld() -> void {
+  std::cout << "Hello World!\n";
+}
+
+// Program entry point: prints the greeting and reports success.
+auto main() -> int {
+  HelloWorld();
+  return EXIT_SUCCESS;
+}

+ 10 - 0
examples/bazel/example.h

@@ -0,0 +1,10 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_EXAMPLES_BAZEL_EXAMPLE_H_
+#define CARBON_EXAMPLES_BAZEL_EXAMPLE_H_
+
+// Prints the example's greeting; defined in `example.cpp`.
+auto HelloWorld() -> void;
+
+#endif  // CARBON_EXAMPLES_BAZEL_EXAMPLE_H_

+ 183 - 0
examples/bazel/update_module_to_nightly.py

@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+
+"""Updates example module file to use the nightly toolchain release.
+
+This script computes the most recent nightly Carbon toolchain release, and
+updates the example module file with an `archive_override` pointing at it.
+
+Usage:
+  # Within the `examples/bazel` directory:
+  ./update_module_to_nightly.py
+
+For more details about using the Carbon toolchain with Bazel, see the
+documentation in `examples/bazel/MODULE.bazel`.
+"""
+
+__copyright__ = """
+Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+Exceptions. See /LICENSE for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""
+
+import re
+import os
+import sys
+import base64
+import urllib.request
+import urllib.error
+import json
+
+# Name of the Bazel module whose `archive_override` this script manages.
+MODULE_NAME = "carbon_toolchain"
+# Module file to rewrite; looked up relative to the current directory.
+MODULE_FILENAME = "MODULE.bazel"
+# Matches a `bazel_dep(...)` for the module that starts at the beginning of a
+# line, extending to the first closing parenthesis.
+DEP_PATTERN = re.compile(
+    rf'^bazel_dep\s*\(\s*name\s*=\s*"{MODULE_NAME}".*?\)',
+    re.DOTALL | re.MULTILINE,
+)
+# Matches an `archive_override(...)` for the module that starts at the
+# beginning of a line, extending to the first closing parenthesis.
+OVERRIDE_PATTERN = re.compile(
+    rf'^archive_override\s*\(\s*module_name\s*=\s*"{MODULE_NAME}".*?\)',
+    re.DOTALL | re.MULTILINE,
+)
+
+# The nightly build starts at 2am UTC, and we give it up to 4 hours to complete.
+# NOTE(review): not referenced anywhere in the visible part of this script.
+BUFFER_HOURS = 6
+
+# Base URL under which release archives are downloaded.
+RELEASES_URL = (
+    "https://github.com/carbon-language/carbon-lang/releases/download"
+)
+# GitHub REST API endpoint listing the repository's releases.
+RELEASES_API_URL = (
+    "https://api.github.com/repos/carbon-language/carbon-lang/releases"
+)
+# Headers sent with every GitHub API request.
+API_HEADERS = {
+    "Accept": "application/vnd.github+json",
+    "X-GitHub-Api-Version": "2022-11-28",
+    # GitHub API requires a User-Agent, urllib doesn't send one by default
+    "User-Agent": "python-urllib",
+}
+
+
+def log(msg: str) -> None:
+    print(f"[update_module_to_nightly] {msg}", file=sys.stderr)
+
+
+def get_latest_version() -> str:
+    # Use the 'releases' list endpoint, NOT 'releases/latest'. Using the
+    # `latest` endpoint only works for full releases, not pre-releases. Carbon's
+    # nightly releases are classified as pre-releases so we have to get the full
+    # list and simply take the first one. That does mean we only need the first
+    # page of results.
+    url = f"{RELEASES_API_URL}?per_page=1"
+    req = urllib.request.Request(url, headers=API_HEADERS)
+    try:
+        with urllib.request.urlopen(req) as response:
+            data = json.load(response)
+            if not data:
+                log("Error: no releases found for this repository.")
+                sys.exit(1)
+
+            # The API returns a list sorted by creation date (newest first).
+            latest_release = data[0]
+
+    except urllib.error.HTTPError as e:
+        log(f"Error: HTTP error {e.code} fetching latest release: {e.reason}")
+        # It's often useful to print the body for GitHub API errors (e.g. rate
+        # limit exceeded)
+        log(e.read().decode("utf-8"))
+        sys.exit(1)
+
+    # The release tag starts with `v` followed by the version.
+    latest_version = str(latest_release["tag_name"])
+    if not latest_version.startswith("v"):
+        log(f"Error: malformed release tag name: {latest_version}")
+        sys.exit(1)
+
+    return latest_version[1:]
+
+
+def get_digest(version: str, filename: str) -> str:
+    url = f"{RELEASES_API_URL}/tags/v{version}"
+    req = urllib.request.Request(url, headers=API_HEADERS)
+    try:
+        with urllib.request.urlopen(req) as response:
+            release_data = json.load(response)
+    except urllib.error.HTTPError as e:
+        log(f"Error: unable to find `v{version}`: {e.code}: {e.reason}")
+        sys.exit(1)
+
+    assets = release_data.get("assets", [])
+    for asset in assets:
+        name = str(asset.get("name"))
+        if name != filename:
+            continue
+
+        digest = str(asset.get("digest"))
+        if not digest.startswith("sha256:"):
+            log(f"Error: found invalid digest for `{filename}`: `{digest}`")
+            sys.exit(1)
+
+        # Re-encode from the GitHub format to Bazel.
+        digest = (
+            "sha256-"
+            + base64.b64encode(bytes.fromhex(digest[len("sha256:") :])).decode()
+        )
+        return digest
+
+    log(f"Error: unable to find a digest for `{filename}`")
+    sys.exit(1)
+
+
+def generate_override(version: str) -> str:
+    basename = f"carbon_toolchain-{version}"
+    digest = get_digest(version, f"{basename}.tar.gz")
+    return (
+        f"archive_override(\n"
+        f'    module_name = "{MODULE_NAME}",\n'
+        f'    integrity = "{digest}",\n'
+        f'    strip_prefix = "{basename}/lib/carbon",\n'
+        f'    urls = ["{RELEASES_URL}/v{version}/{basename}.tar.gz"],\n'
+        f")"
+    )
+
+
+def main() -> None:
+    if not os.path.exists(MODULE_FILENAME):
+        log(f"Error: `{MODULE_FILENAME}` not found in current directory.")
+        sys.exit(1)
+
+    with open(MODULE_FILENAME, "r") as f:
+        content = f.read()
+
+    # 1. Verification (Check if dependency exists)
+    dep_match = DEP_PATTERN.search(content)
+    if not dep_match:
+        log(
+            f"Error: `bazel_dep` for `{MODULE_NAME}` not found in "
+            f"`{MODULE_FILENAME}`."
+        )
+        sys.exit(1)
+
+    version = get_latest_version()
+    new_block = generate_override(version)
+
+    (new_content, count) = OVERRIDE_PATTERN.subn(new_block, content)
+    if count > 0:
+        log("Existing override found, replacing with a fresh one")
+    else:
+        log("No existing override found, inserting one")
+        new_content = (
+            content[: dep_match.end()]
+            + "\n\n"
+            + new_block
+            + content[dep_match.end() :]
+        )
+
+    with open(MODULE_FILENAME, "w") as f:
+        f.write(new_content)
+
+    log(f"Successfully updated `{MODULE_FILENAME}` to version `{version}`")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Exit with a failure status on Ctrl-C without printing a traceback.
+        sys.exit(1)

+ 9 - 0
toolchain/install/BUILD

@@ -315,6 +315,8 @@ install_dirs = {
         ),
     ],
     "lib/carbon": [
+        install_target("MODULE.bazel", "bazel/install.MODULE.bazel"),
+        install_target("BUILD", "bazel/install.BUILD"),
         install_target("carbon_install.txt", "carbon_install.txt"),
         install_target(
             "install_digest.txt",
@@ -329,10 +331,17 @@ install_dirs = {
             executable = True,
             is_driver = True,
         ),
+        install_filegroup("bazel", "//bazel/cc_toolchains:installed_cc_toolchain_starlark"),
         # TODO: Consider if we want to keep `core` here or group it with
         # runtimes. It is a bit of both -- standard library, and runtimes.
         install_filegroup("core", "//core:prelude"),
     ],
+    "lib/carbon/bazel": [
+        install_target("carbon_cc_toolchain_config.bzl", "bazel/carbon_cc_toolchain_config.bzl"),
+        install_target("carbon_detected_variables.tpl.bzl", "bazel/carbon_detected_variables.tpl.bzl"),
+        install_target("carbon_toolchain.bzl", "bazel/carbon_toolchain.bzl"),
+        install_target("BUILD", "bazel/empty.BUILD"),
+    ],
     "lib/carbon/llvm/bin": [install_symlink(
         name,
         "../../carbon-busybox",

+ 111 - 0
toolchain/install/bazel/carbon_cc_toolchain_config.bzl

@@ -0,0 +1,111 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Starlark cc_toolchain configuration rules for using the Carbon toolchain"""
+
+load(
+    "@carbon_toolchain_config//:carbon_detected_variables.bzl",
+    "clang_include_dirs",
+    "clang_sysroot",
+)
+load(
+    "@rules_cc//cc:defs.bzl",
+    "CcToolchainConfigInfo",
+    "cc_toolchain",
+)
+load("@rules_cc//cc/common:cc_common.bzl", "cc_common")
+load(":cc_toolchain_features.bzl", "clang_cc_toolchain_features")
+load(
+    ":cc_toolchain_tools.bzl",
+    "llvm_action_configs",
+    "llvm_tool_paths",
+)
+
+def _impl(ctx):
+    """Builds the toolchain config info for the rule's target CPU and OS."""
+
+    # The LLVM (and Clang) binaries sit at a fixed repository-relative path, as
+    # that is a fixed aspect of the install structure.
+    bin_dir = "llvm/bin"
+
+    # Forward a sysroot only when Carbon's config reports a non-trivial one.
+    has_sysroot = clang_sysroot not in ("None", "/")
+    sysroot = clang_sysroot if has_sysroot else None
+
+    # This configuration only supports local non-cross builds, so everything is
+    # derived from the selected target CPU and OS.
+    toolchain_id = "carbon-toolchain-{0}-{1}".format(
+        ctx.attr.target_cpu,
+        ctx.attr.target_os,
+    )
+
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        features = clang_cc_toolchain_features(
+            target_os = ctx.attr.target_os,
+            target_cpu = ctx.attr.target_cpu,
+        ),
+        action_configs = llvm_action_configs(bin_dir),
+        cxx_builtin_include_directories = clang_include_dirs,
+        builtin_sysroot = sysroot,
+        toolchain_identifier = toolchain_id,
+
+        # Exposes a "flag" that `config_setting` rules can use to determine if
+        # the compiler is Clang.
+        compiler = "clang",
+
+        # The tool paths have to be passed in explicitly.
+        tool_paths = llvm_tool_paths(bin_dir),
+    )
+
+# Rule providing the `CcToolchainConfigInfo` for a single target CPU and OS,
+# both of which are mandatory string attributes.
+carbon_cc_toolchain_config = rule(
+    implementation = _impl,
+    attrs = {
+        "target_cpu": attr.string(mandatory = True),
+        "target_os": attr.string(mandatory = True),
+    },
+    provides = [CcToolchainConfigInfo],
+)
+
+def carbon_cc_toolchain_suite(name, configs):
+    """Declares C++ toolchains for each supported (os, cpu) configuration.
+
+    A single `<name>_files` filegroup globbing the whole package backs every
+    toolchain. Each (os, cpu) pair then gets a `<name>_<os>_<cpu>_config`
+    toolchain config, a `<name>_<os>_<cpu>_tools` `cc_toolchain`, and a
+    `<name>_<os>_<cpu>` `toolchain` constrained to that OS and CPU for both
+    execution and target platforms.
+
+    Args:
+        name: The name of the toolchain suite to produce.
+        configs: An array of (os, cpu) pairs to support in the toolchain.
+    """
+    files_name = name + "_files"
+    files_label = ":" + files_name
+
+    native.filegroup(
+        name = files_name,
+        srcs = native.glob(["**"]),
+    )
+
+    # Emit the config, `cc_toolchain`, and `toolchain` for each pair.
+    for (os, cpu) in configs:
+        prefix = "{0}_{1}_{2}".format(name, os, cpu)
+        config_target = prefix + "_config"
+        tools_target = prefix + "_tools"
+
+        carbon_cc_toolchain_config(
+            name = config_target,
+            target_cpu = cpu,
+            target_os = os,
+        )
+        cc_toolchain(
+            name = tools_target,
+            all_files = files_label,
+            ar_files = files_label,
+            as_files = files_label,
+            compiler_files = files_label,
+            dwp_files = files_label,
+            linker_files = files_label,
+            objcopy_files = files_label,
+            strip_files = files_label,
+            supports_param_files = 1,
+            toolchain_config = ":" + config_target,
+            toolchain_identifier = prefix,
+        )
+
+        # The same constraints apply to both the execution and target
+        # platforms.
+        platform_constraints = [
+            "@platforms//cpu:" + cpu,
+            "@platforms//os:" + os,
+        ]
+        native.toolchain(
+            name = prefix,
+            exec_compatible_with = platform_constraints,
+            target_compatible_with = platform_constraints,
+            toolchain = tools_target,
+            toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+        )

+ 12 - 0
toolchain/install/bazel/carbon_detected_variables.tpl.bzl

@@ -0,0 +1,12 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""A Starlark file exporting detected Carbon toolchain configuration variables.
+
+This file gets processed by a repository rule, substituting the `VARIABLE`s with
+values, for example using an invocation of `carbon config`.
+"""
+
+clang_include_dirs = CLANG_INCLUDE_DIRS
+clang_sysroot = "CLANG_SYSROOT"

+ 60 - 0
toolchain/install/bazel/carbon_toolchain.bzl

@@ -0,0 +1,60 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Module extension to configure Carbon's `cc_toolchain`s.
+
+This extension extracts configuration from the Carbon toolchain into
+`carbon_detected_variables.bzl`. These values are then used by the
+`cc_toolchain` to set up the Carbon toolchain as a viable C++ Bazel toolchain.
+"""
+
+def _compute_config_vars(repository_ctx, carbon):
+    """Queries `carbon config --json` and returns its values as strings.
+
+    Fails the repository rule if the command exits with a non-zero code or if
+    its output does not decode to a dictionary.
+    """
+    result = repository_ctx.execute([carbon, "config", "--json"])
+    if result.return_code != 0:
+        fail("Command failed with return code {0}:\n{1}".format(
+            result.return_code,
+            result.stderr,
+        ))
+
+    decoded = json.decode(result.stdout)
+    if type(decoded) != "dict":
+        fail("Config JSON decoded to a non-dict value: \n" + result.stdout)
+
+    # `repository_ctx.template` substitutes strings, so stringify every value
+    # of the decoded config.
+    substitutions = {}
+    for key, value in decoded.items():
+        substitutions[key] = str(value)
+    return substitutions
+
+def _create_config_repo_impl(repository_ctx):
+    # Query the `carbon` binary for its configuration as string substitutions.
+    vars = _compute_config_vars(repository_ctx, repository_ctx.attr._carbon)
+
+    # Instantiate the template using the config keys as substitution keys.
+    repository_ctx.template(
+        "carbon_detected_variables.bzl",
+        repository_ctx.attr._template,
+        vars,
+    )
+
+    # Give the repository a BUILD file that exports the generated file.
+    repository_ctx.file("BUILD.bazel", """
+exports_files(["carbon_detected_variables.bzl"])
+""")
+
+# Repository rule generating `carbon_detected_variables.bzl`. Both inputs are
+# private attributes with default labels: the `carbon-busybox` binary that is
+# run, and the template that is filled in.
+_create_config_repo = repository_rule(
+    implementation = _create_config_repo_impl,
+    attrs = {
+        "_carbon": attr.label(
+            default = "//:carbon-busybox",
+            allow_single_file = True,
+        ),
+        "_template": attr.label(
+            default = "//bazel:carbon_detected_variables.tpl.bzl",
+            allow_single_file = True,
+        ),
+    },
+)
+
+# Module extension that instantiates the config repository under the fixed
+# name `carbon_toolchain_config`.
+carbon_toolchain_config = module_extension(
+    implementation =
+        lambda ctx: _create_config_repo(name = "carbon_toolchain_config"),
+)

+ 0 - 0
toolchain/install/bazel/empty.BUILD


+ 18 - 0
toolchain/install/bazel/install.BUILD

@@ -0,0 +1,18 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//bazel:carbon_cc_toolchain_config.bzl", "carbon_cc_toolchain_suite")
+
+package(default_visibility = ["//visibility:public"])
+
+carbon_cc_toolchain_suite(
+    name = "carbon_cc_toolchain",
+    configs = [
+        ("linux", "aarch64"),
+        ("linux", "x86_64"),
+        ("freebsd", "x86_64"),
+        ("macos", "arm64"),
+        ("macos", "x86_64"),
+    ],
+)

+ 23 - 0
toolchain/install/bazel/install.MODULE.bazel

@@ -0,0 +1,23 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Carbon toolchain Bazel module.
+
+This module exposes Bazel `cc_toolchain`s built from this Carbon toolchain.
+
+Add the repository to your `MODULE.bazel` and then use
+`register_toolchains("@carbon_toolchain//:all")` to get these toolchains. For
+more detailed examples, see the Carbon project examples in `examples/bazel`.
+"""
+
+module(name = "carbon_toolchain")
+
+bazel_dep(name = "rules_cc", version = "0.1.4")
+bazel_dep(name = "platforms", version = "1.0.0")
+
+carbon_toolchain_config = use_extension(
+    "//bazel:carbon_toolchain.bzl",
+    "carbon_toolchain_config",
+)
+use_repo(carbon_toolchain_config, "carbon_toolchain_config")