#!/usr/bin/env python3 """Automatically fixes bazel C++ dependencies. Bazel has some support for detecting when an include refers to a missing dependency. However, the ideal state is that a given build target depends directly on all #include'd headers, and Bazel doesn't enforce that. This automates the addition for technical correctness. """ __copyright__ = """ Part of the Carbon Language project, under the Apache License v2.0 with LLVM Exceptions. See /LICENSE for license information. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ import re import subprocess from typing import Callable, NamedTuple from xml.etree import ElementTree import scripts_utils class ExternalRepo(NamedTuple): # A function for remapping files to #include paths. remap: Callable[[str], str] # The target expression to gather rules for within the repo. target: str # Whether to use "" or <> for the include. use_system_include: bool = False class RuleChoice(NamedTuple): # Whether to use "" or <> for the include. use_system_include: bool # Possible rules that may be used. rules: set[str] # Maps external repository names to a method translating bazel labels to file # paths for that repository. EXTERNAL_REPOS: dict[str, ExternalRepo] = { # llvm:include/llvm/Support/Error.h ->llvm/Support/Error.h # clang-tools-extra/clangd:URI.h -> clang-tools-extra/clangd/URI.h "@llvm-project": ExternalRepo( lambda x: re.sub(":", "/", re.sub("^(.*:(lib|include))/", "", x)), "...", ), # tools/cpp/runfiles:runfiles.h -> tools/cpp/runfiles/runfiles.h "@bazel_tools": ExternalRepo(lambda x: re.sub(":", "/", x), "..."), # absl/flags:flag.h -> absl/flags/flag.h "@abseil-cpp": ExternalRepo(lambda x: re.sub(":", "/", x), "..."), # :re2/re2.h -> re2/re2.h "@re2": ExternalRepo(lambda x: re.sub(":", "", x), ":re2"), # :googletest/include/gtest/gtest.h -> gtest/gtest.h "@googletest": ExternalRepo( lambda x: re.sub(":google(?:mock|test)/include/", "", x), ":gtest", use_system_include=True, ), # All of the `boost_unordered` headers are in a single rule. "@boost_unordered": ExternalRepo( lambda x: re.sub("^(.*:include)/", "", x), ":boost_unordered", use_system_include=True, ), } IGNORE_SOURCE_FILE_REGEX = re.compile( "^(third_party/clangd.*|common/version.*\\.cpp)$" ) class Rule(NamedTuple): # For cc_* rules: # The hdrs + textual_hdrs attributes, as relative paths to the file. hdrs: set[str] # The srcs attribute, as relative paths to the file. srcs: set[str] # The deps attribute, as full bazel labels. deps: set[str] # For genrules: # The outs attribute, as relative paths to the file. outs: set[str] def remap_file(label: str) -> str: """Remaps a bazel label to a file.""" repo, _, path = label.partition("//") if not repo: return path.replace(":", "/") # Ignore the version, just use the repo name. repo = repo.split("~", 1)[0] assert repo in EXTERNAL_REPOS, repo return EXTERNAL_REPOS[repo].remap(path) def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> set[str]: """Returns the contents of a bazel list. The return will normally be the full label, unless `is_file` is set, in which case the label will be translated to the underlying file. """ results: set[str] = set() for label in list_child: assert label.tag in ("label", "output"), label.tag value = label.attrib["value"] if is_file: value = remap_file(value) results.add(value) return results def get_rules(bazel: str, targets: str, keep_going: bool) -> dict[str, Rule]: """Queries the specified targets, returning the found rules. keep_going will be set to true for external repositories, where sometimes we see query errors. The return maps rule names to rule data. """ args = [ bazel, "query", "--output=xml", f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))", ] if keep_going: args.append("--keep_going") p = subprocess.run( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" ) # 3 indicates incomplete results from --keep_going, which is fine here. if p.returncode not in {0, 3}: print(p.stderr) exit(f"bazel query returned {p.returncode}") rules: dict[str, Rule] = {} for rule_xml in ElementTree.fromstring(p.stdout): assert rule_xml.tag == "rule", rule_xml.tag rule_name = rule_xml.attrib["name"] hdrs: set[str] = set() srcs: set[str] = set() deps: set[str] = set() outs: set[str] = set() rule_class = rule_xml.attrib["class"] for list_child in rule_xml.findall("list"): list_name = list_child.attrib["name"] if rule_class in ("cc_library", "cc_binary", "cc_test"): if list_name in ("hdrs", "textual_hdrs"): hdrs = hdrs.union(get_bazel_list(list_child, True)) elif list_name == "srcs": srcs = get_bazel_list(list_child, True) elif list_name == "deps": deps = get_bazel_list(list_child, False) elif rule_class == "genrule": if list_name == "outs": outs = get_bazel_list(list_child, True) elif rule_class == "tree_sitter_cc_library": continue else: exit(f"unexpected rule type: {rule_class}") rules[rule_name] = Rule(hdrs, srcs, deps, outs) return rules def map_headers( header_to_rule_map: dict[str, RuleChoice], rules: dict[str, Rule] ) -> None: """Accumulates headers provided by rules into the map. The map maps header paths to rule names. """ for rule_name, rule in rules.items(): repo, _, path = rule_name.partition("//") use_system_include = False if repo in EXTERNAL_REPOS: use_system_include = EXTERNAL_REPOS[repo].use_system_include for header in rule.hdrs: if header in header_to_rule_map: header_to_rule_map[header].rules.add(rule_name) if ( use_system_include != header_to_rule_map[header].use_system_include ): exit( "Unexpected use_system_include inconsistency in " f"{header_to_rule_map[header]}" ) else: header_to_rule_map[header] = RuleChoice( use_system_include, {rule_name} ) def get_missing_deps( header_to_rule_map: dict[str, RuleChoice], generated_files: set[str], rule: Rule, ) -> tuple[set[str], bool]: """Returns missing dependencies for the rule. On return, the set is dependency labels that should be added; the bool indicates whether some where omitted due to ambiguity. """ missing_deps: set[str] = set() ambiguous = False rule_files = rule.hdrs.union(rule.srcs) for source_file in rule_files: if source_file in generated_files: continue if IGNORE_SOURCE_FILE_REGEX.match(source_file): continue with open(source_file, "r") as f: file_content = f.read() file_content_changed = False for header_groups in re.findall( r'^(#include (?:(["<])([^">]+)[">]))', file_content, re.MULTILINE, ): (full_include, include_open, header) = header_groups is_system_include = include_open == "<" if header in rule_files: continue if header not in header_to_rule_map: if is_system_include: # Don't error for unexpected system includes. continue exit( f"Missing rule for " f"'{full_include}' in '{source_file}'" ) rule_choice = header_to_rule_map[header] if not rule_choice.rules.intersection(rule.deps): if len(rule_choice.rules) > 1: print( f"Ambiguous dependency choice for " f"'{full_include}' in '{source_file}': " f"{', '.join(rule_choice.rules)}" ) ambiguous = True # Use the single dep without removing it. missing_deps.add(next(iter(rule_choice.rules))) # If the include style should change, update file content. if is_system_include != rule_choice.use_system_include: if rule_choice.use_system_include: new_include = f"#include <{header}>" else: new_include = f'#include "{header}"' print( f"Fixing include format in '{source_file}': " f"'{full_include}' to '{new_include}'" ) file_content = file_content.replace(full_include, new_include) file_content_changed = True if file_content_changed: with open(source_file, "w") as f: f.write(file_content) return missing_deps, ambiguous def main() -> None: scripts_utils.chdir_repo_root() bazel = scripts_utils.locate_bazel() print("Querying bazel for Carbon targets...") carbon_rules = get_rules(bazel, "//...", False) print("Querying bazel for external targets...") external_repo_query = " ".join( [f"{repo}//{EXTERNAL_REPOS[repo].target}" for repo in EXTERNAL_REPOS] ) external_rules = get_rules(bazel, external_repo_query, True) print("Building header map...") header_to_rule_map: dict[str, RuleChoice] = {} map_headers(header_to_rule_map, carbon_rules) map_headers(header_to_rule_map, external_rules) print("Building generated file list...") generated_files: set[str] = set() for rule in carbon_rules.values(): generated_files = generated_files.union(rule.outs) print("Parsing headers from source files...") all_missing_deps: list[tuple[str, set[str]]] = [] any_ambiguous = False for rule_name, rule in carbon_rules.items(): missing_deps, ambiguous = get_missing_deps( header_to_rule_map, generated_files, rule ) if missing_deps: all_missing_deps.append((rule_name, missing_deps)) if ambiguous: any_ambiguous = True if any_ambiguous: exit("Stopping due to ambiguous dependency choices.") if all_missing_deps: print("Checking buildozer availability...") buildozer = scripts_utils.get_release(scripts_utils.Release.BUILDOZER) print("Fixing dependencies...") SEPARATOR = "\n- " for rule_name, missing_deps in sorted(all_missing_deps): friendly_missing_deps = SEPARATOR.join(missing_deps) print( f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}" ) args = [ buildozer, f"add deps {' '.join(missing_deps)}", rule_name, ] subprocess.check_call(args) print("Done!") if __name__ == "__main__": main()