fix_cc_deps.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. #!/usr/bin/env python3
  2. """Automatically fixes bazel C++ dependencies.
  3. Bazel has some support for detecting when an include refers to a missing
  4. dependency. However, the ideal state is that a given build target depends
  5. directly on all #include'd headers, and Bazel doesn't enforce that. This
  6. automates the addition for technical correctness.
  7. """
  8. __copyright__ = """
  9. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  10. Exceptions. See /LICENSE for license information.
  11. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  12. """
  13. import os
  14. import re
  15. import shutil
  16. import subprocess
  17. from pathlib import Path
  18. import tempfile
  19. from typing import Callable, Dict, List, NamedTuple, Set, Tuple
  20. from xml.etree import ElementTree
  21. # Maps external repository names to a method translating bazel labels to file
  22. # paths for that repository.
  23. EXTERNAL_REPOS: Dict[str, Callable[[str], str]] = {
  24. "@llvm-project": lambda x: re.sub("^(.*:(lib|include))/", "", x)
  25. }
  26. class Rule(NamedTuple):
  27. # For cc_* rules:
  28. # The hdrs + textual_hdrs attributes, as relative paths to the file.
  29. hdrs: Set[str]
  30. # The srcs attribute, as relative paths to the file.
  31. srcs: Set[str]
  32. # The deps attribute, as full bazel labels.
  33. deps: Set[str]
  34. # For genrules:
  35. # The outs attribute, as relative paths to the file.
  36. outs: Set[str]
  37. def install_buildozer() -> str:
  38. # 4.2.4
  39. buildozer_sha = "cdedcc0318b9c8919afb0167e30c1588fc990ffc"
  40. args = [
  41. "go",
  42. "install",
  43. f"github.com/bazelbuild/buildtools/buildozer@{buildozer_sha}",
  44. ]
  45. # Install to a cache.
  46. env = os.environ.copy()
  47. cache_dir = Path(tempfile.gettempdir()).joinpath("carbon-pre-commit-cache")
  48. cache_dir.mkdir(parents=True, exist_ok=True)
  49. env["GOPATH"] = str(cache_dir)
  50. if "GOBIN" in env:
  51. del env["GOBIN"]
  52. subprocess.check_call(args, env=env)
  53. return str(cache_dir.joinpath("bin", "buildozer"))
  54. def locate_bazel() -> str:
  55. """Returns the bazel command.
  56. We use the `BAZEL` environment variable if present. If not, then we try to
  57. use `bazelisk` and then `bazel`.
  58. """
  59. bazel = os.environ.get("BAZEL")
  60. if bazel:
  61. return bazel
  62. if shutil.which("bazelisk"):
  63. return "bazelisk"
  64. if shutil.which("bazel"):
  65. return "bazel"
  66. exit("Unable to run Bazel")
  67. def remap_file(label: str) -> str:
  68. """Remaps a bazel label to a file."""
  69. repo, _, path = label.partition("//")
  70. if not repo:
  71. return path.replace(":", "/")
  72. assert repo in EXTERNAL_REPOS, repo
  73. return EXTERNAL_REPOS[repo](path)
  74. exit(f"Don't know how to remap label '{label}'")
  75. def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> Set[str]:
  76. """Returns the contents of a bazel list.
  77. The return will normally be the full label, unless `is_file` is set, in
  78. which case the label will be translated to the underlying file.
  79. """
  80. results: Set[str] = set()
  81. for label in list_child:
  82. assert label.tag in ("label", "output"), label.tag
  83. value = label.attrib["value"]
  84. if is_file:
  85. value = remap_file(value)
  86. results.add(value)
  87. return results
  88. def get_rules(targets: str, keep_going: bool) -> Dict[str, Rule]:
  89. """Queries the specified targets, returning the found rules.
  90. keep_going will be set to true for external repositories, where sometimes we
  91. see query errors.
  92. The return maps rule names to rule data.
  93. """
  94. args = [
  95. "bazel",
  96. "query",
  97. "--output=xml",
  98. f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))",
  99. ]
  100. if keep_going:
  101. args.append("--keep_going")
  102. p = subprocess.run(
  103. args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
  104. )
  105. # 3 indicates incomplete results from --keep_going, which is fine here.
  106. if p.returncode not in {0, 3}:
  107. print(p.stderr)
  108. exit(f"bazel query returned {p.returncode}")
  109. rules: Dict[str, Rule] = {}
  110. for rule_xml in ElementTree.fromstring(p.stdout):
  111. assert rule_xml.tag == "rule", rule_xml.tag
  112. rule_name = rule_xml.attrib["name"]
  113. hdrs: Set[str] = set()
  114. srcs: Set[str] = set()
  115. deps: Set[str] = set()
  116. outs: Set[str] = set()
  117. rule_class = rule_xml.attrib["class"]
  118. for list_child in rule_xml.findall("list"):
  119. list_name = list_child.attrib["name"]
  120. if rule_class in ("cc_library", "cc_binary", "cc_test"):
  121. if list_name in ("hdrs", "textual_hdrs"):
  122. hdrs = hdrs.union(get_bazel_list(list_child, True))
  123. elif list_name == "srcs":
  124. srcs = get_bazel_list(list_child, True)
  125. elif list_name == "deps":
  126. deps = get_bazel_list(list_child, False)
  127. elif rule_class == "genrule":
  128. if list_name == "outs":
  129. outs = get_bazel_list(list_child, True)
  130. else:
  131. exit(f"unexpected rule type: {rule_class}")
  132. rules[rule_name] = Rule(hdrs, srcs, deps, outs)
  133. return rules
  134. def map_headers(
  135. header_to_rule_map: Dict[str, Set[str]], rules: Dict[str, Rule]
  136. ) -> None:
  137. """Accumulates headers provided by rules into the map.
  138. The map maps header paths to rule names.
  139. """
  140. for rule_name, rule in rules.items():
  141. for header in rule.hdrs:
  142. if header in header_to_rule_map:
  143. header_to_rule_map[header].add(rule_name)
  144. else:
  145. header_to_rule_map[header] = {rule_name}
  146. def get_missing_deps(
  147. header_to_rule_map: Dict[str, Set[str]],
  148. generated_files: Set[str],
  149. rule: Rule,
  150. ) -> Tuple[Set[str], bool]:
  151. """Returns missing dependencies for the rule.
  152. On return, the set is dependency labels that should be added; the bool
  153. indicates whether some where omitted due to ambiguity.
  154. """
  155. missing_deps: Set[str] = set()
  156. ambiguous = False
  157. rule_files = rule.hdrs.union(rule.srcs)
  158. for source_file in rule_files:
  159. if source_file in generated_files:
  160. continue
  161. with open(source_file, "r") as f:
  162. for header in re.findall(
  163. r'^#include "([^"]+)"', f.read(), re.MULTILINE
  164. ):
  165. if header in rule_files:
  166. continue
  167. if header not in header_to_rule_map:
  168. exit(
  169. f"Missing rule for #include '{header}' in "
  170. f"'{source_file}'"
  171. )
  172. dep_choices = header_to_rule_map[header]
  173. if not dep_choices.intersection(rule.deps):
  174. if len(dep_choices) > 1:
  175. print(
  176. f"Ambiguous dependency choice for #include "
  177. f"'{header}' in '{source_file}': "
  178. f"{', '.join(dep_choices)}"
  179. )
  180. ambiguous = True
  181. # Use the single dep without removing it.
  182. missing_deps.add(next(iter(dep_choices)))
  183. return missing_deps, ambiguous
  184. def main() -> None:
  185. # Change the working directory to the repository root so that the remaining
  186. # operations reliably operate relative to that root.
  187. os.chdir(Path(__file__).parent.parent)
  188. print("Querying bazel for Carbon targets...")
  189. carbon_rules = get_rules("//...", False)
  190. print("Querying bazel for external targets...")
  191. external_repo_query = " ".join([f"{repo}//..." for repo in EXTERNAL_REPOS])
  192. external_rules = get_rules(external_repo_query, True)
  193. print("Building header map...")
  194. header_to_rule_map: Dict[str, Set[str]] = {}
  195. map_headers(header_to_rule_map, carbon_rules)
  196. map_headers(header_to_rule_map, external_rules)
  197. print("Building generated file list...")
  198. generated_files: Set[str] = set()
  199. for rule in carbon_rules.values():
  200. generated_files = generated_files.union(rule.outs)
  201. print("Parsing headers from source files...")
  202. all_missing_deps: List[Tuple[str, Set[str]]] = []
  203. any_ambiguous = False
  204. for rule_name, rule in carbon_rules.items():
  205. missing_deps, ambiguous = get_missing_deps(
  206. header_to_rule_map, generated_files, rule
  207. )
  208. if missing_deps:
  209. all_missing_deps.append((rule_name, missing_deps))
  210. if ambiguous:
  211. any_ambiguous = True
  212. if any_ambiguous:
  213. exit("Stopping due to ambiguous dependency choices.")
  214. if all_missing_deps:
  215. print("Checking buildozer availability...")
  216. buildozer = install_buildozer()
  217. print("Fixing dependencies...")
  218. SEPARATOR = "\n- "
  219. for rule_name, missing_deps in sorted(all_missing_deps):
  220. friendly_missing_deps = SEPARATOR.join(missing_deps)
  221. print(
  222. f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}"
  223. )
  224. args = [
  225. buildozer,
  226. f"add deps {' '.join(missing_deps)}",
  227. rule_name,
  228. ]
  229. subprocess.check_call(args)
  230. print("Done!")
  231. if __name__ == "__main__":
  232. main()