fix_cc_deps.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. #!/usr/bin/env python3
  2. """Automatically fixes bazel C++ dependencies.
  3. Bazel has some support for detecting when an include refers to a missing
  4. dependency. However, the ideal state is that a given build target depends
  5. directly on all #include'd headers, and Bazel doesn't enforce that. This
  6. automates the addition for technical correctness.
  7. """
  8. __copyright__ = """
  9. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  10. Exceptions. See /LICENSE for license information.
  11. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  12. """
  13. import re
  14. import subprocess
  15. from typing import Callable, Dict, List, NamedTuple, Optional, Set, Tuple
  16. from xml.etree import ElementTree
  17. import scripts_utils
  18. class ExternalRepo(NamedTuple):
  19. # A function for remapping files to #include paths.
  20. remap: Callable[[str], str]
  21. # The target expression to gather rules for within the repo.
  22. target: str
  23. # The repo to use for dependencies.
  24. dep_repo: Optional[str]
  25. # Maps external repository names to a method translating bazel labels to file
  26. # paths for that repository.
  27. EXTERNAL_REPOS: Dict[str, ExternalRepo] = {
  28. # llvm:include/llvm/Support/Error.h ->llvm/Support/Error.h
  29. # clang-tools-extra/clangd:URI.h -> clang-tools-extra/clangd/URI.h
  30. "@llvm-project": ExternalRepo(
  31. lambda x: re.sub(":", "/", re.sub("^(.*:(lib|include))/", "", x)),
  32. "...",
  33. None,
  34. ),
  35. # :src/google/protobuf/descriptor.h -> google/protobuf/descriptor.h
  36. # - protobuf_headers is specified because there are multiple overlapping
  37. # targets.
  38. # - @com_google_protobuf is the official dependency, and we use it, but it
  39. # aliases @com_github_protocolbuffers_protobuf.
  40. "@com_github_protocolbuffers_protobuf": ExternalRepo(
  41. lambda x: re.sub("^(.*:src)/", "", x),
  42. ":protobuf_headers",
  43. "@com_google_protobuf",
  44. ),
  45. # :src/libfuzzer/libfuzzer_macro.h -> libfuzzer/libfuzzer_macro.h
  46. "@com_google_libprotobuf_mutator": ExternalRepo(
  47. lambda x: re.sub("^(.*:src)/", "", x), "...", None
  48. ),
  49. # tools/cpp/runfiles:runfiles.h -> tools/cpp/runfiles/runfiles.h
  50. "@bazel_tools": ExternalRepo(lambda x: re.sub(":", "/", x), "...", None),
  51. # absl/flags:flag.h -> absl/flags/flag.h
  52. "@com_google_absl": ExternalRepo(
  53. lambda x: re.sub(":", "/", x), "...", None
  54. ),
  55. # :re2/re2.h -> re2/re2.h
  56. "@com_googlesource_code_re2": ExternalRepo(
  57. lambda x: re.sub(":", "", x), ":re2", None
  58. ),
  59. }
  60. # TODO: proto rules are aspect-based and their generated files don't show up in
  61. # `bazel query` output.
  62. # Try using `bazel cquery --output=starlark` to print `target.files`.
  63. # For protobuf, need to add support for `alias` rule kind.
  64. IGNORE_HEADER_REGEX = re.compile("^(.*\\.pb\\.h)$")
  65. IGNORE_SOURCE_FILE_REGEX = re.compile("^third_party/clangd")
  66. class Rule(NamedTuple):
  67. # For cc_* rules:
  68. # The hdrs + textual_hdrs attributes, as relative paths to the file.
  69. hdrs: Set[str]
  70. # The srcs attribute, as relative paths to the file.
  71. srcs: Set[str]
  72. # The deps attribute, as full bazel labels.
  73. deps: Set[str]
  74. # For genrules:
  75. # The outs attribute, as relative paths to the file.
  76. outs: Set[str]
  77. def remap_file(label: str) -> str:
  78. """Remaps a bazel label to a file."""
  79. repo, _, path = label.partition("//")
  80. if not repo:
  81. return path.replace(":", "/")
  82. assert repo in EXTERNAL_REPOS, repo
  83. return EXTERNAL_REPOS[repo].remap(path)
  84. exit(f"Don't know how to remap label '{label}'")
  85. def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> Set[str]:
  86. """Returns the contents of a bazel list.
  87. The return will normally be the full label, unless `is_file` is set, in
  88. which case the label will be translated to the underlying file.
  89. """
  90. results: Set[str] = set()
  91. for label in list_child:
  92. assert label.tag in ("label", "output"), label.tag
  93. value = label.attrib["value"]
  94. if is_file:
  95. value = remap_file(value)
  96. results.add(value)
  97. return results
  98. def get_rules(bazel: str, targets: str, keep_going: bool) -> Dict[str, Rule]:
  99. """Queries the specified targets, returning the found rules.
  100. keep_going will be set to true for external repositories, where sometimes we
  101. see query errors.
  102. The return maps rule names to rule data.
  103. """
  104. args = [
  105. bazel,
  106. "query",
  107. "--output=xml",
  108. f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))",
  109. ]
  110. if keep_going:
  111. args.append("--keep_going")
  112. p = subprocess.run(
  113. args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
  114. )
  115. # 3 indicates incomplete results from --keep_going, which is fine here.
  116. if p.returncode not in {0, 3}:
  117. print(p.stderr)
  118. exit(f"bazel query returned {p.returncode}")
  119. rules: Dict[str, Rule] = {}
  120. for rule_xml in ElementTree.fromstring(p.stdout):
  121. assert rule_xml.tag == "rule", rule_xml.tag
  122. rule_name = rule_xml.attrib["name"]
  123. hdrs: Set[str] = set()
  124. srcs: Set[str] = set()
  125. deps: Set[str] = set()
  126. outs: Set[str] = set()
  127. rule_class = rule_xml.attrib["class"]
  128. for list_child in rule_xml.findall("list"):
  129. list_name = list_child.attrib["name"]
  130. if rule_class in ("cc_library", "cc_binary", "cc_test"):
  131. if list_name in ("hdrs", "textual_hdrs"):
  132. hdrs = hdrs.union(get_bazel_list(list_child, True))
  133. elif list_name == "srcs":
  134. srcs = get_bazel_list(list_child, True)
  135. elif list_name == "deps":
  136. deps = get_bazel_list(list_child, False)
  137. elif rule_class == "genrule":
  138. if list_name == "outs":
  139. outs = get_bazel_list(list_child, True)
  140. elif rule_class == "tree_sitter_cc_library":
  141. continue
  142. else:
  143. exit(f"unexpected rule type: {rule_class}")
  144. rules[rule_name] = Rule(hdrs, srcs, deps, outs)
  145. return rules
  146. def map_headers(
  147. header_to_rule_map: Dict[str, Set[str]], rules: Dict[str, Rule]
  148. ) -> None:
  149. """Accumulates headers provided by rules into the map.
  150. The map maps header paths to rule names.
  151. """
  152. for rule_name, rule in rules.items():
  153. repo, _, path = rule_name.partition("//")
  154. if repo and EXTERNAL_REPOS[repo].dep_repo:
  155. rule_name = f"{EXTERNAL_REPOS[repo].dep_repo}//{path}"
  156. for header in rule.hdrs:
  157. if header in header_to_rule_map:
  158. header_to_rule_map[header].add(rule_name)
  159. else:
  160. header_to_rule_map[header] = {rule_name}
  161. def get_missing_deps(
  162. header_to_rule_map: Dict[str, Set[str]],
  163. generated_files: Set[str],
  164. rule: Rule,
  165. ) -> Tuple[Set[str], bool]:
  166. """Returns missing dependencies for the rule.
  167. On return, the set is dependency labels that should be added; the bool
  168. indicates whether some where omitted due to ambiguity.
  169. """
  170. missing_deps: Set[str] = set()
  171. ambiguous = False
  172. rule_files = rule.hdrs.union(rule.srcs)
  173. for source_file in rule_files:
  174. if source_file in generated_files:
  175. continue
  176. if IGNORE_SOURCE_FILE_REGEX.match(source_file):
  177. continue
  178. with open(source_file, "r") as f:
  179. for header_groups in re.findall(
  180. r'^(#include (?:"([^"]+)"|'
  181. r"<((?:google|gmock|gtest|libfuzzer)/[^>]+)>))",
  182. f.read(),
  183. re.MULTILINE,
  184. ):
  185. # Ignore whether the source was a quote or system include.
  186. header = header_groups[1]
  187. if not header:
  188. header = header_groups[2]
  189. if header in rule_files:
  190. continue
  191. if header not in header_to_rule_map:
  192. if IGNORE_HEADER_REGEX.match(header):
  193. print(
  194. f"Ignored missing "
  195. f"'{header_groups[0]}' in '{source_file}'"
  196. )
  197. continue
  198. else:
  199. exit(
  200. f"Missing rule for "
  201. f"'{header_groups[0]}' in '{source_file}'"
  202. )
  203. dep_choices = header_to_rule_map[header]
  204. if not dep_choices.intersection(rule.deps):
  205. if len(dep_choices) > 1:
  206. print(
  207. f"Ambiguous dependency choice for "
  208. f"'{header_groups[0]}' in '{source_file}': "
  209. f"{', '.join(dep_choices)}"
  210. )
  211. ambiguous = True
  212. # Use the single dep without removing it.
  213. missing_deps.add(next(iter(dep_choices)))
  214. return missing_deps, ambiguous
  215. def main() -> None:
  216. scripts_utils.chdir_repo_root()
  217. bazel = scripts_utils.locate_bazel()
  218. print("Querying bazel for Carbon targets...")
  219. carbon_rules = get_rules(bazel, "//...", False)
  220. print("Querying bazel for external targets...")
  221. external_repo_query = " ".join(
  222. [f"{repo}//{EXTERNAL_REPOS[repo].target}" for repo in EXTERNAL_REPOS]
  223. )
  224. external_rules = get_rules(bazel, external_repo_query, True)
  225. print("Building header map...")
  226. header_to_rule_map: Dict[str, Set[str]] = {
  227. "gmock/gmock.h": {"@com_google_googletest//:gtest"},
  228. "gtest/gtest.h": {"@com_google_googletest//:gtest"},
  229. }
  230. map_headers(header_to_rule_map, carbon_rules)
  231. map_headers(header_to_rule_map, external_rules)
  232. print("Building generated file list...")
  233. generated_files: Set[str] = set()
  234. for rule in carbon_rules.values():
  235. generated_files = generated_files.union(rule.outs)
  236. print("Parsing headers from source files...")
  237. all_missing_deps: List[Tuple[str, Set[str]]] = []
  238. any_ambiguous = False
  239. for rule_name, rule in carbon_rules.items():
  240. missing_deps, ambiguous = get_missing_deps(
  241. header_to_rule_map, generated_files, rule
  242. )
  243. if missing_deps:
  244. all_missing_deps.append((rule_name, missing_deps))
  245. if ambiguous:
  246. any_ambiguous = True
  247. if any_ambiguous:
  248. exit("Stopping due to ambiguous dependency choices.")
  249. if all_missing_deps:
  250. print("Checking buildozer availability...")
  251. buildozer = scripts_utils.get_release(scripts_utils.Release.BUILDOZER)
  252. print("Fixing dependencies...")
  253. SEPARATOR = "\n- "
  254. for rule_name, missing_deps in sorted(all_missing_deps):
  255. friendly_missing_deps = SEPARATOR.join(missing_deps)
  256. print(
  257. f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}"
  258. )
  259. args = [
  260. buildozer,
  261. f"add deps {' '.join(missing_deps)}",
  262. rule_name,
  263. ]
  264. subprocess.check_call(args)
  265. print("Done!")
  266. if __name__ == "__main__":
  267. main()