#!/usr/bin/env python3

"""Automatically fixes bazel C++ dependencies.

Bazel has some support for detecting when an include refers to a missing
dependency. However, the ideal state is that a given build target depends
directly on all #include'd headers, and Bazel doesn't enforce that. This
automates the addition for technical correctness.
"""

__copyright__ = """
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
import re
import subprocess
import sys
from typing import Callable, Dict, List, NamedTuple, Set, Tuple
from xml.etree import ElementTree

import scripts_utils
  18. class ExternalRepo(NamedTuple):
  19. # A function for remapping files to #include paths.
  20. remap: Callable[[str], str]
  21. # The target expression to gather rules for within the repo.
  22. target: str
  23. # Whether to use "" or <> for the include.
  24. use_system_include: bool = False
  25. class RuleChoice(NamedTuple):
  26. # Whether to use "" or <> for the include.
  27. use_system_include: bool
  28. # Possible rules that may be used.
  29. rules: Set[str]
  30. # Maps external repository names to a method translating bazel labels to file
  31. # paths for that repository.
  32. EXTERNAL_REPOS: Dict[str, ExternalRepo] = {
  33. # llvm:include/llvm/Support/Error.h ->llvm/Support/Error.h
  34. # clang-tools-extra/clangd:URI.h -> clang-tools-extra/clangd/URI.h
  35. "@llvm-project": ExternalRepo(
  36. lambda x: re.sub(":", "/", re.sub("^(.*:(lib|include))/", "", x)),
  37. "...",
  38. ),
  39. # :src/google/protobuf/descriptor.h -> google/protobuf/descriptor.h
  40. # - protobuf_headers is specified because there are multiple overlapping
  41. # targets.
  42. "@protobuf": ExternalRepo(
  43. lambda x: re.sub("^(.*:src)/", "", x),
  44. ":protobuf_headers",
  45. use_system_include=True,
  46. ),
  47. # :src/libfuzzer/libfuzzer_macro.h -> libfuzzer/libfuzzer_macro.h
  48. "@com_google_libprotobuf_mutator": ExternalRepo(
  49. lambda x: re.sub("^(.*:src)/", "", x), "...", use_system_include=True
  50. ),
  51. # tools/cpp/runfiles:runfiles.h -> tools/cpp/runfiles/runfiles.h
  52. "@bazel_tools": ExternalRepo(lambda x: re.sub(":", "/", x), "..."),
  53. # absl/flags:flag.h -> absl/flags/flag.h
  54. "@abseil-cpp": ExternalRepo(lambda x: re.sub(":", "/", x), "..."),
  55. # :re2/re2.h -> re2/re2.h
  56. "@re2": ExternalRepo(lambda x: re.sub(":", "", x), ":re2"),
  57. # :googletest/include/gtest/gtest.h -> gtest/gtest.h
  58. "@googletest": ExternalRepo(
  59. lambda x: re.sub(":google(?:mock|test)/include/", "", x),
  60. ":gtest",
  61. use_system_include=True,
  62. ),
  63. # All of the `boost_unordered` headers are in a single rule.
  64. "@boost_unordered": ExternalRepo(
  65. lambda x: re.sub("^(.*:include)/", "", x),
  66. ":boost_unordered",
  67. use_system_include=True,
  68. ),
  69. }
  70. # TODO: proto rules and template expansions are aspect-based and their generated
  71. # files don't show up in `bazel query` output.
  72. # Try using `bazel cquery --output=starlark` to print `target.files`.
  73. # For protobuf, need to add support for `alias` rule kind.
  74. IGNORE_HEADER_REGEX = re.compile("^(.*\\.pb\\.h)$")
  75. IGNORE_SOURCE_FILE_REGEX = re.compile(
  76. "^(third_party/clangd.*|common/version.*\\.cpp)$"
  77. )
  78. class Rule(NamedTuple):
  79. # For cc_* rules:
  80. # The hdrs + textual_hdrs attributes, as relative paths to the file.
  81. hdrs: Set[str]
  82. # The srcs attribute, as relative paths to the file.
  83. srcs: Set[str]
  84. # The deps attribute, as full bazel labels.
  85. deps: Set[str]
  86. # For genrules:
  87. # The outs attribute, as relative paths to the file.
  88. outs: Set[str]
  89. def remap_file(label: str) -> str:
  90. """Remaps a bazel label to a file."""
  91. repo, _, path = label.partition("//")
  92. if not repo:
  93. return path.replace(":", "/")
  94. # Ignore the version, just use the repo name.
  95. repo = repo.split("~", 1)[0]
  96. assert repo in EXTERNAL_REPOS, repo
  97. return EXTERNAL_REPOS[repo].remap(path)
  98. def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> Set[str]:
  99. """Returns the contents of a bazel list.
  100. The return will normally be the full label, unless `is_file` is set, in
  101. which case the label will be translated to the underlying file.
  102. """
  103. results: Set[str] = set()
  104. for label in list_child:
  105. assert label.tag in ("label", "output"), label.tag
  106. value = label.attrib["value"]
  107. if is_file:
  108. value = remap_file(value)
  109. results.add(value)
  110. return results
  111. def get_rules(bazel: str, targets: str, keep_going: bool) -> Dict[str, Rule]:
  112. """Queries the specified targets, returning the found rules.
  113. keep_going will be set to true for external repositories, where sometimes we
  114. see query errors.
  115. The return maps rule names to rule data.
  116. """
  117. args = [
  118. bazel,
  119. "query",
  120. "--output=xml",
  121. f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))",
  122. ]
  123. if keep_going:
  124. args.append("--keep_going")
  125. p = subprocess.run(
  126. args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
  127. )
  128. # 3 indicates incomplete results from --keep_going, which is fine here.
  129. if p.returncode not in {0, 3}:
  130. print(p.stderr)
  131. exit(f"bazel query returned {p.returncode}")
  132. rules: Dict[str, Rule] = {}
  133. for rule_xml in ElementTree.fromstring(p.stdout):
  134. assert rule_xml.tag == "rule", rule_xml.tag
  135. rule_name = rule_xml.attrib["name"]
  136. hdrs: Set[str] = set()
  137. srcs: Set[str] = set()
  138. deps: Set[str] = set()
  139. outs: Set[str] = set()
  140. rule_class = rule_xml.attrib["class"]
  141. for list_child in rule_xml.findall("list"):
  142. list_name = list_child.attrib["name"]
  143. if rule_class in ("cc_library", "cc_binary", "cc_test"):
  144. if list_name in ("hdrs", "textual_hdrs"):
  145. hdrs = hdrs.union(get_bazel_list(list_child, True))
  146. elif list_name == "srcs":
  147. srcs = get_bazel_list(list_child, True)
  148. elif list_name == "deps":
  149. deps = get_bazel_list(list_child, False)
  150. elif rule_class == "genrule":
  151. if list_name == "outs":
  152. outs = get_bazel_list(list_child, True)
  153. elif rule_class == "tree_sitter_cc_library":
  154. continue
  155. else:
  156. exit(f"unexpected rule type: {rule_class}")
  157. rules[rule_name] = Rule(hdrs, srcs, deps, outs)
  158. return rules
  159. def map_headers(
  160. header_to_rule_map: Dict[str, RuleChoice], rules: Dict[str, Rule]
  161. ) -> None:
  162. """Accumulates headers provided by rules into the map.
  163. The map maps header paths to rule names.
  164. """
  165. for rule_name, rule in rules.items():
  166. repo, _, path = rule_name.partition("//")
  167. use_system_include = False
  168. if repo in EXTERNAL_REPOS:
  169. use_system_include = EXTERNAL_REPOS[repo].use_system_include
  170. for header in rule.hdrs:
  171. if header in header_to_rule_map:
  172. header_to_rule_map[header].rules.add(rule_name)
  173. if (
  174. use_system_include
  175. != header_to_rule_map[header].use_system_include
  176. ):
  177. exit(
  178. "Unexpected use_system_include inconsistency in "
  179. f"{header_to_rule_map[header]}"
  180. )
  181. else:
  182. header_to_rule_map[header] = RuleChoice(
  183. use_system_include, {rule_name}
  184. )
  185. def get_missing_deps(
  186. header_to_rule_map: Dict[str, RuleChoice],
  187. generated_files: Set[str],
  188. rule: Rule,
  189. ) -> Tuple[Set[str], bool]:
  190. """Returns missing dependencies for the rule.
  191. On return, the set is dependency labels that should be added; the bool
  192. indicates whether some where omitted due to ambiguity.
  193. """
  194. missing_deps: Set[str] = set()
  195. ambiguous = False
  196. rule_files = rule.hdrs.union(rule.srcs)
  197. for source_file in rule_files:
  198. if source_file in generated_files:
  199. continue
  200. if IGNORE_SOURCE_FILE_REGEX.match(source_file):
  201. continue
  202. with open(source_file, "r") as f:
  203. file_content = f.read()
  204. file_content_changed = False
  205. for header_groups in re.findall(
  206. r'^(#include (?:(["<])([^">]+)[">]))',
  207. file_content,
  208. re.MULTILINE,
  209. ):
  210. (full_include, include_open, header) = header_groups
  211. is_system_include = include_open == "<"
  212. if header in rule_files:
  213. continue
  214. if header not in header_to_rule_map:
  215. if is_system_include:
  216. # Don't error for unexpected system includes.
  217. continue
  218. if IGNORE_HEADER_REGEX.match(header):
  219. # Don't print anything for explicitly ignored files.
  220. continue
  221. exit(
  222. f"Missing rule for " f"'{full_include}' in '{source_file}'"
  223. )
  224. rule_choice = header_to_rule_map[header]
  225. if not rule_choice.rules.intersection(rule.deps):
  226. if len(rule_choice.rules) > 1:
  227. print(
  228. f"Ambiguous dependency choice for "
  229. f"'{full_include}' in '{source_file}': "
  230. f"{', '.join(rule_choice.rules)}"
  231. )
  232. ambiguous = True
  233. # Use the single dep without removing it.
  234. missing_deps.add(next(iter(rule_choice.rules)))
  235. # If the include style should change, update file content.
  236. if is_system_include != rule_choice.use_system_include:
  237. if rule_choice.use_system_include:
  238. new_include = f"#include <{header}>"
  239. else:
  240. new_include = f'#include "{header}"'
  241. print(
  242. f"Fixing include format in '{source_file}': "
  243. f"'{full_include}' to '{new_include}'"
  244. )
  245. file_content = file_content.replace(full_include, new_include)
  246. file_content_changed = True
  247. if file_content_changed:
  248. with open(source_file, "w") as f:
  249. f.write(file_content)
  250. return missing_deps, ambiguous
  251. def main() -> None:
  252. scripts_utils.chdir_repo_root()
  253. bazel = scripts_utils.locate_bazel()
  254. print("Querying bazel for Carbon targets...")
  255. carbon_rules = get_rules(bazel, "//...", False)
  256. print("Querying bazel for external targets...")
  257. external_repo_query = " ".join(
  258. [f"{repo}//{EXTERNAL_REPOS[repo].target}" for repo in EXTERNAL_REPOS]
  259. )
  260. external_rules = get_rules(bazel, external_repo_query, True)
  261. print("Building header map...")
  262. header_to_rule_map: Dict[str, RuleChoice] = {}
  263. map_headers(header_to_rule_map, carbon_rules)
  264. map_headers(header_to_rule_map, external_rules)
  265. print("Building generated file list...")
  266. generated_files: Set[str] = set()
  267. for rule in carbon_rules.values():
  268. generated_files = generated_files.union(rule.outs)
  269. print("Parsing headers from source files...")
  270. all_missing_deps: List[Tuple[str, Set[str]]] = []
  271. any_ambiguous = False
  272. for rule_name, rule in carbon_rules.items():
  273. missing_deps, ambiguous = get_missing_deps(
  274. header_to_rule_map, generated_files, rule
  275. )
  276. if missing_deps:
  277. all_missing_deps.append((rule_name, missing_deps))
  278. if ambiguous:
  279. any_ambiguous = True
  280. if any_ambiguous:
  281. exit("Stopping due to ambiguous dependency choices.")
  282. if all_missing_deps:
  283. print("Checking buildozer availability...")
  284. buildozer = scripts_utils.get_release(scripts_utils.Release.BUILDOZER)
  285. print("Fixing dependencies...")
  286. SEPARATOR = "\n- "
  287. for rule_name, missing_deps in sorted(all_missing_deps):
  288. friendly_missing_deps = SEPARATOR.join(missing_deps)
  289. print(
  290. f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}"
  291. )
  292. args = [
  293. buildozer,
  294. f"add deps {' '.join(missing_deps)}",
  295. rule_name,
  296. ]
  297. subprocess.check_call(args)
  298. print("Done!")
  299. if __name__ == "__main__":
  300. main()