fix_cc_deps.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. #!/usr/bin/env python3
  2. """Automatically fixes bazel C++ dependencies.
  3. Bazel has some support for detecting when an include refers to a missing
  4. dependency. However, the ideal state is that a given build target depends
  5. directly on all #include'd headers, and Bazel doesn't enforce that. This
  6. automates the addition for technical correctness.
  7. """
  8. __copyright__ = """
  9. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  10. Exceptions. See /LICENSE for license information.
  11. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  12. """
  13. import re
  14. import subprocess
  15. from typing import Callable, Dict, List, NamedTuple, Set, Tuple
  16. from xml.etree import ElementTree
  17. import scripts_utils # type: ignore
  18. # Maps external repository names to a method translating bazel labels to file
  19. # paths for that repository.
  20. EXTERNAL_REPOS: Dict[str, Callable[[str], str]] = {
  21. # @llvm-project//llvm:include/llvm/Support/Error.h ->
  22. # llvm/Support/Error.h
  23. "@llvm-project": lambda x: re.sub("^(.*:(lib|include))/", "", x),
  24. # @com_google_protobuf//:src/google/protobuf/descriptor.h ->
  25. # google/protobuf/descriptor.h
  26. "@com_google_protobuf": lambda x: re.sub("^(.*:src)/", "", x),
  27. }
  28. # TODO: proto rules are aspect-based and their generated files don't show up in
  29. # `bazel query` output.
  30. # Try using `bazel cquery --output=starlark` to print `target.files`.
  31. # For protobuf, need to add support for `alias` rule kind.
  32. IGNORE_HEADER_REGEX = re.compile("^(.*\\.pb\\.h)|(.*google/protobuf/.*)$")
  33. class Rule(NamedTuple):
  34. # For cc_* rules:
  35. # The hdrs + textual_hdrs attributes, as relative paths to the file.
  36. hdrs: Set[str]
  37. # The srcs attribute, as relative paths to the file.
  38. srcs: Set[str]
  39. # The deps attribute, as full bazel labels.
  40. deps: Set[str]
  41. # For genrules:
  42. # The outs attribute, as relative paths to the file.
  43. outs: Set[str]
  44. def remap_file(label: str) -> str:
  45. """Remaps a bazel label to a file."""
  46. repo, _, path = label.partition("//")
  47. if not repo:
  48. return path.replace(":", "/")
  49. assert repo in EXTERNAL_REPOS, repo
  50. return EXTERNAL_REPOS[repo](path)
  51. exit(f"Don't know how to remap label '{label}'")
  52. def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> Set[str]:
  53. """Returns the contents of a bazel list.
  54. The return will normally be the full label, unless `is_file` is set, in
  55. which case the label will be translated to the underlying file.
  56. """
  57. results: Set[str] = set()
  58. for label in list_child:
  59. assert label.tag in ("label", "output"), label.tag
  60. value = label.attrib["value"]
  61. if is_file:
  62. value = remap_file(value)
  63. results.add(value)
  64. return results
  65. def get_rules(bazel: str, targets: str, keep_going: bool) -> Dict[str, Rule]:
  66. """Queries the specified targets, returning the found rules.
  67. keep_going will be set to true for external repositories, where sometimes we
  68. see query errors.
  69. The return maps rule names to rule data.
  70. """
  71. args = [
  72. bazel,
  73. "query",
  74. "--output=xml",
  75. f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))",
  76. ]
  77. if keep_going:
  78. args.append("--keep_going")
  79. p = subprocess.run(
  80. args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
  81. )
  82. # 3 indicates incomplete results from --keep_going, which is fine here.
  83. if p.returncode not in {0, 3}:
  84. print(p.stderr)
  85. exit(f"bazel query returned {p.returncode}")
  86. rules: Dict[str, Rule] = {}
  87. for rule_xml in ElementTree.fromstring(p.stdout):
  88. assert rule_xml.tag == "rule", rule_xml.tag
  89. rule_name = rule_xml.attrib["name"]
  90. hdrs: Set[str] = set()
  91. srcs: Set[str] = set()
  92. deps: Set[str] = set()
  93. outs: Set[str] = set()
  94. rule_class = rule_xml.attrib["class"]
  95. for list_child in rule_xml.findall("list"):
  96. list_name = list_child.attrib["name"]
  97. if rule_class in ("cc_library", "cc_binary", "cc_test"):
  98. if list_name in ("hdrs", "textual_hdrs"):
  99. hdrs = hdrs.union(get_bazel_list(list_child, True))
  100. elif list_name == "srcs":
  101. srcs = get_bazel_list(list_child, True)
  102. elif list_name == "deps":
  103. deps = get_bazel_list(list_child, False)
  104. elif rule_class == "genrule":
  105. if list_name == "outs":
  106. outs = get_bazel_list(list_child, True)
  107. else:
  108. exit(f"unexpected rule type: {rule_class}")
  109. rules[rule_name] = Rule(hdrs, srcs, deps, outs)
  110. return rules
  111. def map_headers(
  112. header_to_rule_map: Dict[str, Set[str]], rules: Dict[str, Rule]
  113. ) -> None:
  114. """Accumulates headers provided by rules into the map.
  115. The map maps header paths to rule names.
  116. """
  117. for rule_name, rule in rules.items():
  118. for header in rule.hdrs:
  119. if header in header_to_rule_map:
  120. header_to_rule_map[header].add(rule_name)
  121. else:
  122. header_to_rule_map[header] = {rule_name}
  123. def get_missing_deps(
  124. header_to_rule_map: Dict[str, Set[str]],
  125. generated_files: Set[str],
  126. rule: Rule,
  127. ) -> Tuple[Set[str], bool]:
  128. """Returns missing dependencies for the rule.
  129. On return, the set is dependency labels that should be added; the bool
  130. indicates whether some where omitted due to ambiguity.
  131. """
  132. missing_deps: Set[str] = set()
  133. ambiguous = False
  134. rule_files = rule.hdrs.union(rule.srcs)
  135. for source_file in rule_files:
  136. if source_file in generated_files:
  137. continue
  138. with open(source_file, "r") as f:
  139. for header in re.findall(
  140. r'^#include "([^"]+)"', f.read(), re.MULTILINE
  141. ):
  142. if header in rule_files:
  143. continue
  144. if header not in header_to_rule_map:
  145. if IGNORE_HEADER_REGEX.match(header):
  146. print(
  147. f"Ignored missing #include '{header}' in "
  148. f"'{source_file}'"
  149. )
  150. continue
  151. else:
  152. exit(
  153. f"Missing rule for #include '{header}' in "
  154. f"'{source_file}'"
  155. )
  156. dep_choices = header_to_rule_map[header]
  157. if not dep_choices.intersection(rule.deps):
  158. if len(dep_choices) > 1:
  159. print(
  160. f"Ambiguous dependency choice for #include "
  161. f"'{header}' in '{source_file}': "
  162. f"{', '.join(dep_choices)}"
  163. )
  164. ambiguous = True
  165. # Use the single dep without removing it.
  166. missing_deps.add(next(iter(dep_choices)))
  167. return missing_deps, ambiguous
  168. def main() -> None:
  169. scripts_utils.chdir_repo_root()
  170. bazel = scripts_utils.locate_bazel()
  171. print("Querying bazel for Carbon targets...")
  172. carbon_rules = get_rules(bazel, "//...", False)
  173. print("Querying bazel for external targets...")
  174. external_repo_query = " ".join([f"{repo}//..." for repo in EXTERNAL_REPOS])
  175. external_rules = get_rules(bazel, external_repo_query, True)
  176. print("Building header map...")
  177. header_to_rule_map: Dict[str, Set[str]] = {}
  178. map_headers(header_to_rule_map, carbon_rules)
  179. map_headers(header_to_rule_map, external_rules)
  180. print("Building generated file list...")
  181. generated_files: Set[str] = set()
  182. for rule in carbon_rules.values():
  183. generated_files = generated_files.union(rule.outs)
  184. print("Parsing headers from source files...")
  185. all_missing_deps: List[Tuple[str, Set[str]]] = []
  186. any_ambiguous = False
  187. for rule_name, rule in carbon_rules.items():
  188. missing_deps, ambiguous = get_missing_deps(
  189. header_to_rule_map, generated_files, rule
  190. )
  191. if missing_deps:
  192. all_missing_deps.append((rule_name, missing_deps))
  193. if ambiguous:
  194. any_ambiguous = True
  195. if any_ambiguous:
  196. exit("Stopping due to ambiguous dependency choices.")
  197. if all_missing_deps:
  198. print("Checking buildozer availability...")
  199. buildozer = scripts_utils.get_release(scripts_utils.Release.BUILDOZER)
  200. print("Fixing dependencies...")
  201. SEPARATOR = "\n- "
  202. for rule_name, missing_deps in sorted(all_missing_deps):
  203. friendly_missing_deps = SEPARATOR.join(missing_deps)
  204. print(
  205. f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}"
  206. )
  207. args = [
  208. buildozer,
  209. f"add deps {' '.join(missing_deps)}",
  210. rule_name,
  211. ]
  212. subprocess.check_call(args)
  213. print("Done!")
  214. if __name__ == "__main__":
  215. main()