fix_cc_deps.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. #!/usr/bin/env python3
  2. """Automatically fixes bazel C++ dependencies.
  3. Bazel has some support for detecting when an include refers to a missing
  4. dependency. However, the ideal state is that a given build target depends
  5. directly on all #include'd headers, and Bazel doesn't enforce that. This
  6. automates the addition for technical correctness.
  7. """
  8. __copyright__ = """
  9. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  10. Exceptions. See /LICENSE for license information.
  11. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  12. """
  13. import re
  14. import subprocess
  15. from typing import Callable, Dict, List, NamedTuple, Set, Tuple
  16. from xml.etree import ElementTree
  17. import scripts_utils # type: ignore
  18. # Maps external repository names to a method translating bazel labels to file
  19. # paths for that repository.
  20. EXTERNAL_REPOS: Dict[str, Callable[[str], str]] = {
  21. "@llvm-project": lambda x: re.sub("^(.*:(lib|include))/", "", x)
  22. }
  23. class Rule(NamedTuple):
  24. # For cc_* rules:
  25. # The hdrs + textual_hdrs attributes, as relative paths to the file.
  26. hdrs: Set[str]
  27. # The srcs attribute, as relative paths to the file.
  28. srcs: Set[str]
  29. # The deps attribute, as full bazel labels.
  30. deps: Set[str]
  31. # For genrules:
  32. # The outs attribute, as relative paths to the file.
  33. outs: Set[str]
  34. def remap_file(label: str) -> str:
  35. """Remaps a bazel label to a file."""
  36. repo, _, path = label.partition("//")
  37. if not repo:
  38. return path.replace(":", "/")
  39. assert repo in EXTERNAL_REPOS, repo
  40. return EXTERNAL_REPOS[repo](path)
  41. exit(f"Don't know how to remap label '{label}'")
  42. def get_bazel_list(list_child: ElementTree.Element, is_file: bool) -> Set[str]:
  43. """Returns the contents of a bazel list.
  44. The return will normally be the full label, unless `is_file` is set, in
  45. which case the label will be translated to the underlying file.
  46. """
  47. results: Set[str] = set()
  48. for label in list_child:
  49. assert label.tag in ("label", "output"), label.tag
  50. value = label.attrib["value"]
  51. if is_file:
  52. value = remap_file(value)
  53. results.add(value)
  54. return results
  55. def get_rules(bazel: str, targets: str, keep_going: bool) -> Dict[str, Rule]:
  56. """Queries the specified targets, returning the found rules.
  57. keep_going will be set to true for external repositories, where sometimes we
  58. see query errors.
  59. The return maps rule names to rule data.
  60. """
  61. args = [
  62. bazel,
  63. "query",
  64. "--output=xml",
  65. f"kind('(cc_binary|cc_library|cc_test|genrule)', set({targets}))",
  66. ]
  67. if keep_going:
  68. args.append("--keep_going")
  69. p = subprocess.run(
  70. args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
  71. )
  72. # 3 indicates incomplete results from --keep_going, which is fine here.
  73. if p.returncode not in {0, 3}:
  74. print(p.stderr)
  75. exit(f"bazel query returned {p.returncode}")
  76. rules: Dict[str, Rule] = {}
  77. for rule_xml in ElementTree.fromstring(p.stdout):
  78. assert rule_xml.tag == "rule", rule_xml.tag
  79. rule_name = rule_xml.attrib["name"]
  80. hdrs: Set[str] = set()
  81. srcs: Set[str] = set()
  82. deps: Set[str] = set()
  83. outs: Set[str] = set()
  84. rule_class = rule_xml.attrib["class"]
  85. for list_child in rule_xml.findall("list"):
  86. list_name = list_child.attrib["name"]
  87. if rule_class in ("cc_library", "cc_binary", "cc_test"):
  88. if list_name in ("hdrs", "textual_hdrs"):
  89. hdrs = hdrs.union(get_bazel_list(list_child, True))
  90. elif list_name == "srcs":
  91. srcs = get_bazel_list(list_child, True)
  92. elif list_name == "deps":
  93. deps = get_bazel_list(list_child, False)
  94. elif rule_class == "genrule":
  95. if list_name == "outs":
  96. outs = get_bazel_list(list_child, True)
  97. else:
  98. exit(f"unexpected rule type: {rule_class}")
  99. rules[rule_name] = Rule(hdrs, srcs, deps, outs)
  100. return rules
  101. def map_headers(
  102. header_to_rule_map: Dict[str, Set[str]], rules: Dict[str, Rule]
  103. ) -> None:
  104. """Accumulates headers provided by rules into the map.
  105. The map maps header paths to rule names.
  106. """
  107. for rule_name, rule in rules.items():
  108. for header in rule.hdrs:
  109. if header in header_to_rule_map:
  110. header_to_rule_map[header].add(rule_name)
  111. else:
  112. header_to_rule_map[header] = {rule_name}
  113. def get_missing_deps(
  114. header_to_rule_map: Dict[str, Set[str]],
  115. generated_files: Set[str],
  116. rule: Rule,
  117. ) -> Tuple[Set[str], bool]:
  118. """Returns missing dependencies for the rule.
  119. On return, the set is dependency labels that should be added; the bool
  120. indicates whether some where omitted due to ambiguity.
  121. """
  122. missing_deps: Set[str] = set()
  123. ambiguous = False
  124. rule_files = rule.hdrs.union(rule.srcs)
  125. for source_file in rule_files:
  126. if source_file in generated_files:
  127. continue
  128. with open(source_file, "r") as f:
  129. for header in re.findall(
  130. r'^#include "([^"]+)"', f.read(), re.MULTILINE
  131. ):
  132. if header in rule_files:
  133. continue
  134. if header not in header_to_rule_map:
  135. exit(
  136. f"Missing rule for #include '{header}' in "
  137. f"'{source_file}'"
  138. )
  139. dep_choices = header_to_rule_map[header]
  140. if not dep_choices.intersection(rule.deps):
  141. if len(dep_choices) > 1:
  142. print(
  143. f"Ambiguous dependency choice for #include "
  144. f"'{header}' in '{source_file}': "
  145. f"{', '.join(dep_choices)}"
  146. )
  147. ambiguous = True
  148. # Use the single dep without removing it.
  149. missing_deps.add(next(iter(dep_choices)))
  150. return missing_deps, ambiguous
  151. def main() -> None:
  152. scripts_utils.chdir_repo_root()
  153. bazel = scripts_utils.locate_bazel()
  154. print("Querying bazel for Carbon targets...")
  155. carbon_rules = get_rules(bazel, "//...", False)
  156. print("Querying bazel for external targets...")
  157. external_repo_query = " ".join([f"{repo}//..." for repo in EXTERNAL_REPOS])
  158. external_rules = get_rules(bazel, external_repo_query, True)
  159. print("Building header map...")
  160. header_to_rule_map: Dict[str, Set[str]] = {}
  161. map_headers(header_to_rule_map, carbon_rules)
  162. map_headers(header_to_rule_map, external_rules)
  163. print("Building generated file list...")
  164. generated_files: Set[str] = set()
  165. for rule in carbon_rules.values():
  166. generated_files = generated_files.union(rule.outs)
  167. print("Parsing headers from source files...")
  168. all_missing_deps: List[Tuple[str, Set[str]]] = []
  169. any_ambiguous = False
  170. for rule_name, rule in carbon_rules.items():
  171. missing_deps, ambiguous = get_missing_deps(
  172. header_to_rule_map, generated_files, rule
  173. )
  174. if missing_deps:
  175. all_missing_deps.append((rule_name, missing_deps))
  176. if ambiguous:
  177. any_ambiguous = True
  178. if any_ambiguous:
  179. exit("Stopping due to ambiguous dependency choices.")
  180. if all_missing_deps:
  181. print("Checking buildozer availability...")
  182. buildozer = scripts_utils.get_release(scripts_utils.Release.BUILDOZER)
  183. print("Fixing dependencies...")
  184. SEPARATOR = "\n- "
  185. for rule_name, missing_deps in sorted(all_missing_deps):
  186. friendly_missing_deps = SEPARATOR.join(missing_deps)
  187. print(
  188. f"Adding deps to {rule_name}:{SEPARATOR}{friendly_missing_deps}"
  189. )
  190. args = [
  191. buildozer,
  192. f"add deps {' '.join(missing_deps)}",
  193. rule_name,
  194. ]
  195. subprocess.check_call(args)
  196. print("Done!")
  197. if __name__ == "__main__":
  198. main()