gen_rtti.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. #!/usr/bin/env python3
  2. """Generates C++ header to support LLVM-style RTTI for a class hierarchy.
  3. This script should be run through the //explorer:gen_rtti build target.
  4. # Background
  5. A C++ class hierarchy supported by this script consists of *abstract* classes,
  6. which can be inherited from but can't be instantiated, and *concrete* classes,
  7. which can be instantiated but can't be inherited from. Classes can inherit from
  8. at most one other class in the hierarchy; a class that doesn't inherit from
  9. any other class is called a *root* class, and it cannot be concrete.
  10. # Input format
  11. This script's input file declares every class in the hierarchy, and specifies
  12. the parent of each non-root class. The input file consists of comment lines
  13. starting with `#`, whitespace lines, and one `;`-terminated line for each class.
  14. The core of a line is `class` followed by the class name. `class` can be
  15. prefixed with `root` or `abstract` to specify the corresponding kind of class;
  16. if there is no prefix, the class is concrete. If the class is not a root class,
  17. the name is followed by `:` and then the name of the class it inherits from. A
  18. class cannot inherit from a class defined later in the file.
  19. For example:
  20. root class R;
  21. abstract class A : R;
  22. abstract class B : R;
  23. class C : A;
  24. class D : B;
  25. class E : A;
  26. # Output
  27. For each abstract class `Foo`, the generated header file will contain
  28. `enum class FooKind`, which has an enumerator for each concrete class derived
  29. from `Foo`, with a name that matches the concrete class name.
  30. For each non-root class `Foo` whose root class is `Root`, the generated header
  31. file will also contain a function `bool InheritsFromFoo(RootKind kind)`, which
  32. returns true if the value of `kind` corresponds to a class that is derived from
  33. `Foo`. This function can be used to implement `Foo::classof`.
  34. All enumerators that represent the same concrete class will have the same
  35. numeric value, so you can use `static_cast` to convert between the enum types
  36. for different classes that have a common root, so long as the enumerator value
  37. is present in both types. As a result, `InheritsFromFoo` can be used to
  38. determine whether casting to `FooKind` is safe.
  39. """
  40. __copyright__ = """
  41. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  42. Exceptions. See /LICENSE for license information.
  43. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  44. """
  45. import enum
  46. import re
  47. import sys
  48. from typing import Dict, List, Optional, Tuple
  49. class Class:
  50. """Metadata about a class from the input file.
  51. This consists of information
  52. Attributes set at construction:
  53. name: The class name.
  54. kind: The class kind (root, abstract, or concrete)
  55. ancestors: A list of Class objects representing the class's ancestors,
  56. starting with the root and ending with the current class's parent.
  57. _children: A list of Class objects representing the classes that are
  58. derived directly from this one.
  59. Attributes set by Finalize():
  60. id (CONCRETE only): The class's numeric ID, which will become its
  61. enumerator value in the generated C++ code.
  62. id_range (ROOT and ABSTRACT only): A pair such that a Class
  63. object `c` represents a concrete class derived from `self` if and only
  64. if c.id >= self.id_range[0] and c.id < self.id_range[1].
  65. leaves (ROOT only): A list of all concrete classes derived from this one,
  66. indexed by their IDs.
  67. """
  68. class Kind(enum.Enum):
  69. ROOT = enum.auto()
  70. ABSTRACT = enum.auto()
  71. CONCRETE = enum.auto()
  72. def __init__(
  73. self, name: str, kind: Kind, parent: Optional["Class"]
  74. ) -> None:
  75. self.name = name
  76. self.kind = kind
  77. assert (parent is None) == (kind == Class.Kind.ROOT)
  78. self.ancestors: List[Class] = []
  79. if parent is not None:
  80. self.ancestors = parent.ancestors + [parent]
  81. if self.kind == Class.Kind.CONCRETE:
  82. self.id: Optional[int] = None
  83. else:
  84. self.id_range: Optional[Tuple[int, int]] = None
  85. if self.kind == Class.Kind.ROOT:
  86. self.leaves: List[Class] = []
  87. if self.kind != Class.Kind.CONCRETE:
  88. self._children: List[Class] = []
  89. if parent:
  90. parent._children.append(self)
  91. def Parent(self) -> "Class":
  92. """Returns this Class's parent."""
  93. return self.ancestors[-1]
  94. def Root(self) -> "Class":
  95. """Returns the root Class of this hierarchy."""
  96. if self.kind == Class.Kind.ROOT:
  97. return self
  98. else:
  99. return self.ancestors[0]
  100. def _RegisterLeaf(self, leaf: "Class") -> None:
  101. """Records that `leaf` is derived from self.
  102. Also recursively updates the parent of self. leaf.id must already be
  103. populated, and leaves must be registered in order of ID. This operation
  104. is idempotent.
  105. """
  106. already_visited = False
  107. assert leaf.id is not None
  108. if self.kind == Class.Kind.ROOT:
  109. if leaf.id == len(self.leaves):
  110. self.leaves.append(leaf)
  111. else:
  112. assert leaf.id + 1 == len(self.leaves)
  113. assert self.leaves[leaf.id] == leaf
  114. already_visited = True
  115. if self.kind in [Class.Kind.ROOT, Class.Kind.ABSTRACT]:
  116. if self not in leaf.ancestors:
  117. sys.exit(
  118. f"{leaf.name} derived from {self.name}, but has a"
  119. + " different root"
  120. )
  121. if not self.id_range:
  122. self.id_range = (leaf.id, leaf.id + 1)
  123. elif self.id_range[1] == leaf.id:
  124. self.id_range = (self.id_range[0], self.id_range[1] + 1)
  125. else:
  126. assert self.id_range[1] == leaf.id + 1
  127. already_visited = True
  128. if not already_visited:
  129. if self.kind != Class.Kind.ROOT:
  130. self.Parent()._RegisterLeaf(leaf)
  131. def Finalize(self) -> None:
  132. """Populates additional attributes for `self` and derived Classes.
  133. Each Class can only be finalized once, after which no additional Classes
  134. can be derived from it.
  135. """
  136. if self.kind == Class.Kind.CONCRETE:
  137. self.id = len(self.Root().leaves)
  138. self._RegisterLeaf(self)
  139. else:
  140. for child in self._children:
  141. child.Finalize()
  142. _LINE_PATTERN = r"""(?P<prefix> \w*) \s*
  143. class \s+
  144. (?P<name> \w+)
  145. (?: \s*:\s* (?P<parent> \w+)
  146. )?
  147. ;$"""
  148. def main() -> None:
  149. input_filename = sys.argv[1]
  150. header_filename = sys.argv[2]
  151. cpp_filename = sys.argv[3]
  152. relative_header = sys.argv[4]
  153. with open(input_filename) as file:
  154. lines = file.readlines()
  155. classes: Dict[str, Class] = {}
  156. for line_num, line in enumerate(lines, 1):
  157. if line.startswith("#") or line.strip() == "":
  158. continue
  159. match_result = re.match(_LINE_PATTERN, line.strip(), re.VERBOSE)
  160. if not match_result:
  161. sys.exit(f"Invalid format on line {line_num}")
  162. prefix = match_result.group("prefix")
  163. if prefix == "":
  164. kind = Class.Kind.CONCRETE
  165. elif prefix == "root":
  166. kind = Class.Kind.ROOT
  167. elif prefix == "abstract":
  168. kind = Class.Kind.ABSTRACT
  169. else:
  170. sys.exit(f"Unrecognized class prefix '{prefix}' on line {line_num}")
  171. parent = None
  172. if match_result.group("parent"):
  173. if kind == Class.Kind.ROOT:
  174. sys.exit(f"Root class cannot have parent on line {line_num}")
  175. parent_name = match_result.group("parent")
  176. parent = classes[parent_name]
  177. if not parent:
  178. sys.exit(f"Unknown class '{parent_name}' on line {line_num}")
  179. if parent.kind == Class.Kind.CONCRETE:
  180. sys.exit(f"{parent.name} cannot be a parent on line {line_num}")
  181. else:
  182. if kind != Class.Kind.ROOT:
  183. sys.exit(
  184. f"Non-root class must have a parent on line {line_num}"
  185. )
  186. classes[match_result.group("name")] = Class(
  187. match_result.group("name"), kind, parent
  188. )
  189. for node in classes.values():
  190. if node.kind == Class.Kind.ROOT:
  191. node.Finalize()
  192. header_file = open(header_filename, "w")
  193. sys.stdout = header_file
  194. print(f"// Generated from {input_filename} by explorer/gen_rtti.py\n")
  195. trans_table = str.maketrans({"/": "_", ".": "_"})
  196. guard_macro = input_filename.upper().translate(trans_table) + "_"
  197. print(f"#ifndef {guard_macro}")
  198. print(f"#define {guard_macro}")
  199. print("\n#include <string_view>")
  200. print("\nnamespace Carbon {\n")
  201. for node in classes.values():
  202. if node.kind != Class.Kind.CONCRETE:
  203. assert node.id_range is not None
  204. ids = range(node.id_range[0], node.id_range[1])
  205. print(f"enum class {node.name}Kind {{")
  206. for id in ids:
  207. print(f" {node.Root().leaves[id].name} = {id},")
  208. print("};\n")
  209. print(f"std::string_view {node.name}KindName({node.name}Kind k);\n")
  210. if node.kind in [Class.Kind.ABSTRACT, Class.Kind.CONCRETE]:
  211. print(
  212. f"inline bool InheritsFrom{node.name}({node.Root().name}Kind"
  213. + " kind) {"
  214. )
  215. if node.kind == Class.Kind.ABSTRACT:
  216. assert node.id_range is not None
  217. if node.id_range[0] == node.id_range[1]:
  218. print(" return false;")
  219. else:
  220. range_begin = node.Root().leaves[node.id_range[0]].name
  221. print(
  222. f" return kind >= {node.Root().name}Kind"
  223. + f"::{range_begin}"
  224. )
  225. if node.id_range[1] < len(node.Root().leaves):
  226. range_end = node.Root().leaves[node.id_range[1]].name
  227. print(
  228. f" && kind < {node.Root().name}Kind"
  229. + f"::{range_end}"
  230. )
  231. print(" ;")
  232. elif node.kind == Class.Kind.CONCRETE:
  233. print(
  234. f" return kind == {node.Root().name}Kind::{node.name};"
  235. )
  236. print("}\n")
  237. print("} // namespace Carbon\n")
  238. print(f"#endif // {guard_macro}")
  239. header_file.close()
  240. cpp_file = open(cpp_filename, "w")
  241. sys.stdout = cpp_file
  242. print(f"// Generated from {input_filename} by explorer/gen_rtti.py\n")
  243. print(f'#include "{relative_header}"')
  244. print("\nnamespace Carbon {\n")
  245. for node in classes.values():
  246. if node.kind != Class.Kind.CONCRETE:
  247. assert node.id_range is not None
  248. ids = range(node.id_range[0], node.id_range[1])
  249. print(f"std::string_view {node.name}KindName({node.name}Kind k) {{")
  250. print(" switch(k) {")
  251. for id in ids:
  252. name = node.Root().leaves[id].name
  253. desc = " ".join(
  254. w.lower() for w in re.sub(r"([A-Z])", r" \1", name).split()
  255. )
  256. print(f' case {node.name}Kind::{name}: return "{desc}";')
  257. print(" }")
  258. print("}\n")
  259. print("} // namespace Carbon\n")
  260. cpp_file.close()
  261. if __name__ == "__main__":
  262. main()