gen_rtti.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. #!/usr/bin/env python3
  2. """Generates C++ header to support LLVM-style RTTI for a class hierarchy.
  3. Takes as input a file describing the class hierarchy which can consist of
  4. four different kinds of classes: a *root* class is the base of a class
  5. hierarchy, meaning that it doesn't inherit from any other class. *Abstract* and
  6. *interface* classes are non-root classes that cannot be instantiated, and
  7. *concrete* classes are classes that can be instantiated.
  8. A non-root class C must inherit from exactly one parent, which can be a root or
  9. abstract class, and can also inherit from any number of interfaces, but each
  10. interface's parent must be an ancestor of C.
  11. The input file consists of comment lines starting with `#`, whitespace lines,
  12. and one `;`-terminated line for each class. The core of a line is `class`
  13. followed by the class name. `class` can be prefixed with `root`, `abstract`,
  14. or `interface` to specify the corresponding kind of class; if there is no
  15. prefix, the class is concrete. If the class is not a root class, the name is
  16. followed by `:` and then a comma-separated list of the names of the classes
  17. it inherits from. The first entry in the list is the parent, and the others
  18. are interfaces. A class cannot inherit from classes defined later in the file.
  19. For example:
  20. root class R;
  21. abstract class A : R;
  22. interface class I : R;
  23. abstract class B : R, I;
  24. class C : A;
  25. class D : B;
  26. class E : A, I;
  27. For each non-concrete class `Foo`, the generated header file will contain
  28. `enum class FooKind`, which has an enumerator for each concrete class derived
  29. from `Foo`, with a name that matches the concrete class name.
  30. For each non-root class `Foo` whose root class is `Root`, the generated header
  31. file will also contain a function `bool InheritsFromFoo(RootKind kind)`,
  32. which returns true if the value of `kind` corresponds to a class that is
  33. derived from `Foo`. This function can be used to implement `Foo::classof`.
  34. All enumerators that represent the same concrete class will have the same
  35. numeric value, so you can use `static_cast` to convert between the enum types
  36. for different classes that have a common root, so long as the enumerator value
  37. is present in both types. As a result, `InheritsFromFoo` can be used to
  38. determine whether casting to `FooKind` is safe.
  39. """
  40. __copyright__ = """
  41. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  42. Exceptions. See /LICENSE for license information.
  43. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  44. """
  45. import enum
  46. import re
  47. import sys
  48. class Class:
  49. """Metadata about a class from the input file.
  50. This consists of information
  51. Attributes set at construction:
  52. name: The class name.
  53. kind: The class kind (root, abstract, interface, or concrete)
  54. ancestors: A list of Class objects representing the class's ancestors,
  55. starting with the root and ending with the current class's parent.
  56. interfaces: A list of Class objects representing the interfaces the class
  57. inherits from.
  58. _children: A list of Class objects representing the classes that are
  59. derived directly from this one.
  60. Attributes set by Finalize():
  61. id (CONCRETE only): The class's numeric ID, which will become its
  62. enumerator value in the generated C++ code.
  63. id_range (ROOT and ABSTRACT only): A pair such that a Class
  64. object `c` represents a concrete class derived from `self` if and only
  65. if c.id >= self.id_range[0] and c.id < self.id_range[1].
  66. leaf_ids (INTERFACE only): A set containing the IDs of all concrete
  67. classes derived from this interface.
  68. leaves (ROOT only): A list of all concrete classes derived from this one,
  69. indexed by their IDs.
  70. """
  71. Kind = enum.Enum("Kind", "ROOT ABSTRACT INTERFACE CONCRETE")
  72. def __init__(self, name, kind, parent, interfaces):
  73. self.name = name
  74. self.kind = kind
  75. self.interfaces = interfaces
  76. assert (parent is None) == (kind == Class.Kind.ROOT)
  77. if parent is None:
  78. self.ancestors = []
  79. else:
  80. self.ancestors = parent.ancestors + [parent]
  81. if self.kind == Class.Kind.ROOT:
  82. self.leaves = []
  83. self.id_range = None
  84. elif self.kind == Class.Kind.ABSTRACT:
  85. self.id_range = None
  86. elif self.kind == Class.Kind.INTERFACE:
  87. self.leaf_ids = set()
  88. else:
  89. self.id = None
  90. if self.kind != Class.Kind.CONCRETE:
  91. self._children = []
  92. if parent:
  93. parent._children.append(self)
  94. for interface in self.interfaces:
  95. interface._children.append(self)
  96. def Parent(self):
  97. """Returns this Class's parent."""
  98. return self.ancestors[-1]
  99. def Root(self):
  100. """Returns the root Class of this hierarchy."""
  101. if self.kind == Class.Kind.ROOT:
  102. return self
  103. else:
  104. return self.ancestors[0]
  105. def _RegisterLeaf(self, leaf):
  106. """Records that `leaf` is derived from self.
  107. Also recursively updates the parent and interfaces of self. leaf.id must
  108. already be populated, and leaves must be registered in order of ID. This
  109. operation is idempotent."""
  110. already_visited = False
  111. if self.kind == Class.Kind.ROOT:
  112. if leaf.id == len(self.leaves):
  113. self.leaves.append(leaf)
  114. else:
  115. assert leaf.id + 1 == len(self.leaves)
  116. assert self.leaves[leaf.id] == leaf
  117. already_visited = True
  118. if self.kind in [Class.Kind.ROOT, Class.Kind.ABSTRACT]:
  119. if self not in leaf.ancestors:
  120. sys.exit(
  121. f"{leaf.name} derived from {self.name}, but has a"
  122. + " different root"
  123. )
  124. if not self.id_range:
  125. self.id_range = (leaf.id, leaf.id + 1)
  126. elif self.id_range[1] == leaf.id:
  127. self.id_range = (self.id_range[0], self.id_range[1] + 1)
  128. else:
  129. assert self.id_range[1] == leaf.id + 1
  130. already_visited = True
  131. elif self.kind == Class.Kind.INTERFACE:
  132. if leaf.id in self.leaf_ids:
  133. already_visited = True
  134. else:
  135. self.leaf_ids.add(leaf.id)
  136. if not already_visited:
  137. if self.kind != Class.Kind.ROOT:
  138. self.Parent()._RegisterLeaf(leaf)
  139. for interface in self.interfaces:
  140. interface._RegisterLeaf(leaf)
  141. def Finalize(self):
  142. """Populates additional attributes for `self` and derived Classes.
  143. Each Class can only be finalized once, after which no additional Classes
  144. can be derived from it.
  145. """
  146. if self.kind == Class.Kind.CONCRETE:
  147. self.id = len(self.Root().leaves)
  148. self._RegisterLeaf(self)
  149. elif self.kind in [Class.Kind.ROOT, Class.Kind.ABSTRACT]:
  150. for child in self._children:
  151. child.Finalize()
  152. _LINE_PATTERN = r"""(?P<prefix> \w*) \s*
  153. class \s+
  154. (?P<name> \w+)
  155. (?: \s*:\s* (?P<parent> \w+)
  156. (?: , (?P<interfaces> .*) )?
  157. )?
  158. ;$"""
  159. def main():
  160. input_filename = sys.argv[1]
  161. with open(input_filename) as file:
  162. lines = file.readlines()
  163. classes = dict()
  164. for line_num, line in enumerate(lines, 1):
  165. if line.startswith("#") or line.strip() == "":
  166. continue
  167. match_result = re.match(_LINE_PATTERN, line.strip(), re.VERBOSE)
  168. if not match_result:
  169. sys.exit(f"Invalid format on line {line_num}")
  170. prefix = match_result.group("prefix")
  171. if prefix == "":
  172. kind = Class.Kind.CONCRETE
  173. elif prefix == "root":
  174. kind = Class.Kind.ROOT
  175. elif prefix == "abstract":
  176. kind = Class.Kind.ABSTRACT
  177. elif prefix == "interface":
  178. kind = Class.Kind.INTERFACE
  179. else:
  180. sys.exit(f"Unrecognized class prefix '{prefix}' on line {line_num}")
  181. parent = None
  182. if match_result.group("parent"):
  183. if kind == Class.Kind.ROOT:
  184. sys.exit(f"Root class cannot have parent on line {line_num}")
  185. parent_name = match_result.group("parent")
  186. parent = classes[parent_name]
  187. if not parent:
  188. sys.exit(f"Unknown class '{parent_name}' on line {line_num}")
  189. if parent.kind == Class.Kind.CONCRETE:
  190. sys.exit(f"{parent.name} cannot be a parent on line {line_num}")
  191. elif parent.kind == Class.Kind.INTERFACE:
  192. if kind != Class.Kind.INTERFACE:
  193. sys.exit(
  194. "Interface cannot be parent of non-interface on"
  195. + f" line {line_num}"
  196. )
  197. else:
  198. if kind != Class.Kind.ROOT:
  199. sys.exit(
  200. f"Non-root class must have a parent on line {line_num}"
  201. )
  202. interfaces = []
  203. if match_result.group("interfaces"):
  204. for unstripped_name in match_result.group("interfaces").split(","):
  205. interface_name = unstripped_name.strip()
  206. interface = classes[interface_name]
  207. if not interface:
  208. sys.exit(
  209. f"Unknown class '{interface_name}' on line {line_num}"
  210. )
  211. if interface.kind != Class.Kind.INTERFACE:
  212. sys.exit(
  213. f"'{interface_name}' used as interface on"
  214. + f" line {line_num}"
  215. )
  216. interfaces.append(interface)
  217. classes[match_result.group("name")] = Class(
  218. match_result.group("name"), kind, parent, interfaces
  219. )
  220. for node in classes.values():
  221. if node.kind == Class.Kind.ROOT:
  222. node.Finalize()
  223. print(
  224. f"// Generated from {input_filename} by"
  225. + " executable_semantics/gen_rtti.py\n"
  226. )
  227. guard_macro = (
  228. input_filename.upper().translate(str.maketrans({"/": "_", ".": "_"}))
  229. + "_"
  230. )
  231. print(f"#ifndef {guard_macro}")
  232. print(f"#define {guard_macro}")
  233. print("\nnamespace Carbon {\n")
  234. for node in classes.values():
  235. if node.kind != Class.Kind.CONCRETE:
  236. if node.kind == Class.Kind.INTERFACE:
  237. ids = sorted(node.leaf_ids)
  238. else:
  239. ids = range(node.id_range[0], node.id_range[1])
  240. print(f"enum class {node.name}Kind {{")
  241. for id in ids:
  242. print(f" {node.Root().leaves[id].name} = {id},")
  243. print("};\n")
  244. if node.kind != Class.Kind.ROOT:
  245. print(
  246. f"inline bool InheritsFrom{node.name}({node.Root().name}Kind"
  247. + " kind) {"
  248. )
  249. if node.kind == Class.Kind.ABSTRACT:
  250. if node.id_range[0] == node.id_range[1]:
  251. print(" return false;")
  252. else:
  253. range_begin = node.Root().leaves[node.id_range[0]].name
  254. print(
  255. f" return kind >= {node.Root().name}Kind"
  256. + f"::{range_begin}"
  257. )
  258. if node.id_range[1] < len(node.Root().leaves):
  259. range_end = node.Root().leaves[node.id_range[1]].name
  260. print(
  261. f" && kind < {node.Root().name}Kind"
  262. + f"::{range_end}"
  263. )
  264. print(" ;")
  265. elif node.kind == Class.Kind.INTERFACE:
  266. print(" switch(kind) {")
  267. is_empty = True
  268. for id in sorted(node.leaf_ids):
  269. print(
  270. f" case {node.Root().name}Kind::"
  271. + f"{node.Root().leaves[id].name}:"
  272. )
  273. is_empty = False
  274. if not is_empty:
  275. print(" return true;")
  276. print(" default:")
  277. print(" return false;\n }")
  278. elif node.kind == Class.Kind.CONCRETE:
  279. print(
  280. f" return kind == {node.Root().name}Kind::{node.name};"
  281. )
  282. print("}\n")
  283. print("} // namespace Carbon\n")
  284. print(f"#endif // {guard_macro}")
  285. if __name__ == "__main__":
  286. main()