// file_context.h
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_
  5. #define CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_
  6. #include "clang/Basic/CodeGenOptions.h"
  7. #include "clang/CodeGen/ModuleBuilder.h"
  8. #include "clang/Lex/PreprocessorOptions.h"
  9. #include "llvm/Support/BLAKE3.h"
  10. #include "toolchain/lower/context.h"
  11. #include "toolchain/parse/tree_and_subtrees.h"
  12. #include "toolchain/sem_ir/file.h"
  13. #include "toolchain/sem_ir/ids.h"
  14. #include "toolchain/sem_ir/inst_namer.h"
  15. namespace Carbon::Lower {
  16. // Context and shared functionality for lowering within a SemIR file.
  17. class FileContext {
  18. public:
  19. // Describes a specific function's body fingerprint.
  20. struct SpecificFunctionFingerprint {
  21. // Fingerprint with all specific-dependent instructions, except specific
  22. // calls. This is built by the `FunctionContext` while lowering each
  23. // instruction in the definition of a specific function.
  24. // TODO: This can be merged with the function type fingerprint, for a
  25. // single upfront non-equivalence check, and hash bucketing for deeper
  26. // equivalence evaluation.
  27. llvm::BLAKE3Result<32> common_fingerprint;
  28. // Fingerprint for all calls to specific functions (hashes all calls to
  29. // other specifics). This is built by the `FunctionContext` while lowering.
  30. llvm::BLAKE3Result<32> specific_fingerprint;
  31. // All non-hashed specific_ids of functions called.
  32. llvm::SmallVector<SemIR::SpecificId> calls;
  33. };
  34. explicit FileContext(Context& context, const SemIR::File& sem_ir,
  35. const SemIR::InstNamer* inst_namer,
  36. llvm::raw_ostream* vlog_stream);
  37. // Creates the Clang `CodeGenerator` to generate LLVM module from imported C++
  38. // code. Returns null when not importing C++.
  39. auto CreateCppCodeGenerator() -> std::unique_ptr<clang::CodeGenerator>;
  40. // Prepares to lower code in this IR, by precomputing needed LLVM types,
  41. // constants, declarations, etc. Should only be called once, before we lower
  42. // anything in this file.
  43. auto PrepareToLower() -> void;
  44. // Lowers all the definitions provided by the SemIR::File to LLVM IR.
  45. auto LowerDefinitions() -> void;
  46. // Perform final cleanup tasks once all lowering has been completed.
  47. auto Finalize() -> void;
  48. // Gets a callable's function. Returns nullptr for a builtin or a function we
  49. // have not lowered.
  50. auto GetFunction(SemIR::FunctionId function_id,
  51. SemIR::SpecificId specific_id = SemIR::SpecificId::None)
  52. -> llvm::Function* {
  53. return *GetFunctionAddr(function_id, specific_id);
  54. }
  55. // Gets a or creates callable's function. Returns nullptr for a builtin.
  56. auto GetOrCreateFunction(SemIR::FunctionId function_id,
  57. SemIR::SpecificId specific_id) -> llvm::Function*;
  58. // Returns a lowered type for the given type_id.
  59. auto GetType(SemIR::TypeId type_id) -> llvm::Type* {
  60. CARBON_CHECK(type_id.has_value(), "Should not be called with `None`");
  61. CARBON_CHECK(type_id.is_concrete(), "Lowering symbolic type {0}: {1}",
  62. type_id, sem_ir().types().GetAsInst(type_id));
  63. CARBON_CHECK(types_[type_id.index], "Missing type {0}: {1}", type_id,
  64. sem_ir().types().GetAsInst(type_id));
  65. return types_[type_id.index];
  66. }
  67. // Returns location information for use with DebugInfo.
  68. auto GetLocForDI(SemIR::InstId inst_id) -> Context::LocForDI;
  69. // Returns a lowered value to use for a value of type `type`.
  70. auto GetTypeAsValue() -> llvm::Constant* {
  71. return context().GetTypeAsValue();
  72. }
  73. // Returns a lowered value to use for a value of int literal type.
  74. auto GetIntLiteralAsValue() -> llvm::Constant* {
  75. return context().GetIntLiteralAsValue();
  76. }
  77. // Returns a value for the given constant. If specified, `use_inst_id` is the
  78. // instruction that is using this constant.
  79. auto GetConstant(SemIR::ConstantId const_id, SemIR::InstId use_inst_id)
  80. -> llvm::Value*;
  81. // Returns the empty LLVM struct type used to represent the type `type`.
  82. auto GetTypeType() -> llvm::StructType* { return context().GetTypeType(); }
  83. auto context() -> Context& { return *context_; }
  84. auto llvm_context() -> llvm::LLVMContext& { return context().llvm_context(); }
  85. auto llvm_module() -> llvm::Module& { return context().llvm_module(); }
  86. auto sem_ir() -> const SemIR::File& { return *sem_ir_; }
  87. auto cpp_ast() -> const clang::ASTUnit* { return sem_ir().cpp_ast(); }
  88. auto inst_namer() -> const SemIR::InstNamer* { return inst_namer_; }
  89. auto global_variables() -> const Map<SemIR::InstId, llvm::GlobalVariable*>& {
  90. return global_variables_;
  91. }
  92. auto printf_int_format_string() -> llvm::Value* {
  93. return context().printf_int_format_string();
  94. }
  95. auto SetPrintfIntFormatString(llvm::Value* printf_int_format_string) {
  96. context().SetPrintfIntFormatString(printf_int_format_string);
  97. }
  98. struct FunctionTypeInfo {
  99. llvm::FunctionType* type;
  100. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  101. llvm::Type* return_type = nullptr;
  102. SemIR::InstId return_param_id = SemIR::InstId::None;
  103. };
  104. // Retrieve various features of the function's type useful for constructing
  105. // the `llvm::Type` for the `llvm::Function`. If any part of the type can't be
  106. // manifest (eg: incomplete return or parameter types), then the result is as
  107. // if the type was `void()`.
  108. auto BuildFunctionTypeInfo(const SemIR::Function& function,
  109. SemIR::SpecificId specific_id) -> FunctionTypeInfo;
  110. // Builds the global for the given instruction, which should then be cached by
  111. // the caller.
  112. auto BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
  113. -> llvm::GlobalVariable*;
  114. // Builds the definition for the given function. If the function is only a
  115. // declaration with no definition, does nothing. If this is a generic it'll
  116. // only be lowered if the specific_id is specified. During this lowering of
  117. // a generic, more generic functions may be added for lowering.
  118. auto BuildFunctionDefinition(
  119. SemIR::FunctionId function_id,
  120. SemIR::SpecificId specific_id = SemIR::SpecificId::None) -> void;
  121. private:
  122. // Gets the location in which a callable's function is stored.
  123. auto GetFunctionAddr(SemIR::FunctionId function_id,
  124. SemIR::SpecificId specific_id) -> llvm::Function** {
  125. return specific_id.has_value() ? &specific_functions_[specific_id.index]
  126. : &functions_[function_id.index];
  127. }
  128. // Notes that a C++ function has been referenced for the first time, so we
  129. // should ask Clang to generate a definition for it if possible.
  130. auto HandleReferencedCppFunction(clang::FunctionDecl* cpp_decl) -> void;
  131. // Notes that a specific function has been referenced for the first time.
  132. // Updates the fingerprint to include the function's type, and adds the
  133. // function to the list of specific functions whose definitions should be
  134. // lowered.
  135. auto HandleReferencedSpecificFunction(SemIR::FunctionId function_id,
  136. SemIR::SpecificId specific_id,
  137. llvm::Type* llvm_type) -> void;
  138. // Builds the declaration for the given function, which should then be cached
  139. // by the caller.
  140. auto BuildFunctionDecl(SemIR::FunctionId function_id,
  141. SemIR::SpecificId specific_id =
  142. SemIR::SpecificId::None) -> llvm::Function*;
  143. // Builds a function's body. Common functionality for all functions.
  144. //
  145. // The `function_id` and `specific_id` identify the function within this
  146. // context's file. If the function was defined in a different file,
  147. // `definition_context` is a `FileContext` for that other file.
  148. // `definition_function` is the `Function` object within the file that owns
  149. // the definition.
  150. auto BuildFunctionBody(SemIR::FunctionId function_id,
  151. SemIR::SpecificId specific_id,
  152. const SemIR::Function& declaration_function,
  153. FileContext& definition_context,
  154. const SemIR::Function& definition_function) -> void;
  155. // Build the DISubprogram metadata for the given function.
  156. auto BuildDISubprogram(const SemIR::Function& function,
  157. const llvm::Function* llvm_function)
  158. -> llvm::DISubprogram*;
  159. // Builds the type for the given instruction, which should then be cached by
  160. // the caller.
  161. auto BuildType(SemIR::InstId inst_id) -> llvm::Type*;
  162. auto BuildVtable(const SemIR::Class& class_info) -> llvm::GlobalVariable*;
  163. // Records a specific that was lowered for a generic. These are added one
  164. // by one while lowering their definitions.
  165. auto AddLoweredSpecificForGeneric(SemIR::GenericId generic_id,
  166. SemIR::SpecificId specific_id) {
  167. lowered_specifics_[generic_id.index].push_back(specific_id);
  168. }
  169. // Initializes and returns a SpecificFunctionFingerprint* instance for a
  170. // specific. The internal of the fingerprint are populated during and after
  171. // lowering the function body of that specific.
  172. auto InitializeFingerprintForSpecific(SemIR::SpecificId specific_id)
  173. -> SpecificFunctionFingerprint* {
  174. if (!specific_id.has_value()) {
  175. return nullptr;
  176. }
  177. return &lowered_specific_fingerprint_[specific_id.index];
  178. }
  179. // Entry point for coalescing equivalent specifics. Two function definitions,
  180. // from the same generic, with different specific_ids are considered
  181. // equivalent if, at the LLVM level, one can be replaced with the other, with
  182. // no change in behavior. All LLVM types and instructions must be equivalent.
  183. auto CoalesceEquivalentSpecifics() -> void;
  184. // While coalescing specifics, returns whether the function types for two
  185. // specifics are equivalent. This uses a fingerprint generated for each
  186. // function type.
  187. auto AreFunctionTypesEquivalent(SemIR::SpecificId specific_id1,
  188. SemIR::SpecificId specific_id2) -> bool;
  189. // While coalescing specifics, compare the function bodies for two specifics.
  190. // This uses fingerprints generated during lowering of the function body.
  191. // The `visited_equivalent_specifics` parameter is used to track cycles in
  192. // the function callgraph, and will also return equivalent pairs of specifics
  193. // found, if the two specifics given as arguments are found to be equivalent.
  194. auto AreFunctionBodiesEquivalent(
  195. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  196. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
  197. visited_equivalent_specifics) -> bool;
  198. // Given an equivalent pair of specifics, updates the canonical specific to
  199. // use for each of the two Specifics found to be equivalent, replaces all
  200. // uses of one specific with the canonical one, and adds the non-canonical
  201. // specific to specifics_to_delete.
  202. auto ProcessSpecificEquivalence(
  203. std::pair<SemIR::SpecificId, SemIR::SpecificId> pair,
  204. llvm::SmallVector<SemIR::SpecificId>& specifics_to_delete) -> void;
  205. // Checks if two specific_ids are equivalent and also reduces the equivalence
  206. // chains/paths. This update ensures the canonical specific is always "one
  207. // hop away".
  208. auto IsKnownEquivalence(SemIR::SpecificId specific_id1,
  209. SemIR::SpecificId specific_id2) -> bool;
  210. // Inserts a pair into a set of pairs in canonical form. Also implicitly
  211. // checks entry already existed if it cannot be inserted.
  212. auto InsertPair(
  213. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  214. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
  215. -> bool;
  216. // Checks if a pair is contained into a set of pairs, in canonical form.
  217. auto ContainsPair(
  218. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  219. const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
  220. -> bool;
  221. // The overall lowering context.
  222. Context* context_;
  223. // The input SemIR.
  224. const SemIR::File* const sem_ir_;
  225. // The options used to create the Clang Code Generator.
  226. clang::HeaderSearchOptions cpp_header_search_options_;
  227. clang::PreprocessorOptions cpp_preprocessor_options_;
  228. clang::CodeGenOptions cpp_code_gen_options_;
  229. // The Clang `CodeGenerator` to generate LLVM module from imported C++
  230. // code. Should be initialized using `CreateCppCodeGenerator()`. Can be null
  231. // if no C++ code is imported.
  232. std::unique_ptr<clang::CodeGenerator> cpp_code_generator_;
  233. // The instruction namer, if given.
  234. const SemIR::InstNamer* const inst_namer_;
  235. // The optional vlog stream.
  236. llvm::raw_ostream* vlog_stream_;
  237. // Maps callables to lowered functions. SemIR treats callables as the
  238. // canonical form of a function, so lowering needs to do the same.
  239. // Vector indexes correspond to `FunctionId` indexes. We resize this directly
  240. // to the correct size.
  241. llvm::SmallVector<llvm::Function*, 0> functions_;
  242. // Maps specific callables to lowered functions. Vector indexes correspond to
  243. // `SpecificId` indexes. We resize this directly to the correct size.
  244. llvm::SmallVector<llvm::Function*, 0> specific_functions_;
  245. // Provides lowered versions of types.
  246. // Vector indexes correspond to `TypeId` indexes for non-symbolic types. We
  247. // resize this directly to the (often large) correct size.
  248. llvm::SmallVector<llvm::Type*, 0> types_;
  249. // Maps constants to their lowered values.
  250. // Vector indexes correspond to `InstId` indexes for constant instructions. We
  251. // resize this directly to the (often large) correct size.
  252. llvm::SmallVector<llvm::Constant*, 0> constants_;
  253. // Maps global variables to their lowered variant.
  254. Map<SemIR::InstId, llvm::GlobalVariable*> global_variables_;
  255. // For a generic function, keep track of the specifics for which LLVM
  256. // function declarations were created. Those can be retrieved then from
  257. // `specific_functions_`. We resize this to the correct size. Vector indexes
  258. // correspond to `GenericId` indexes.
  259. llvm::SmallVector<llvm::SmallVector<SemIR::SpecificId>, 0> lowered_specifics_;
  260. // For specifics that exist in lowered_specifics, a hash of their function
  261. // type information: return and parameter types. We resize this to the
  262. // correct size. Vector indexes correspond to `SpecificId` indexes.
  263. // TODO: Hashing all members of `FunctionTypeInfo` may not be necessary.
  264. llvm::SmallVector<llvm::BLAKE3Result<32>, 0>
  265. lowered_specifics_type_fingerprint_;
  266. // This is initialized and populated while lowering a specific.
  267. // We resize this to the correct size. Vector indexes correspond to
  268. // `SpecificId` indexes.
  269. llvm::SmallVector<SpecificFunctionFingerprint, 0>
  270. lowered_specific_fingerprint_;
  271. // Equivalent specifics that have been found. For each specific, this points
  272. // to the canonical equivalent specific, which may also be self. We currently
  273. // define the canonical specific as the one with the lowest
  274. // `SpecificId.index`.
  275. //
  276. // We resize this to the correct size and initialize to `SpecificId::None`,
  277. // which defines that there is no other equivalent specific to this
  278. // `SpecificId`. Vector indexes correspond to `SpecificId` indexes.
  279. llvm::SmallVector<SemIR::SpecificId, 0> equivalent_specifics_;
  280. // Non-equivalent specifics found.
  281. // TODO: Revisit this due to its quadratic space growth.
  282. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
  283. non_equivalent_specifics_;
  284. };
  285. } // namespace Carbon::Lower
  286. #endif // CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_