file_context.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_
  5. #define CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_
  6. #include "clang/Basic/CodeGenOptions.h"
  7. #include "clang/CodeGen/ModuleBuilder.h"
  8. #include "clang/Lex/PreprocessorOptions.h"
  9. #include "common/raw_string_ostream.h"
  10. #include "llvm/IR/Constants.h"
  11. #include "llvm/IR/DIBuilder.h"
  12. #include "llvm/IR/Instructions.h"
  13. #include "llvm/IR/LLVMContext.h"
  14. #include "llvm/IR/Module.h"
  15. #include "llvm/Support/BLAKE3.h"
  16. #include "toolchain/parse/tree_and_subtrees.h"
  17. #include "toolchain/sem_ir/file.h"
  18. #include "toolchain/sem_ir/ids.h"
  19. #include "toolchain/sem_ir/inst_namer.h"
  20. namespace Carbon::Lower {
  21. // Context and shared functionality for lowering handlers.
  22. class FileContext {
  23. public:
  24. // Location information for use with DebugInfo. The line_number and
  25. // column_number are >= 0, with 0 as unknown, so that they can be passed
  26. // directly to DebugInfo.
  27. struct LocForDI {
  28. llvm::StringRef filename;
  29. int32_t line_number;
  30. int32_t column_number;
  31. };
  32. // Describes a specific function's body fingerprint.
  33. struct SpecificFunctionFingerprint {
  34. // Fingerprint with all specific-dependent instructions, except specific
  35. // calls. This is built by the `FunctionContext` while lowering each
  36. // instruction in the definition of a specific function.
  37. // TODO: This can be merged with the function type fingerprint, for a
  38. // single upfront non-equivalence check, and hash bucketing for deeper
  39. // equivalence evaluation.
  40. llvm::BLAKE3Result<32> common_fingerprint;
  41. // Fingerprint for all calls to specific functions (hashes all calls to
  42. // other specifics). This is built by the `FunctionContext` while lowering.
  43. llvm::BLAKE3Result<32> specific_fingerprint;
  44. // All non-hashed specific_ids of functions called.
  45. llvm::SmallVector<SemIR::SpecificId> calls;
  46. };
  47. explicit FileContext(
  48. llvm::LLVMContext& llvm_context,
  49. llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
  50. std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
  51. tree_and_subtrees_getters_for_debug_info,
  52. llvm::StringRef module_name, const SemIR::File& sem_ir,
  53. clang::ASTUnit* cpp_ast, const SemIR::InstNamer* inst_namer,
  54. llvm::raw_ostream* vlog_stream);
  55. // Lowers the SemIR::File to LLVM IR. Should only be called once, and handles
  56. // the main execution loop.
  57. auto Run() -> std::unique_ptr<llvm::Module>;
  58. // Create the DICompileUnit metadata for this compilation.
  59. auto BuildDICompileUnit(llvm::StringRef module_name,
  60. llvm::Module& llvm_module,
  61. llvm::DIBuilder& di_builder) -> llvm::DICompileUnit*;
  62. // Creates the Clang `CodeGenerator` to generate LLVM module from imported C++
  63. // code. Returns null when not importing C++.
  64. auto CreateCppCodeGenerator() -> std::unique_ptr<clang::CodeGenerator>;
  65. // Gets a callable's function. Returns nullptr for a builtin.
  66. auto GetFunction(SemIR::FunctionId function_id) -> llvm::Function* {
  67. return functions_[function_id.index];
  68. }
  69. // Gets a or creates callable's function. Returns nullptr for a builtin.
  70. auto GetOrCreateFunction(SemIR::FunctionId function_id,
  71. SemIR::SpecificId specific_id) -> llvm::Function*;
  72. // Returns a lowered type for the given type_id.
  73. auto GetType(SemIR::TypeId type_id) -> llvm::Type* {
  74. CARBON_CHECK(type_id.has_value(), "Should not be called with `None`");
  75. CARBON_CHECK(type_id.is_concrete(), "Lowering symbolic type {0}: {1}",
  76. type_id, sem_ir().types().GetAsInst(type_id));
  77. CARBON_CHECK(types_[type_id.index], "Missing type {0}: {1}", type_id,
  78. sem_ir().types().GetAsInst(type_id));
  79. return types_[type_id.index];
  80. }
  81. // Returns location information for use with DebugInfo.
  82. auto GetLocForDI(SemIR::InstId inst_id) -> LocForDI;
  83. // Returns a lowered value to use for a value of type `type`.
  84. auto GetTypeAsValue() -> llvm::Constant* {
  85. return llvm::ConstantStruct::get(GetTypeType());
  86. }
  87. // Returns a lowered value to use for a value of int literal type.
  88. auto GetIntLiteralAsValue() -> llvm::Constant* {
  89. // TODO: Consider adding a named struct type for integer literals.
  90. return llvm::ConstantStruct::get(llvm::StructType::get(llvm_context()));
  91. }
  92. // Returns a global value for the given instruction.
  93. auto GetGlobal(SemIR::InstId inst_id, SemIR::SpecificId specific_id)
  94. -> llvm::Value*;
  95. // Returns the empty LLVM struct type used to represent the type `type`.
  96. auto GetTypeType() -> llvm::StructType* {
  97. if (!type_type_) {
  98. // `type` is lowered to an empty LLVM StructType.
  99. type_type_ = llvm::StructType::create(*llvm_context_, {}, "type");
  100. }
  101. return type_type_;
  102. }
  103. auto llvm_context() -> llvm::LLVMContext& { return *llvm_context_; }
  104. auto llvm_module() -> llvm::Module& { return *llvm_module_; }
  105. auto sem_ir() -> const SemIR::File& { return *sem_ir_; }
  106. auto cpp_ast() -> clang::ASTUnit* { return cpp_ast_; }
  107. auto inst_namer() -> const SemIR::InstNamer* { return inst_namer_; }
  108. auto global_variables() -> const Map<SemIR::InstId, llvm::GlobalVariable*>& {
  109. return global_variables_;
  110. }
  111. auto printf_int_format_string() -> llvm::Value* {
  112. return printf_int_format_string_;
  113. }
  114. auto SetPrintfIntFormatString(llvm::Value* printf_int_format_string) {
  115. CARBON_CHECK(!printf_int_format_string_,
  116. "PrintInt formatting string already generated");
  117. printf_int_format_string_ = printf_int_format_string;
  118. }
  119. struct FunctionTypeInfo {
  120. llvm::FunctionType* type;
  121. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  122. llvm::Type* return_type = nullptr;
  123. SemIR::InstId return_param_id = SemIR::InstId::None;
  124. };
  125. // Retrieve various features of the function's type useful for constructing
  126. // the `llvm::Type` for the `llvm::Function`. If any part of the type can't be
  127. // manifest (eg: incomplete return or parameter types), then the result is as
  128. // if the type was `void()`.
  129. auto BuildFunctionTypeInfo(const SemIR::Function& function,
  130. SemIR::SpecificId specific_id) -> FunctionTypeInfo;
  131. // Builds the global for the given instruction, which should then be cached by
  132. // the caller.
  133. auto BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
  134. -> llvm::GlobalVariable*;
  135. private:
  136. // Builds the declaration for the given function, which should then be cached
  137. // by the caller.
  138. auto BuildFunctionDecl(SemIR::FunctionId function_id,
  139. SemIR::SpecificId specific_id =
  140. SemIR::SpecificId::None) -> llvm::Function*;
  141. // Builds the definition for the given function. If the function is only a
  142. // declaration with no definition, does nothing. If this is a generic it'll
  143. // only be lowered if the specific_id is specified. During this lowering of
  144. // a generic, more generic functions may be added for lowering.
  145. auto BuildFunctionDefinition(
  146. SemIR::FunctionId function_id,
  147. SemIR::SpecificId specific_id = SemIR::SpecificId::None) -> void;
  148. // Builds a functions body. Common functionality for all functions.
  149. auto BuildFunctionBody(
  150. SemIR::FunctionId function_id, const SemIR::Function& function,
  151. llvm::Function* llvm_function,
  152. SemIR::SpecificId specific_id = SemIR::SpecificId::None) -> void;
  153. // Build the DISubprogram metadata for the given function.
  154. auto BuildDISubprogram(const SemIR::Function& function,
  155. const llvm::Function* llvm_function)
  156. -> llvm::DISubprogram*;
  157. // Builds the type for the given instruction, which should then be cached by
  158. // the caller.
  159. auto BuildType(SemIR::InstId inst_id) -> llvm::Type*;
  160. auto BuildVtable(const SemIR::Class& class_info) -> llvm::GlobalVariable*;
  161. // Records a specific that was lowered for a generic. These are added one
  162. // by one while lowering their definitions.
  163. auto AddLoweredSpecificForGeneric(SemIR::GenericId generic_id,
  164. SemIR::SpecificId specific_id) {
  165. lowered_specifics_[generic_id.index].push_back(specific_id);
  166. }
  167. // Initializes and returns a SpecificFunctionFingerprint* instance for a
  168. // specific. The internal of the fingerprint are populated during and after
  169. // lowering the function body of that specific.
  170. auto InitializeFingerprintForSpecific(SemIR::SpecificId specific_id)
  171. -> SpecificFunctionFingerprint* {
  172. if (!specific_id.has_value()) {
  173. return nullptr;
  174. }
  175. return &lowered_specific_fingerprint_[specific_id.index];
  176. }
  177. // Entry point for coalescing equivalent specifics. Two function definitions,
  178. // from the same generic, with different specific_ids are considered
  179. // equivalent if, at the LLVM level, one can be replaced with the other, with
  180. // no change in behavior. All LLVM types and instructions must be equivalent.
  181. auto CoalesceEquivalentSpecifics() -> void;
  182. // While coalescing specifics, returns whether the function types for two
  183. // specifics are equivalent. This uses a fingerprint generated for each
  184. // function type.
  185. auto AreFunctionTypesEquivalent(SemIR::SpecificId specific_id1,
  186. SemIR::SpecificId specific_id2) -> bool;
  187. // While coalescing specifics, compare the function bodies for two specifics.
  188. // This uses fingerprints generated during lowering of the function body.
  189. // The `visited_equivalent_specifics` parameter is used to track cycles in
  190. // the function callgraph, and will also return equivalent pairs of specifics
  191. // found, if the two specifics given as arguments are found to be equivalent.
  192. auto AreFunctionBodiesEquivalent(
  193. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  194. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
  195. visited_equivalent_specifics) -> bool;
  196. // Given an equivalent pair of specifics, updates the canonical specific to
  197. // use for each of the two Specifics found to be equivalent, replaces all
  198. // uses of one specific with the canonical one, and adds the non-canonical
  199. // specific to specifics_to_delete.
  200. auto ProcessSpecificEquivalence(
  201. std::pair<SemIR::SpecificId, SemIR::SpecificId> pair,
  202. llvm::SmallVector<SemIR::SpecificId>& specifics_to_delete) -> void;
  203. // Checks if two specific_ids are equivalent and also reduces the equivalence
  204. // chains/paths. This update ensures the canonical specific is always "one
  205. // hop away".
  206. auto IsKnownEquivalence(SemIR::SpecificId specific_id1,
  207. SemIR::SpecificId specific_id2) -> bool;
  208. // Inserts a pair into a set of pairs in canonical form. Also implicitly
  209. // checks entry already existed if it cannot be inserted.
  210. auto InsertPair(
  211. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  212. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
  213. -> bool;
  214. // Checks if a pair is contained into a set of pairs, in canonical form.
  215. auto ContainsPair(
  216. SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
  217. const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
  218. -> bool;
  219. // State for building the LLVM IR.
  220. llvm::LLVMContext* llvm_context_;
  221. std::unique_ptr<llvm::Module> llvm_module_;
  222. // The filesystem for source code.
  223. llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs_;
  224. // State for building the LLVM IR debug info metadata.
  225. llvm::DIBuilder di_builder_;
  226. // The DICompileUnit, if any - null implies debug info is not being emitted.
  227. llvm::DICompileUnit* di_compile_unit_;
  228. // The trees are only provided when debug info should be emitted.
  229. std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
  230. tree_and_subtrees_getters_for_debug_info_;
  231. // The input SemIR.
  232. const SemIR::File* const sem_ir_;
  233. // A mutable Clang AST is necessary for lowering since using the AST in lower
  234. // modifies it.
  235. clang::ASTUnit* cpp_ast_;
  236. // The options used to create the Clang Code Generator.
  237. clang::HeaderSearchOptions cpp_header_search_options_;
  238. clang::PreprocessorOptions cpp_preprocessor_options_;
  239. clang::CodeGenOptions cpp_code_gen_options_;
  240. // The Clang `CodeGenerator` to generate LLVM module from imported C++
  241. // code. Should be initialized using `CreateCppCodeGenerator()`. Can be null
  242. // if no C++ code is imported.
  243. std::unique_ptr<clang::CodeGenerator> cpp_code_generator_;
  244. // The instruction namer, if given.
  245. const SemIR::InstNamer* const inst_namer_;
  246. // The optional vlog stream.
  247. llvm::raw_ostream* vlog_stream_;
  248. // Maps callables to lowered functions. SemIR treats callables as the
  249. // canonical form of a function, so lowering needs to do the same.
  250. // Vector indexes correspond to `FunctionId` indexes. We resize this directly
  251. // to the correct size.
  252. llvm::SmallVector<llvm::Function*, 0> functions_;
  253. // Maps specific callables to lowered functions. Vector indexes correspond to
  254. // `SpecificId` indexes. We resize this directly to the correct size.
  255. llvm::SmallVector<llvm::Function*, 0> specific_functions_;
  256. // Maps which specific functions are generics that need to have their
  257. // definitions lowered after the lowering of other definitions.
  258. // This list may grow while lowering generic definitions from this list.
  259. // The list uses the `SpecificId` to index into specific_functions_.
  260. llvm::SmallVector<std::pair<SemIR::FunctionId, SemIR::SpecificId>, 10>
  261. specific_function_definitions_;
  262. // Provides lowered versions of types.
  263. // Vector indexes correspond to `TypeId` indexes for non-symbolic types. We
  264. // resize this directly to the (often large) correct size.
  265. llvm::SmallVector<llvm::Type*, 0> types_;
  266. // Lowered version of the builtin type `type`.
  267. llvm::StructType* type_type_ = nullptr;
  268. // Maps constants to their lowered values.
  269. // Vector indexes correspond to `InstId` indexes for constant instructions. We
  270. // resize this directly to the (often large) correct size.
  271. llvm::SmallVector<llvm::Constant*, 0> constants_;
  272. // Maps global variables to their lowered variant.
  273. Map<SemIR::InstId, llvm::GlobalVariable*> global_variables_;
  274. // Global format string for `printf.int.format` used by the PrintInt builtin.
  275. llvm::Value* printf_int_format_string_ = nullptr;
  276. // For a generic function, keep track of the specifics for which LLVM
  277. // function declarations were created. Those can be retrieved then from
  278. // `specific_functions_`. We resize this to the correct size. Vector indexes
  279. // correspond to `GenericId` indexes.
  280. llvm::SmallVector<llvm::SmallVector<SemIR::SpecificId>, 0> lowered_specifics_;
  281. // For specifics that exist in lowered_specifics, a hash of their function
  282. // type information: return and parameter types. We resize this to the
  283. // correct size. Vector indexes correspond to `SpecificId` indexes.
  284. // TODO: Hashing all members of `FunctionTypeInfo` may not be necessary.
  285. llvm::SmallVector<llvm::BLAKE3Result<32>, 0>
  286. lowered_specifics_type_fingerprint_;
  287. // This is initialized and populated while lowering a specific.
  288. // We resize this to the correct size. Vector indexes correspond to
  289. // `SpecificId` indexes.
  290. llvm::SmallVector<SpecificFunctionFingerprint, 0>
  291. lowered_specific_fingerprint_;
  292. // Equivalent specifics that have been found. For each specific, this points
  293. // to the canonical equivalent specific, which may also be self. We currently
  294. // define the canonical specific as the one with the lowest
  295. // `SpecificId.index`.
  296. //
  297. // We resize this to the correct size and initialize to `SpecificId::None`,
  298. // which defines that there is no other equivalent specific to this
  299. // `SpecificId`. Vector indexes correspond to `SpecificId` indexes.
  300. llvm::SmallVector<SemIR::SpecificId, 0> equivalent_specifics_;
  301. // Non-equivalent specifics found.
  302. // TODO: Revisit this due to its quadratic space growth.
  303. Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
  304. non_equivalent_specifics_;
  305. };
  306. } // namespace Carbon::Lower
  307. #endif // CARBON_TOOLCHAIN_LOWER_FILE_CONTEXT_H_