file_context.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lower/file_context.h"
  5. #include "common/vlog.h"
  6. #include "llvm/ADT/STLExtras.h"
  7. #include "llvm/ADT/Sequence.h"
  8. #include "llvm/Transforms/Utils/ModuleUtils.h"
  9. #include "toolchain/base/kind_switch.h"
  10. #include "toolchain/lower/constant.h"
  11. #include "toolchain/lower/function_context.h"
  12. #include "toolchain/sem_ir/entry_point.h"
  13. #include "toolchain/sem_ir/file.h"
  14. #include "toolchain/sem_ir/function.h"
  15. #include "toolchain/sem_ir/ids.h"
  16. #include "toolchain/sem_ir/inst.h"
  17. #include "toolchain/sem_ir/typed_insts.h"
  18. namespace Carbon::Lower {
  19. FileContext::FileContext(llvm::LLVMContext& llvm_context,
  20. llvm::StringRef module_name, const SemIR::File& sem_ir,
  21. const SemIR::InstNamer* inst_namer,
  22. llvm::raw_ostream* vlog_stream)
  23. : llvm_context_(&llvm_context),
  24. llvm_module_(std::make_unique<llvm::Module>(module_name, llvm_context)),
  25. sem_ir_(&sem_ir),
  26. inst_namer_(inst_namer),
  27. vlog_stream_(vlog_stream) {
  28. CARBON_CHECK(!sem_ir.has_errors())
  29. << "Generating LLVM IR from invalid SemIR::File is unsupported.";
  30. }
  31. // TODO: Move this to lower.cpp.
  32. auto FileContext::Run() -> std::unique_ptr<llvm::Module> {
  33. CARBON_CHECK(llvm_module_) << "Run can only be called once.";
  34. // Lower all types that were required to be complete.
  35. types_.resize(sem_ir_->insts().size());
  36. for (auto type_id : sem_ir_->types().complete_types()) {
  37. if (type_id.index >= 0) {
  38. types_[type_id.index] = BuildType(sem_ir_->types().GetInstId(type_id));
  39. }
  40. }
  41. // Lower function declarations.
  42. functions_.resize_for_overwrite(sem_ir_->functions().size());
  43. for (auto i : llvm::seq(sem_ir_->functions().size())) {
  44. functions_[i] = BuildFunctionDecl(SemIR::FunctionId(i));
  45. }
  46. // Lower global variable declarations.
  47. for (auto inst_id :
  48. sem_ir().inst_blocks().Get(sem_ir().top_inst_block_id())) {
  49. // Only `VarStorage` indicates a global variable declaration in the
  50. // top instruction block.
  51. if (auto var = sem_ir().insts().TryGetAs<SemIR::VarStorage>(inst_id)) {
  52. global_variables_.Insert(inst_id, BuildGlobalVariableDecl(*var));
  53. }
  54. }
  55. // Lower constants.
  56. constants_.resize(sem_ir_->insts().size());
  57. LowerConstants(*this, constants_);
  58. // Lower function definitions.
  59. for (auto i : llvm::seq(sem_ir_->functions().size())) {
  60. BuildFunctionDefinition(SemIR::FunctionId(i));
  61. }
  62. // Append `__global_init` to `llvm::global_ctors` to initialize global
  63. // variables.
  64. if (sem_ir().global_ctor_id().is_valid()) {
  65. llvm::appendToGlobalCtors(llvm_module(),
  66. GetFunction(sem_ir().global_ctor_id()),
  67. /*Priority=*/0);
  68. }
  69. return std::move(llvm_module_);
  70. }
  71. auto FileContext::GetGlobal(SemIR::InstId inst_id) -> llvm::Value* {
  72. auto inst = sem_ir().insts().Get(inst_id);
  73. auto const_id = sem_ir().constant_values().Get(inst_id);
  74. if (const_id.is_template()) {
  75. auto const_inst_id = sem_ir().constant_values().GetInstId(const_id);
  76. // For value expressions and initializing expressions, the value produced by
  77. // a constant instruction is a value representation of the constant. For
  78. // initializing expressions, `FinishInit` will perform a copy if needed.
  79. // TODO: Handle reference expression constants.
  80. auto* const_value = constants_[const_inst_id.index];
  81. // If we want a pointer to the constant, materialize a global to hold it.
  82. // TODO: We could reuse the same global if the constant is used more than
  83. // once.
  84. auto value_rep = SemIR::ValueRepr::ForType(sem_ir(), inst.type_id());
  85. if (value_rep.kind == SemIR::ValueRepr::Pointer) {
  86. // Include both the name of the constant, if any, and the point of use in
  87. // the name of the variable.
  88. llvm::StringRef const_name;
  89. llvm::StringRef use_name;
  90. if (inst_namer_) {
  91. const_name = inst_namer_->GetUnscopedNameFor(const_inst_id);
  92. use_name = inst_namer_->GetUnscopedNameFor(inst_id);
  93. }
  94. // We always need to give the global a name even if the instruction namer
  95. // doesn't have one to use.
  96. if (const_name.empty()) {
  97. const_name = "const";
  98. }
  99. if (use_name.empty()) {
  100. use_name = "anon";
  101. }
  102. llvm::StringRef sep = (use_name[0] == '.') ? "" : ".";
  103. return new llvm::GlobalVariable(
  104. llvm_module(), GetType(sem_ir().GetPointeeType(value_rep.type_id)),
  105. /*isConstant=*/true, llvm::GlobalVariable::InternalLinkage,
  106. const_value, const_name + sep + use_name);
  107. }
  108. // Otherwise, we can use the constant value directly.
  109. return const_value;
  110. }
  111. // TODO: For generics, handle references to symbolic constants.
  112. CARBON_FATAL() << "Missing value: " << inst_id << " "
  113. << sem_ir().insts().Get(inst_id);
  114. }
  115. auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id)
  116. -> llvm::Function* {
  117. const auto& function = sem_ir().functions().Get(function_id);
  118. // Don't lower generic functions or associated functions.
  119. // TODO: Associated functions have `Self` in scope so should be treated as
  120. // generic functions.
  121. if (function.generic_id.is_valid() ||
  122. sem_ir().insts().Is<SemIR::InterfaceDecl>(
  123. sem_ir().name_scopes().Get(function.parent_scope_id).inst_id)) {
  124. return nullptr;
  125. }
  126. // Don't lower builtins.
  127. if (function.builtin_function_kind != SemIR::BuiltinFunctionKind::None) {
  128. return nullptr;
  129. }
  130. // TODO: Consider tracking whether the function has been used, and only
  131. // lowering it if it's needed.
  132. // TODO: Pass in a specific ID for generic functions.
  133. const auto specific_id = SemIR::SpecificId::Invalid;
  134. const auto return_info =
  135. SemIR::ReturnTypeInfo::ForFunction(sem_ir(), function, specific_id);
  136. CARBON_CHECK(return_info.is_valid()) << "Should not lower invalid functions.";
  137. auto implicit_param_refs =
  138. sem_ir().inst_blocks().GetOrEmpty(function.implicit_param_refs_id);
  139. // TODO: Include parameters corresponding to positional parameters.
  140. auto param_refs = sem_ir().inst_blocks().GetOrEmpty(function.param_refs_id);
  141. auto* return_type =
  142. return_info.type_id.is_valid() ? GetType(return_info.type_id) : nullptr;
  143. llvm::SmallVector<llvm::Type*> param_types;
  144. // TODO: Consider either storing `param_inst_ids` somewhere so that we can
  145. // reuse it from `BuildFunctionDefinition` and when building calls, or factor
  146. // out a mechanism to compute the mapping between parameters and arguments on
  147. // demand.
  148. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  149. auto max_llvm_params = (return_info.has_return_slot() ? 1 : 0) +
  150. implicit_param_refs.size() + param_refs.size();
  151. param_types.reserve(max_llvm_params);
  152. param_inst_ids.reserve(max_llvm_params);
  153. if (return_info.has_return_slot()) {
  154. param_types.push_back(return_type->getPointerTo());
  155. param_inst_ids.push_back(function.return_storage_id);
  156. }
  157. for (auto param_ref_id :
  158. llvm::concat<const SemIR::InstId>(implicit_param_refs, param_refs)) {
  159. auto param_type_id =
  160. SemIR::Function::GetParamFromParamRefId(sem_ir(), param_ref_id)
  161. .second.type_id;
  162. switch (auto value_rep = SemIR::ValueRepr::ForType(sem_ir(), param_type_id);
  163. value_rep.kind) {
  164. case SemIR::ValueRepr::Unknown:
  165. CARBON_FATAL()
  166. << "Incomplete parameter type lowering function declaration";
  167. case SemIR::ValueRepr::None:
  168. break;
  169. case SemIR::ValueRepr::Copy:
  170. case SemIR::ValueRepr::Custom:
  171. case SemIR::ValueRepr::Pointer:
  172. param_types.push_back(GetType(value_rep.type_id));
  173. param_inst_ids.push_back(param_ref_id);
  174. break;
  175. }
  176. }
  177. // Compute the return type to use for the LLVM function. If the initializing
  178. // representation doesn't produce a value, set the return type to void.
  179. llvm::Type* function_return_type =
  180. return_info.init_repr.kind == SemIR::InitRepr::ByCopy
  181. ? return_type
  182. : llvm::Type::getVoidTy(llvm_context());
  183. std::string mangled_name;
  184. if (SemIR::IsEntryPoint(sem_ir(), function_id)) {
  185. // TODO: Add an implicit `return 0` if `Run` doesn't return `i32`.
  186. mangled_name = "main";
  187. } else if (auto name =
  188. sem_ir().names().GetAsStringIfIdentifier(function.name_id)) {
  189. // TODO: Decide on a name mangling scheme.
  190. mangled_name = *name;
  191. } else {
  192. CARBON_FATAL() << "Unexpected special name for function: "
  193. << function.name_id;
  194. }
  195. llvm::FunctionType* function_type = llvm::FunctionType::get(
  196. function_return_type, param_types, /*isVarArg=*/false);
  197. auto* llvm_function =
  198. llvm::Function::Create(function_type, llvm::Function::ExternalLinkage,
  199. mangled_name, llvm_module());
  200. // Set up parameters and the return slot.
  201. for (auto [inst_id, arg] :
  202. llvm::zip_equal(param_inst_ids, llvm_function->args())) {
  203. auto name_id = SemIR::NameId::Invalid;
  204. if (inst_id == function.return_storage_id) {
  205. name_id = SemIR::NameId::ReturnSlot;
  206. arg.addAttr(
  207. llvm::Attribute::getWithStructRetType(llvm_context(), return_type));
  208. } else {
  209. name_id = SemIR::Function::GetParamFromParamRefId(sem_ir(), inst_id)
  210. .second.name_id;
  211. }
  212. arg.setName(sem_ir().names().GetIRBaseName(name_id));
  213. }
  214. return llvm_function;
  215. }
  216. auto FileContext::BuildFunctionDefinition(SemIR::FunctionId function_id)
  217. -> void {
  218. const auto& function = sem_ir().functions().Get(function_id);
  219. const auto& body_block_ids = function.body_block_ids;
  220. if (body_block_ids.empty()) {
  221. // Function is probably defined in another file; not an error.
  222. return;
  223. }
  224. llvm::Function* llvm_function = GetFunction(function_id);
  225. if (!llvm_function) {
  226. // We chose not to lower this function at all, for example because it's a
  227. // generic function.
  228. return;
  229. }
  230. FunctionContext function_lowering(*this, llvm_function, vlog_stream_);
  231. // TODO: Pass in a specific ID for generic functions.
  232. const auto specific_id = SemIR::SpecificId::Invalid;
  233. // Add parameters to locals.
  234. // TODO: This duplicates the mapping between sem_ir instructions and LLVM
  235. // function parameters that was already computed in BuildFunctionDecl.
  236. // We should only do that once.
  237. auto implicit_param_refs =
  238. sem_ir().inst_blocks().GetOrEmpty(function.implicit_param_refs_id);
  239. auto param_refs = sem_ir().inst_blocks().GetOrEmpty(function.param_refs_id);
  240. int param_index = 0;
  241. if (SemIR::ReturnTypeInfo::ForFunction(sem_ir(), function, specific_id)
  242. .has_return_slot()) {
  243. function_lowering.SetLocal(function.return_storage_id,
  244. llvm_function->getArg(param_index));
  245. ++param_index;
  246. }
  247. for (auto param_ref_id :
  248. llvm::concat<const SemIR::InstId>(implicit_param_refs, param_refs)) {
  249. auto [param_id, param] =
  250. SemIR::Function::GetParamFromParamRefId(sem_ir(), param_ref_id);
  251. // Get the value of the parameter from the function argument.
  252. auto param_type_id = param.type_id;
  253. llvm::Value* param_value = llvm::PoisonValue::get(GetType(param_type_id));
  254. if (SemIR::ValueRepr::ForType(sem_ir(), param_type_id).kind !=
  255. SemIR::ValueRepr::None) {
  256. param_value = llvm_function->getArg(param_index);
  257. ++param_index;
  258. }
  259. // The value of the parameter is the value of the argument.
  260. function_lowering.SetLocal(param_id, param_value);
  261. // Match the portion of the pattern corresponding to the parameter against
  262. // the parameter value. For now this is always a single name binding,
  263. // possibly wrapped in `addr`.
  264. //
  265. // TODO: Support general patterns here.
  266. auto bind_name_id = param_ref_id;
  267. if (auto addr =
  268. sem_ir().insts().TryGetAs<SemIR::AddrPattern>(param_ref_id)) {
  269. bind_name_id = addr->inner_id;
  270. }
  271. auto bind_name = sem_ir().insts().Get(bind_name_id);
  272. // TODO: Should we stop passing compile-time bindings at runtime?
  273. CARBON_CHECK(bind_name.Is<SemIR::AnyBindName>());
  274. function_lowering.SetLocal(bind_name_id, param_value);
  275. }
  276. // Lower all blocks.
  277. for (auto block_id : body_block_ids) {
  278. CARBON_VLOG() << "Lowering " << block_id << "\n";
  279. auto* llvm_block = function_lowering.GetBlock(block_id);
  280. // Keep the LLVM blocks in lexical order.
  281. llvm_block->moveBefore(llvm_function->end());
  282. function_lowering.builder().SetInsertPoint(llvm_block);
  283. function_lowering.LowerBlock(block_id);
  284. }
  285. // LLVM requires that the entry block has no predecessors.
  286. auto* entry_block = &llvm_function->getEntryBlock();
  287. if (entry_block->hasNPredecessorsOrMore(1)) {
  288. auto* new_entry_block = llvm::BasicBlock::Create(
  289. llvm_context(), "entry", llvm_function, entry_block);
  290. llvm::BranchInst::Create(entry_block, new_entry_block);
  291. }
  292. }
  293. static auto BuildTypeForInst(FileContext& context, SemIR::ArrayType inst)
  294. -> llvm::Type* {
  295. return llvm::ArrayType::get(
  296. context.GetType(inst.element_type_id),
  297. context.sem_ir().GetArrayBoundValue(inst.bound_id));
  298. }
  299. static auto BuildTypeForInst(FileContext& context, SemIR::BuiltinInst inst)
  300. -> llvm::Type* {
  301. switch (inst.builtin_inst_kind) {
  302. case SemIR::BuiltinInstKind::Invalid:
  303. CARBON_FATAL() << "Unexpected builtin type in lowering.";
  304. case SemIR::BuiltinInstKind::Error:
  305. // This is a complete type but uses of it should never be lowered.
  306. return nullptr;
  307. case SemIR::BuiltinInstKind::TypeType:
  308. return context.GetTypeType();
  309. case SemIR::BuiltinInstKind::FloatType:
  310. return llvm::Type::getDoubleTy(context.llvm_context());
  311. case SemIR::BuiltinInstKind::IntType:
  312. return llvm::Type::getInt32Ty(context.llvm_context());
  313. case SemIR::BuiltinInstKind::BoolType:
  314. // TODO: We may want to have different representations for `bool`
  315. // storage
  316. // (`i8`) versus for `bool` values (`i1`).
  317. return llvm::Type::getInt1Ty(context.llvm_context());
  318. case SemIR::BuiltinInstKind::StringType:
  319. // TODO: Decide how we want to represent `StringType`.
  320. return llvm::PointerType::get(context.llvm_context(), 0);
  321. case SemIR::BuiltinInstKind::BoundMethodType:
  322. case SemIR::BuiltinInstKind::NamespaceType:
  323. case SemIR::BuiltinInstKind::WitnessType:
  324. // Return an empty struct as a placeholder.
  325. return llvm::StructType::get(context.llvm_context());
  326. }
  327. }
  328. // BuildTypeForInst is used to construct types for FileContext::BuildType below.
  329. // Implementations return the LLVM type for the instruction. This first overload
  330. // is the fallback handler for non-type instructions.
  331. template <typename InstT>
  332. requires(InstT::Kind.is_type() == SemIR::InstIsType::Never)
  333. static auto BuildTypeForInst(FileContext& /*context*/, InstT inst)
  334. -> llvm::Type* {
  335. CARBON_FATAL() << "Cannot use inst as type: " << inst;
  336. }
  337. static auto BuildTypeForInst(FileContext& context, SemIR::ClassType inst)
  338. -> llvm::Type* {
  339. auto object_repr_id =
  340. context.sem_ir().classes().Get(inst.class_id).object_repr_id;
  341. return context.GetType(object_repr_id);
  342. }
  343. static auto BuildTypeForInst(FileContext& context, SemIR::ConstType inst)
  344. -> llvm::Type* {
  345. return context.GetType(inst.inner_id);
  346. }
  347. static auto BuildTypeForInst(FileContext& context, SemIR::FloatType /*inst*/)
  348. -> llvm::Type* {
  349. // TODO: Handle different sizes.
  350. return llvm::Type::getDoubleTy(context.llvm_context());
  351. }
  352. static auto BuildTypeForInst(FileContext& context, SemIR::IntType inst)
  353. -> llvm::Type* {
  354. auto width =
  355. context.sem_ir().insts().TryGetAs<SemIR::IntLiteral>(inst.bit_width_id);
  356. CARBON_CHECK(width) << "Can't lower int type with symbolic width";
  357. return llvm::IntegerType::get(
  358. context.llvm_context(),
  359. context.sem_ir().ints().Get(width->int_id).getZExtValue());
  360. }
  361. static auto BuildTypeForInst(FileContext& context, SemIR::PointerType /*inst*/)
  362. -> llvm::Type* {
  363. return llvm::PointerType::get(context.llvm_context(), /*AddressSpace=*/0);
  364. }
  365. static auto BuildTypeForInst(FileContext& context, SemIR::StructType inst)
  366. -> llvm::Type* {
  367. auto fields = context.sem_ir().inst_blocks().Get(inst.fields_id);
  368. llvm::SmallVector<llvm::Type*> subtypes;
  369. subtypes.reserve(fields.size());
  370. for (auto field_id : fields) {
  371. auto field =
  372. context.sem_ir().insts().GetAs<SemIR::StructTypeField>(field_id);
  373. subtypes.push_back(context.GetType(field.field_type_id));
  374. }
  375. return llvm::StructType::get(context.llvm_context(), subtypes);
  376. }
  377. static auto BuildTypeForInst(FileContext& context, SemIR::TupleType inst)
  378. -> llvm::Type* {
  379. // TODO: Investigate special-casing handling of empty tuples so that they
  380. // can be collectively replaced with LLVM's void, particularly around
  381. // function returns. LLVM doesn't allow declaring variables with a void
  382. // type, so that may require significant special casing.
  383. auto elements = context.sem_ir().type_blocks().Get(inst.elements_id);
  384. llvm::SmallVector<llvm::Type*> subtypes;
  385. subtypes.reserve(elements.size());
  386. for (auto element_id : elements) {
  387. subtypes.push_back(context.GetType(element_id));
  388. }
  389. return llvm::StructType::get(context.llvm_context(), subtypes);
  390. }
  391. template <typename InstT>
  392. requires(InstT::Kind.template IsAnyOf<
  393. SemIR::AssociatedEntityType, SemIR::FunctionType,
  394. SemIR::GenericClassType, SemIR::GenericInterfaceType,
  395. SemIR::InterfaceType, SemIR::UnboundElementType>())
  396. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  397. -> llvm::Type* {
  398. // Return an empty struct as a placeholder.
  399. // TODO: Should we model an interface as a witness table, or an associated
  400. // entity as an index?
  401. return llvm::StructType::get(context.llvm_context());
  402. }
  403. // Treat non-monomorphized symbolic types as opaque.
  404. template <typename InstT>
  405. requires(InstT::Kind.template IsAnyOf<SemIR::BindSymbolicName,
  406. SemIR::InterfaceWitnessAccess>())
  407. static auto BuildTypeForInst(FileContext& context, InstT /*inst*/)
  408. -> llvm::Type* {
  409. return llvm::StructType::get(context.llvm_context());
  410. }
  411. auto FileContext::BuildType(SemIR::InstId inst_id) -> llvm::Type* {
  412. // Use overload resolution to select the implementation, producing compile
  413. // errors when BuildTypeForInst isn't defined for a given instruction.
  414. CARBON_KIND_SWITCH(sem_ir_->insts().Get(inst_id)) {
  415. #define CARBON_SEM_IR_INST_KIND(Name) \
  416. case CARBON_KIND(SemIR::Name inst): { \
  417. return BuildTypeForInst(*this, inst); \
  418. }
  419. #include "toolchain/sem_ir/inst_kind.def"
  420. }
  421. }
  422. auto FileContext::BuildGlobalVariableDecl(SemIR::VarStorage var_storage)
  423. -> llvm::GlobalVariable* {
  424. // TODO: Mangle name.
  425. auto mangled_name =
  426. *sem_ir().names().GetAsStringIfIdentifier(var_storage.name_id);
  427. auto* type =
  428. var_storage.type_id.is_valid() ? GetType(var_storage.type_id) : nullptr;
  429. return new llvm::GlobalVariable(llvm_module(), type,
  430. /*isConstant=*/false,
  431. llvm::GlobalVariable::InternalLinkage,
  432. /*Initializer=*/nullptr, mangled_name);
  433. }
  434. } // namespace Carbon::Lower