file_context.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/lower/file_context.h"
  5. #include "common/vlog.h"
  6. #include "llvm/ADT/STLExtras.h"
  7. #include "llvm/ADT/Sequence.h"
  8. #include "toolchain/base/kind_switch.h"
  9. #include "toolchain/lower/constant.h"
  10. #include "toolchain/lower/function_context.h"
  11. #include "toolchain/sem_ir/entry_point.h"
  12. #include "toolchain/sem_ir/file.h"
  13. #include "toolchain/sem_ir/function.h"
  14. #include "toolchain/sem_ir/inst.h"
  15. #include "toolchain/sem_ir/typed_insts.h"
  16. namespace Carbon::Lower {
  17. FileContext::FileContext(llvm::LLVMContext& llvm_context,
  18. llvm::StringRef module_name, const SemIR::File& sem_ir,
  19. const SemIR::InstNamer* inst_namer,
  20. llvm::raw_ostream* vlog_stream)
  21. : llvm_context_(&llvm_context),
  22. llvm_module_(std::make_unique<llvm::Module>(module_name, llvm_context)),
  23. sem_ir_(&sem_ir),
  24. inst_namer_(inst_namer),
  25. vlog_stream_(vlog_stream) {
  26. CARBON_CHECK(!sem_ir.has_errors())
  27. << "Generating LLVM IR from invalid SemIR::File is unsupported.";
  28. }
  29. // TODO: Move this to lower.cpp.
  30. auto FileContext::Run() -> std::unique_ptr<llvm::Module> {
  31. CARBON_CHECK(llvm_module_) << "Run can only be called once.";
  32. // Lower all types that were required to be complete. Note that this may
  33. // leave some entries in `types_` null, if those types were mentioned but not
  34. // used.
  35. types_.resize(sem_ir_->types().size());
  36. for (auto type_id : sem_ir_->complete_types()) {
  37. types_[type_id.index] = BuildType(sem_ir_->types().GetInstId(type_id));
  38. }
  39. // Lower function declarations.
  40. functions_.resize_for_overwrite(sem_ir_->functions().size());
  41. for (auto i : llvm::seq(sem_ir_->functions().size())) {
  42. functions_[i] = BuildFunctionDecl(SemIR::FunctionId(i));
  43. }
  44. // TODO: Lower global variable declarations.
  45. // Lower constants.
  46. constants_.resize(sem_ir_->insts().size());
  47. LowerConstants(*this, constants_);
  48. // Lower function definitions.
  49. for (auto i : llvm::seq(sem_ir_->functions().size())) {
  50. BuildFunctionDefinition(SemIR::FunctionId(i));
  51. }
  52. // TODO: Lower global variable initializers.
  53. return std::move(llvm_module_);
  54. }
  55. auto FileContext::GetGlobal(SemIR::InstId inst_id) -> llvm::Value* {
  56. auto inst = sem_ir().insts().Get(inst_id);
  57. auto const_id = sem_ir().constant_values().Get(inst_id);
  58. if (const_id.is_template()) {
  59. // For value expressions and initializing expressions, the value produced by
  60. // a constant instruction is a value representation of the constant. For
  61. // initializing expressions, `FinishInit` will perform a copy if needed.
  62. // TODO: Handle reference expression constants.
  63. auto* const_value = constants_[const_id.inst_id().index];
  64. // If we want a pointer to the constant, materialize a global to hold it.
  65. // TODO: We could reuse the same global if the constant is used more than
  66. // once.
  67. auto value_rep = SemIR::GetValueRepr(sem_ir(), inst.type_id());
  68. if (value_rep.kind == SemIR::ValueRepr::Pointer) {
  69. // Include both the name of the constant, if any, and the point of use in
  70. // the name of the variable.
  71. llvm::StringRef const_name;
  72. llvm::StringRef use_name;
  73. if (inst_namer_) {
  74. const_name = inst_namer_->GetUnscopedNameFor(const_id.inst_id());
  75. use_name = inst_namer_->GetUnscopedNameFor(inst_id);
  76. }
  77. // We always need to give the global a name even if the instruction namer
  78. // doesn't have one to use.
  79. if (const_name.empty()) {
  80. const_name = "const";
  81. }
  82. if (use_name.empty()) {
  83. use_name = "anon";
  84. }
  85. llvm::StringRef sep = (use_name[0] == '.') ? "" : ".";
  86. return new llvm::GlobalVariable(
  87. llvm_module(), GetType(sem_ir().GetPointeeType(value_rep.type_id)),
  88. /*isConstant=*/true, llvm::GlobalVariable::InternalLinkage,
  89. const_value, const_name + sep + use_name);
  90. }
  91. // Otherwise, we can use the constant value directly.
  92. return const_value;
  93. }
  94. // TODO: For generics, handle references to symbolic constants.
  95. CARBON_FATAL() << "Missing value: " << inst_id << " "
  96. << sem_ir().insts().Get(inst_id);
  97. }
  98. auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id)
  99. -> llvm::Function* {
  100. const auto& function = sem_ir().functions().Get(function_id);
  101. // Don't lower associated functions.
  102. // TODO: We shouldn't lower any function that has generic parameters.
  103. if (sem_ir().insts().Is<SemIR::InterfaceDecl>(
  104. sem_ir().name_scopes().Get(function.enclosing_scope_id).inst_id)) {
  105. return nullptr;
  106. }
  107. // Don't lower builtins.
  108. if (function.builtin_kind != SemIR::BuiltinFunctionKind::None) {
  109. return nullptr;
  110. }
  111. // Don't lower unused functions.
  112. if (function.return_slot == SemIR::Function::ReturnSlot::NotComputed) {
  113. return nullptr;
  114. }
  115. const bool has_return_slot = function.has_return_slot();
  116. auto implicit_param_refs =
  117. sem_ir().inst_blocks().Get(function.implicit_param_refs_id);
  118. auto param_refs = sem_ir().inst_blocks().Get(function.param_refs_id);
  119. SemIR::InitRepr return_rep =
  120. function.return_type_id.is_valid()
  121. ? SemIR::GetInitRepr(sem_ir(), function.return_type_id)
  122. : SemIR::InitRepr{.kind = SemIR::InitRepr::None};
  123. CARBON_CHECK(return_rep.has_return_slot() == has_return_slot);
  124. llvm::SmallVector<llvm::Type*> param_types;
  125. // TODO: Consider either storing `param_inst_ids` somewhere so that we can
  126. // reuse it from `BuildFunctionDefinition` and when building calls, or factor
  127. // out a mechanism to compute the mapping between parameters and arguments on
  128. // demand.
  129. llvm::SmallVector<SemIR::InstId> param_inst_ids;
  130. auto max_llvm_params =
  131. has_return_slot + implicit_param_refs.size() + param_refs.size();
  132. param_types.reserve(max_llvm_params);
  133. param_inst_ids.reserve(max_llvm_params);
  134. if (has_return_slot) {
  135. param_types.push_back(GetType(function.return_type_id)->getPointerTo());
  136. param_inst_ids.push_back(function.return_storage_id);
  137. }
  138. for (auto param_ref_id :
  139. llvm::concat<const SemIR::InstId>(implicit_param_refs, param_refs)) {
  140. auto param_type_id =
  141. SemIR::Function::GetParamFromParamRefId(sem_ir(), param_ref_id)
  142. .second.type_id;
  143. switch (auto value_rep = SemIR::GetValueRepr(sem_ir(), param_type_id);
  144. value_rep.kind) {
  145. case SemIR::ValueRepr::Unknown:
  146. CARBON_FATAL()
  147. << "Incomplete parameter type lowering function declaration";
  148. case SemIR::ValueRepr::None:
  149. break;
  150. case SemIR::ValueRepr::Copy:
  151. case SemIR::ValueRepr::Custom:
  152. case SemIR::ValueRepr::Pointer:
  153. param_types.push_back(GetType(value_rep.type_id));
  154. param_inst_ids.push_back(param_ref_id);
  155. break;
  156. }
  157. }
  158. // If the initializing representation doesn't produce a value, set the return
  159. // type to void.
  160. llvm::Type* return_type = return_rep.kind == SemIR::InitRepr::ByCopy
  161. ? GetType(function.return_type_id)
  162. : llvm::Type::getVoidTy(llvm_context());
  163. std::string mangled_name;
  164. if (SemIR::IsEntryPoint(sem_ir(), function_id)) {
  165. // TODO: Add an implicit `return 0` if `Run` doesn't return `i32`.
  166. mangled_name = "main";
  167. } else if (auto name =
  168. sem_ir().names().GetAsStringIfIdentifier(function.name_id)) {
  169. // TODO: Decide on a name mangling scheme.
  170. mangled_name = *name;
  171. } else {
  172. CARBON_FATAL() << "Unexpected special name for function: "
  173. << function.name_id;
  174. }
  175. llvm::FunctionType* function_type =
  176. llvm::FunctionType::get(return_type, param_types, /*isVarArg=*/false);
  177. auto* llvm_function =
  178. llvm::Function::Create(function_type, llvm::Function::ExternalLinkage,
  179. mangled_name, llvm_module());
  180. // Set up parameters and the return slot.
  181. for (auto [inst_id, arg] :
  182. llvm::zip_equal(param_inst_ids, llvm_function->args())) {
  183. auto name_id = SemIR::NameId::Invalid;
  184. if (inst_id == function.return_storage_id) {
  185. name_id = SemIR::NameId::ReturnSlot;
  186. arg.addAttr(llvm::Attribute::getWithStructRetType(
  187. llvm_context(), GetType(function.return_type_id)));
  188. } else {
  189. name_id = SemIR::Function::GetParamFromParamRefId(sem_ir(), inst_id)
  190. .second.name_id;
  191. }
  192. arg.setName(sem_ir().names().GetIRBaseName(name_id));
  193. }
  194. return llvm_function;
  195. }
  196. auto FileContext::BuildFunctionDefinition(SemIR::FunctionId function_id)
  197. -> void {
  198. const auto& function = sem_ir().functions().Get(function_id);
  199. const auto& body_block_ids = function.body_block_ids;
  200. if (body_block_ids.empty()) {
  201. // Function is probably defined in another file; not an error.
  202. return;
  203. }
  204. llvm::Function* llvm_function = GetFunction(function_id);
  205. FunctionContext function_lowering(*this, llvm_function, vlog_stream_);
  206. const bool has_return_slot = function.has_return_slot();
  207. // Add parameters to locals.
  208. // TODO: This duplicates the mapping between sem_ir instructions and LLVM
  209. // function parameters that was already computed in BuildFunctionDecl.
  210. // We should only do that once.
  211. auto implicit_param_refs =
  212. sem_ir().inst_blocks().Get(function.implicit_param_refs_id);
  213. auto param_refs = sem_ir().inst_blocks().Get(function.param_refs_id);
  214. int param_index = 0;
  215. if (has_return_slot) {
  216. function_lowering.SetLocal(function.return_storage_id,
  217. llvm_function->getArg(param_index));
  218. ++param_index;
  219. }
  220. for (auto param_ref_id :
  221. llvm::concat<const SemIR::InstId>(implicit_param_refs, param_refs)) {
  222. auto [param_id, param] =
  223. SemIR::Function::GetParamFromParamRefId(sem_ir(), param_ref_id);
  224. // Get the value of the parameter from the function argument.
  225. auto param_type_id = param.type_id;
  226. llvm::Value* param_value = llvm::PoisonValue::get(GetType(param_type_id));
  227. if (SemIR::GetValueRepr(sem_ir(), param_type_id).kind !=
  228. SemIR::ValueRepr::None) {
  229. param_value = llvm_function->getArg(param_index);
  230. ++param_index;
  231. }
  232. // The value of the parameter is the value of the argument.
  233. function_lowering.SetLocal(param_id, param_value);
  234. // Match the portion of the pattern corresponding to the parameter against
  235. // the parameter value. For now this is always a single name binding,
  236. // possibly wrapped in `addr`.
  237. //
  238. // TODO: Support general patterns here.
  239. auto bind_name_id = param_ref_id;
  240. if (auto addr =
  241. sem_ir().insts().TryGetAs<SemIR::AddrPattern>(param_ref_id)) {
  242. bind_name_id = addr->inner_id;
  243. }
  244. auto bind_name = sem_ir().insts().Get(bind_name_id);
  245. // TODO: Should we stop passing compile-time bindings at runtime?
  246. CARBON_CHECK(bind_name.Is<SemIR::AnyBindName>());
  247. function_lowering.SetLocal(bind_name_id, param_value);
  248. }
  249. // Lower all blocks.
  250. for (auto block_id : body_block_ids) {
  251. CARBON_VLOG() << "Lowering " << block_id << "\n";
  252. auto* llvm_block = function_lowering.GetBlock(block_id);
  253. // Keep the LLVM blocks in lexical order.
  254. llvm_block->moveBefore(llvm_function->end());
  255. function_lowering.builder().SetInsertPoint(llvm_block);
  256. function_lowering.LowerBlock(block_id);
  257. }
  258. // LLVM requires that the entry block has no predecessors.
  259. auto* entry_block = &llvm_function->getEntryBlock();
  260. if (entry_block->hasNPredecessorsOrMore(1)) {
  261. auto* new_entry_block = llvm::BasicBlock::Create(
  262. llvm_context(), "entry", llvm_function, entry_block);
  263. llvm::BranchInst::Create(entry_block, new_entry_block);
  264. }
  265. }
  266. auto FileContext::BuildType(SemIR::InstId inst_id) -> llvm::Type* {
  267. CARBON_KIND_SWITCH(sem_ir_->insts().Get(inst_id)) {
  268. case CARBON_KIND(SemIR::ArrayType inst): {
  269. return llvm::ArrayType::get(GetType(inst.element_type_id),
  270. sem_ir_->GetArrayBoundValue(inst.bound_id));
  271. }
  272. case CARBON_KIND(SemIR::Builtin inst): {
  273. switch (inst.builtin_kind) {
  274. case SemIR::BuiltinKind::Invalid:
  275. case SemIR::BuiltinKind::Error:
  276. CARBON_FATAL() << "Unexpected builtin type in lowering.";
  277. case SemIR::BuiltinKind::TypeType:
  278. return GetTypeType();
  279. case SemIR::BuiltinKind::FloatType:
  280. return llvm::Type::getDoubleTy(*llvm_context_);
  281. case SemIR::BuiltinKind::IntType:
  282. return llvm::Type::getInt32Ty(*llvm_context_);
  283. case SemIR::BuiltinKind::BoolType:
  284. // TODO: We may want to have different representations for `bool`
  285. // storage
  286. // (`i8`) versus for `bool` values (`i1`).
  287. return llvm::Type::getInt1Ty(*llvm_context_);
  288. case SemIR::BuiltinKind::StringType:
  289. // TODO: Decide how we want to represent `StringType`.
  290. return llvm::PointerType::get(*llvm_context_, 0);
  291. case SemIR::BuiltinKind::BoundMethodType:
  292. case SemIR::BuiltinKind::NamespaceType:
  293. case SemIR::BuiltinKind::WitnessType:
  294. // Return an empty struct as a placeholder.
  295. return llvm::StructType::get(*llvm_context_);
  296. }
  297. }
  298. case CARBON_KIND(SemIR::ClassType inst): {
  299. auto object_repr_id =
  300. sem_ir_->classes().Get(inst.class_id).object_repr_id;
  301. return GetType(object_repr_id);
  302. }
  303. case CARBON_KIND(SemIR::ConstType inst): {
  304. return GetType(inst.inner_id);
  305. }
  306. case SemIR::FloatType::Kind: {
  307. // TODO: Handle different sizes.
  308. return llvm::Type::getDoubleTy(*llvm_context_);
  309. }
  310. case CARBON_KIND(SemIR::IntType inst): {
  311. auto width =
  312. sem_ir_->insts().TryGetAs<SemIR::IntLiteral>(inst.bit_width_id);
  313. CARBON_CHECK(width) << "Can't lower int type with symbolic width";
  314. return llvm::IntegerType::get(
  315. *llvm_context_, sem_ir_->ints().Get(width->int_id).getZExtValue());
  316. }
  317. case SemIR::PointerType::Kind: {
  318. return llvm::PointerType::get(*llvm_context_, /*AddressSpace=*/0);
  319. }
  320. case CARBON_KIND(SemIR::StructType inst): {
  321. auto fields = sem_ir_->inst_blocks().Get(inst.fields_id);
  322. llvm::SmallVector<llvm::Type*> subtypes;
  323. subtypes.reserve(fields.size());
  324. for (auto field_id : fields) {
  325. auto field = sem_ir_->insts().GetAs<SemIR::StructTypeField>(field_id);
  326. subtypes.push_back(GetType(field.field_type_id));
  327. }
  328. return llvm::StructType::get(*llvm_context_, subtypes);
  329. }
  330. case CARBON_KIND(SemIR::TupleType inst): {
  331. // TODO: Investigate special-casing handling of empty tuples so that they
  332. // can be collectively replaced with LLVM's void, particularly around
  333. // function returns. LLVM doesn't allow declaring variables with a void
  334. // type, so that may require significant special casing.
  335. auto elements = sem_ir_->type_blocks().Get(inst.elements_id);
  336. llvm::SmallVector<llvm::Type*> subtypes;
  337. subtypes.reserve(elements.size());
  338. for (auto element_id : elements) {
  339. subtypes.push_back(GetType(element_id));
  340. }
  341. return llvm::StructType::get(*llvm_context_, subtypes);
  342. }
  343. case SemIR::AssociatedEntityType::Kind:
  344. case SemIR::InterfaceType::Kind:
  345. case SemIR::FunctionType::Kind:
  346. case SemIR::GenericClassType::Kind:
  347. case SemIR::UnboundElementType::Kind: {
  348. // Return an empty struct as a placeholder.
  349. // TODO: Should we model an interface as a witness table, or an associated
  350. // entity as an index?
  351. return llvm::StructType::get(*llvm_context_);
  352. }
  353. // Treat non-monomorphized symbolic types as opaque.
  354. case SemIR::BindSymbolicName::Kind:
  355. case SemIR::InterfaceWitnessAccess::Kind: {
  356. return llvm::StructType::get(*llvm_context_);
  357. }
  358. #define CARBON_SEM_IR_INST_KIND_TYPE_ALWAYS(...)
  359. #define CARBON_SEM_IR_INST_KIND_TYPE_MAYBE(...)
  360. #define CARBON_SEM_IR_INST_KIND(Name) case SemIR::Name::Kind:
  361. #include "toolchain/sem_ir/inst_kind.def"
  362. CARBON_FATAL() << "Cannot use inst as type: " << inst_id << " "
  363. << sem_ir_->insts().Get(inst_id);
  364. }
  365. }
  366. } // namespace Carbon::Lower