// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "toolchain/check/context.h"

#include <string>
#include <utility>

#include "common/check.h"
#include "common/vlog.h"
#include "llvm/ADT/Sequence.h"
#include "toolchain/check/declaration_name_stack.h"
#include "toolchain/check/inst_block_stack.h"
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/node_kind.h"
#include "toolchain/sem_ir/file.h"
#include "toolchain/sem_ir/inst.h"
#include "toolchain/sem_ir/inst_kind.h"

namespace Carbon::Check {

Context::Context(const Lex::TokenizedBuffer& tokens, DiagnosticEmitter& emitter,
                 const Parse::Tree& parse_tree, SemIR::File& sem_ir,
                 llvm::raw_ostream* vlog_stream)
    : tokens_(&tokens),
      emitter_(&emitter),
      parse_tree_(&parse_tree),
      sem_ir_(&sem_ir),
      vlog_stream_(vlog_stream),
      node_stack_(parse_tree, vlog_stream),
      inst_block_stack_("inst_block_stack_", sem_ir, vlog_stream),
      params_or_args_stack_("params_or_args_stack_", sem_ir, vlog_stream),
      args_type_info_stack_("args_type_info_stack_", sem_ir, vlog_stream),
      declaration_name_stack_(this) {
  // Inserts the "Error" and "Type" types as "used types" so that
  // canonicalization can skip them. We don't emit either for lowering.
  canonical_types_.insert({SemIR::InstId::BuiltinError, SemIR::TypeId::Error});
  canonical_types_.insert(
      {SemIR::InstId::BuiltinTypeType, SemIR::TypeId::TypeType});
}

auto Context::TODO(Parse::Node parse_node, std::string label) -> bool {
  CARBON_DIAGNOSTIC(SemanticsTodo, Error, "Semantics TODO: `{0}`.",
                    std::string);
  emitter_->Emit(parse_node, SemanticsTodo, std::move(label));
  return false;
}

auto Context::VerifyOnFinish() -> void {
  // Information in all the various context objects should be cleaned up as
  // various pieces of context go out of scope. At this point, nothing should
  // remain.
  // node_stack_ will still contain top-level entities.
  CARBON_CHECK(name_lookup_.empty()) << name_lookup_.size();
  CARBON_CHECK(scope_stack_.empty()) << scope_stack_.size();
  CARBON_CHECK(inst_block_stack_.empty()) << inst_block_stack_.size();
  CARBON_CHECK(params_or_args_stack_.empty()) << params_or_args_stack_.size();
}

auto Context::AddInst(SemIR::Inst inst) -> SemIR::InstId {
  auto inst_id = inst_block_stack_.AddInst(inst);
  CARBON_VLOG() << "AddInst: " << inst << "\n";
  return inst_id;
}

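// Adds an instruction to the IR without attaching it to any code block, and
// records it in the constants list.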
auto Context::AddConstantInst(SemIR::Inst inst) -> SemIR::InstId {
  auto inst_id = insts().AddInNoBlock(inst);
  constants().Add(inst_id);
  CARBON_VLOG() << "AddConstantInst: " << inst << "\n";
  return inst_id;
}

auto Context::AddInstAndPush(Parse::Node parse_node, SemIR::Inst inst) -> void {
  auto inst_id = AddInst(inst);
  node_stack_.Push(parse_node, inst_id);
}

auto Context::DiagnoseDuplicateName(Parse::Node parse_node,
                                    SemIR::InstId prev_def_id) -> void {
  CARBON_DIAGNOSTIC(NameDeclarationDuplicate, Error,
                    "Duplicate name being declared in the same scope.");
  CARBON_DIAGNOSTIC(NameDeclarationPrevious, Note,
                    "Name is previously declared here.");
  auto prev_def = insts().Get(prev_def_id);
  emitter_->Build(parse_node, NameDeclarationDuplicate)
      .Note(prev_def.parse_node(), NameDeclarationPrevious)
      .Emit();
}

auto Context::DiagnoseNameNotFound(Parse::Node parse_node, IdentifierId name_id)
    -> void {
  CARBON_DIAGNOSTIC(NameNotFound, Error, "Name `{0}` not found.",
                    llvm::StringRef);
  emitter_->Emit(parse_node, NameNotFound, identifiers().Get(name_id));
}

auto Context::NoteIncompleteClass(SemIR::ClassId class_id,
                                  DiagnosticBuilder& builder) -> void {
  CARBON_DIAGNOSTIC(ClassForwardDeclaredHere, Note,
                    "Class was forward declared here.");
  CARBON_DIAGNOSTIC(ClassIncompleteWithinDefinition, Note,
                    "Class is incomplete within its definition.");
  const auto& class_info = classes().Get(class_id);
  CARBON_CHECK(!class_info.is_defined()) << "Class is not incomplete";
  if (class_info.definition_id.is_valid()) {
    builder.Note(insts().Get(class_info.definition_id).parse_node(),
                 ClassIncompleteWithinDefinition);
  } else {
    builder.Note(insts().Get(class_info.declaration_id).parse_node(),
                 ClassForwardDeclaredHere);
  }
}

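// Adds a name to name lookup in the current scope, diagnosing a redeclaration
// of a name that already exists in this scope.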
auto Context::AddNameToLookup(Parse::Node name_node, IdentifierId name_id,
                              SemIR::InstId target_id) -> void {
  if (current_scope().names.insert(name_id).second) {
    // TODO: Reject if we previously performed a failed lookup for this name in
    // this scope or a scope nested within it.
    auto& lexical_results = name_lookup_[name_id];
    CARBON_CHECK(lexical_results.empty() ||
                 lexical_results.back().scope_index < current_scope_index())
        << "Failed to clean up after scope nested within the current scope";
    lexical_results.push_back(
        {.node_id = target_id, .scope_index = current_scope_index()});
  } else {
    DiagnoseDuplicateName(name_node, name_lookup_[name_id].back().node_id);
  }
}

auto Context::LookupNameInDeclaration(Parse::Node parse_node,
                                      IdentifierId name_id,
                                      SemIR::NameScopeId scope_id)
    -> SemIR::InstId {
  if (scope_id == SemIR::NameScopeId::Invalid) {
    // Look for a name in the current scope only. There are two cases where the
    // name would be in an outer scope:
    //
    // - The name is the sole component of the declared name:
    //
    //     class A;
    //     fn F() {
    //       class A;
    //     }
    //
    //   In this case, the inner A is not the same class as the outer A, so
    //   lookup should not find the outer A.
    //
    // - The name is a qualifier of some larger declared name:
    //
    //     class A { class B; }
    //     fn F() {
    //       class A.B {}
    //     }
    //
    //   In this case, we're not in the correct scope to define a member of
    //   class A, so we should reject, and we achieve this by not finding the
    //   name A from the outer scope.
    if (auto name_it = name_lookup_.find(name_id);
        name_it != name_lookup_.end()) {
      CARBON_CHECK(!name_it->second.empty())
          << "Should have been erased: " << identifiers().Get(name_id);
      auto result = name_it->second.back();
      if (result.scope_index == current_scope_index()) {
        return result.node_id;
      }
    }
    return SemIR::InstId::Invalid;
  } else {
    // TODO: Once we support `extend`, do not look into `extend`ed scopes here,
    // following the same logic as above.
    return LookupQualifiedName(parse_node, name_id, scope_id,
                               /*required=*/false);
  }
}

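// Performs an unqualified name lookup, combining results from enclosing
// lexical scopes with lookups into enclosing non-lexical scopes such as
// namespaces and classes, innermost first.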
auto Context::LookupUnqualifiedName(Parse::Node parse_node,
                                    IdentifierId name_id) -> SemIR::InstId {
  // TODO: Check for shadowed lookup results.

  // Find the results from enclosing lexical scopes. These will be combined with
  // results from non-lexical scopes such as namespaces and classes.
  llvm::ArrayRef<LexicalLookupResult> lexical_results;
  if (auto name_it = name_lookup_.find(name_id);
      name_it != name_lookup_.end()) {
    lexical_results = name_it->second;
    CARBON_CHECK(!lexical_results.empty())
        << "Should have been erased: " << identifiers().Get(name_id);
  }

  // Walk the non-lexical scopes and perform lookups into each of them.
  for (auto [index, name_scope_id] : llvm::reverse(non_lexical_scope_stack_)) {
    // If the innermost lexical result is within this non-lexical scope, then
    // it shadows all further non-lexical results and we're done.
    if (!lexical_results.empty() &&
        lexical_results.back().scope_index > index) {
      return lexical_results.back().node_id;
    }
    auto non_lexical_result =
        LookupQualifiedName(parse_node, name_id, name_scope_id,
                            /*required=*/false);
    if (non_lexical_result.is_valid()) {
      return non_lexical_result;
    }
  }

  if (!lexical_results.empty()) {
    return lexical_results.back().node_id;
  }

  // We didn't find anything at all.
  DiagnoseNameNotFound(parse_node, name_id);
  return SemIR::InstId::BuiltinError;
}

auto Context::LookupQualifiedName(Parse::Node parse_node, IdentifierId name_id,
                                  SemIR::NameScopeId scope_id, bool required)
    -> SemIR::InstId {
  CARBON_CHECK(scope_id.is_valid()) << "No scope to perform lookup into";
  const auto& scope = name_scopes().Get(scope_id);
  auto it = scope.find(name_id);
  if (it == scope.end()) {
    // TODO: Also perform lookups into `extend`ed scopes.
    if (required) {
      DiagnoseNameNotFound(parse_node, name_id);
      return SemIR::InstId::BuiltinError;
    }
    return SemIR::InstId::Invalid;
  }
  return it->second;
}

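// Pushes a new scope onto the scope stack. When `scope_id` is valid, the scope
// is also tracked as a non-lexical scope for unqualified name lookup.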
auto Context::PushScope(SemIR::InstId scope_inst_id,
                        SemIR::NameScopeId scope_id) -> void {
  scope_stack_.push_back({.index = next_scope_index_,
                          .scope_inst_id = scope_inst_id,
                          .scope_id = scope_id});
  if (scope_id.is_valid()) {
    non_lexical_scope_stack_.push_back({next_scope_index_, scope_id});
  }

  // TODO: Handle this case more gracefully.
  CARBON_CHECK(next_scope_index_.index != std::numeric_limits<int32_t>::max())
      << "Ran out of scopes";
  ++next_scope_index_.index;
}

auto Context::PopScope() -> void {
  auto scope = scope_stack_.pop_back_val();
  for (const auto& str_id : scope.names) {
    auto it = name_lookup_.find(str_id);
    CARBON_CHECK(it->second.back().scope_index == scope.index)
        << "Inconsistent scope index for name " << identifiers().Get(str_id);
    if (it->second.size() == 1) {
      // Erase names that no longer resolve.
      name_lookup_.erase(it);
    } else {
      it->second.pop_back();
    }
  }
  if (scope.scope_id.is_valid()) {
    CARBON_CHECK(non_lexical_scope_stack_.back().first == scope.index);
    non_lexical_scope_stack_.pop_back();
  }
}

auto Context::PopToScope(ScopeIndex index) -> void {
  while (current_scope_index() > index) {
    PopScope();
  }
  CARBON_CHECK(current_scope_index() == index)
      << "Scope index " << index << " does not enclose the current scope "
      << current_scope_index();
}

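// Follows a chain of NameReference instructions to the instruction they
// ultimately refer to.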
auto Context::FollowNameReferences(SemIR::InstId inst_id) -> SemIR::InstId {
  while (auto name_ref = insts().Get(inst_id).TryAs<SemIR::NameReference>()) {
    inst_id = name_ref->value_id;
  }
  return inst_id;
}

auto Context::GetConstantValue(SemIR::InstId inst_id) -> SemIR::InstId {
  // TODO: The constant value of an instruction should be computed as we build
  // the instruction, or at least cached once computed.
  while (true) {
    auto inst = insts().Get(inst_id);
    switch (inst.kind()) {
      case SemIR::NameReference::Kind:
        inst_id = inst.As<SemIR::NameReference>().value_id;
        break;
      case SemIR::BindName::Kind:
        inst_id = inst.As<SemIR::BindName>().value_id;
        break;
      case SemIR::Field::Kind:
      case SemIR::FunctionDeclaration::Kind:
        return inst_id;
      default:
        // TODO: Handle the remaining cases.
        return SemIR::InstId::Invalid;
    }
  }
}

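// Shared implementation for the AddDominatedBlockAndBranch* functions below:
// adds a new instruction block and a `BranchNode` branch to it from the
// current position, unless the current position is unreachable.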
template <typename BranchNode, typename... Args>
static auto AddDominatedBlockAndBranchImpl(Context& context,
                                           Parse::Node parse_node, Args... args)
    -> SemIR::InstBlockId {
  if (!context.inst_block_stack().is_current_block_reachable()) {
    return SemIR::InstBlockId::Unreachable;
  }
  auto block_id = context.inst_blocks().AddDefaultValue();
  context.AddInst(BranchNode{parse_node, block_id, args...});
  return block_id;
}

auto Context::AddDominatedBlockAndBranch(Parse::Node parse_node)
    -> SemIR::InstBlockId {
  return AddDominatedBlockAndBranchImpl<SemIR::Branch>(*this, parse_node);
}

auto Context::AddDominatedBlockAndBranchWithArg(Parse::Node parse_node,
                                                SemIR::InstId arg_id)
    -> SemIR::InstBlockId {
  return AddDominatedBlockAndBranchImpl<SemIR::BranchWithArg>(*this, parse_node,
                                                              arg_id);
}

auto Context::AddDominatedBlockAndBranchIf(Parse::Node parse_node,
                                           SemIR::InstId cond_id)
    -> SemIR::InstBlockId {
  return AddDominatedBlockAndBranchImpl<SemIR::BranchIf>(*this, parse_node,
                                                         cond_id);
}

auto Context::AddConvergenceBlockAndPush(Parse::Node parse_node, int num_blocks)
    -> void {
  CARBON_CHECK(num_blocks >= 2) << "no convergence";

  SemIR::InstBlockId new_block_id = SemIR::InstBlockId::Unreachable;
  for ([[maybe_unused]] auto _ : llvm::seq(num_blocks)) {
    if (inst_block_stack().is_current_block_reachable()) {
      if (new_block_id == SemIR::InstBlockId::Unreachable) {
        new_block_id = inst_blocks().AddDefaultValue();
      }
      AddInst(SemIR::Branch{parse_node, new_block_id});
    }
    inst_block_stack().Pop();
  }
  inst_block_stack().Push(new_block_id);
}

auto Context::AddConvergenceBlockWithArgAndPush(
    Parse::Node parse_node, std::initializer_list<SemIR::InstId> block_args)
    -> SemIR::InstId {
  CARBON_CHECK(block_args.size() >= 2) << "no convergence";

  SemIR::InstBlockId new_block_id = SemIR::InstBlockId::Unreachable;
  for (auto arg_id : block_args) {
    if (inst_block_stack().is_current_block_reachable()) {
      if (new_block_id == SemIR::InstBlockId::Unreachable) {
        new_block_id = inst_blocks().AddDefaultValue();
      }
      AddInst(SemIR::BranchWithArg{parse_node, new_block_id, arg_id});
    }
    inst_block_stack().Pop();
  }
  inst_block_stack().Push(new_block_id);

  // Acquire the result value.
  SemIR::TypeId result_type_id = insts().Get(*block_args.begin()).type_id();
  return AddInst(SemIR::BlockArg{parse_node, result_type_id, new_block_id});
}

// Add the current code block to the enclosing function.
auto Context::AddCurrentCodeBlockToFunction() -> void {
  CARBON_CHECK(!inst_block_stack().empty()) << "no current code block";
  CARBON_CHECK(!return_scope_stack().empty()) << "no current function";

  if (!inst_block_stack().is_current_block_reachable()) {
    // Don't include unreachable blocks in the function.
    return;
  }

  auto function_id =
      insts()
          .GetAs<SemIR::FunctionDeclaration>(return_scope_stack().back())
          .function_id;
  functions()
      .Get(function_id)
      .body_block_ids.push_back(inst_block_stack().PeekOrAdd());
}

auto Context::is_current_position_reachable() -> bool {
  if (!inst_block_stack().is_current_block_reachable()) {
    return false;
  }

  // Our current position is at the end of a reachable block. That position is
  // reachable unless the previous instruction is a terminator instruction.
  auto block_contents = inst_block_stack().PeekCurrentBlockContents();
  if (block_contents.empty()) {
    return true;
  }
  const auto& last_inst = insts().Get(block_contents.back());
  return last_inst.kind().terminator_kind() !=
         SemIR::TerminatorKind::Terminator;
}

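// The ParamOrArg* functions below collect parameter and argument expressions
// from the node stack into the current block of params_or_args_stack_.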
auto Context::ParamOrArgStart() -> void { params_or_args_stack_.Push(); }

auto Context::ParamOrArgComma() -> void {
  ParamOrArgSave(node_stack_.PopExpression());
}

auto Context::ParamOrArgEndNoPop(Parse::NodeKind start_kind) -> void {
  if (parse_tree_->node_kind(node_stack_.PeekParseNode()) != start_kind) {
    ParamOrArgSave(node_stack_.PopExpression());
  }
}

auto Context::ParamOrArgPop() -> SemIR::InstBlockId {
  return params_or_args_stack_.Pop();
}

auto Context::ParamOrArgEnd(Parse::NodeKind start_kind) -> SemIR::InstBlockId {
  ParamOrArgEndNoPop(start_kind);
  return ParamOrArgPop();
}

namespace {
// Worklist-based type completion mechanism.
//
// When attempting to complete a type, we may find other types that also need to
// be completed: types nested within that type, and the value representation of
// the type. In order to complete a type without recursing arbitrarily deeply,
// we use a worklist of tasks:
//
// - An `AddNestedIncompleteTypes` step adds a task for all incomplete types
//   nested within a type to the work list.
// - A `BuildValueRepresentation` step computes the value representation for a
//   type, once all of its nested types are complete, and marks the type as
//   complete.
class TypeCompleter {
 public:
  TypeCompleter(
      Context& context,
      std::optional<llvm::function_ref<auto()->Context::DiagnosticBuilder>>
          diagnoser)
      : context_(context), diagnoser_(diagnoser) {}

  // Attempts to complete the given type. Returns true if it is now complete,
  // false if it could not be completed.
  auto Complete(SemIR::TypeId type_id) -> bool {
    Push(type_id);
    while (!work_list_.empty()) {
      if (!ProcessStep()) {
        return false;
      }
    }
    return true;
  }

 private:
  // Adds `type_id` to the work list, if it's not already complete.
  auto Push(SemIR::TypeId type_id) -> void {
    if (!context_.sem_ir().IsTypeComplete(type_id)) {
      work_list_.push_back({type_id, Phase::AddNestedIncompleteTypes});
    }
  }

  // Runs the next step.
  auto ProcessStep() -> bool {
    auto [type_id, phase] = work_list_.back();

    // We might have enqueued the same type more than once. Just skip the
    // type if it's already complete.
    if (context_.sem_ir().IsTypeComplete(type_id)) {
      work_list_.pop_back();
      return true;
    }

    auto inst_id = context_.sem_ir().GetTypeAllowBuiltinTypes(type_id);
    auto inst = context_.insts().Get(inst_id);
    auto old_work_list_size = work_list_.size();

    switch (phase) {
      case Phase::AddNestedIncompleteTypes:
        if (!AddNestedIncompleteTypes(inst)) {
          return false;
        }
        CARBON_CHECK(work_list_.size() >= old_work_list_size)
            << "AddNestedIncompleteTypes should not remove work items";
        work_list_[old_work_list_size - 1].phase =
            Phase::BuildValueRepresentation;
        break;

      case Phase::BuildValueRepresentation: {
        auto value_rep = BuildValueRepresentation(type_id, inst);
        context_.sem_ir().CompleteType(type_id, value_rep);
        CARBON_CHECK(old_work_list_size == work_list_.size())
            << "BuildValueRepresentation should not change work items";
        work_list_.pop_back();

        // Also complete the value representation type, if necessary. This
        // should never fail: the value representation shouldn't require any
        // additional nested types to be complete.
        if (!context_.sem_ir().IsTypeComplete(value_rep.type_id)) {
          work_list_.push_back(
              {value_rep.type_id, Phase::BuildValueRepresentation});
        }
        // For a pointer representation, the pointee also needs to be complete.
        if (value_rep.kind == SemIR::ValueRepresentation::Pointer) {
          auto pointee_type_id =
              context_.sem_ir().GetPointeeType(value_rep.type_id);
          if (!context_.sem_ir().IsTypeComplete(pointee_type_id)) {
            work_list_.push_back(
                {pointee_type_id, Phase::BuildValueRepresentation});
          }
        }
        break;
      }
    }

    return true;
  }

  // Adds any types nested within `type_inst` that need to be complete for
  // `type_inst` to be complete to our work list.
  auto AddNestedIncompleteTypes(SemIR::Inst type_inst) -> bool {
    switch (type_inst.kind()) {
      case SemIR::ArrayType::Kind:
        Push(type_inst.As<SemIR::ArrayType>().element_type_id);
        break;
      case SemIR::StructType::Kind:
        for (auto field_id : context_.inst_blocks().Get(
                 type_inst.As<SemIR::StructType>().fields_id)) {
          Push(context_.insts()
                   .GetAs<SemIR::StructTypeField>(field_id)
                   .field_type_id);
        }
        break;
      case SemIR::TupleType::Kind:
        for (auto element_type_id : context_.type_blocks().Get(
                 type_inst.As<SemIR::TupleType>().elements_id)) {
          Push(element_type_id);
        }
        break;
      case SemIR::ClassType::Kind: {
        auto class_type = type_inst.As<SemIR::ClassType>();
        auto& class_info = context_.classes().Get(class_type.class_id);
        if (!class_info.is_defined()) {
          if (diagnoser_) {
            auto builder = (*diagnoser_)();
            context_.NoteIncompleteClass(class_type.class_id, builder);
            builder.Emit();
          }
          return false;
        }
        Push(class_info.object_representation_id);
        break;
      }
      case SemIR::ConstType::Kind:
        Push(type_inst.As<SemIR::ConstType>().inner_id);
        break;
      default:
        break;
    }
    return true;
  }

  // Makes an empty value representation, which is used for types that have no
  // state, such as empty structs and tuples.
  auto MakeEmptyRepresentation(Parse::Node parse_node) const
      -> SemIR::ValueRepresentation {
    return {.kind = SemIR::ValueRepresentation::None,
            .type_id = context_.CanonicalizeTupleType(parse_node, {})};
  }

  // Makes a value representation that uses pass-by-copy, copying the given
  // type.
  auto MakeCopyRepresentation(
      SemIR::TypeId rep_id,
      SemIR::ValueRepresentation::AggregateKind aggregate_kind =
          SemIR::ValueRepresentation::NotAggregate) const
      -> SemIR::ValueRepresentation {
    return {.kind = SemIR::ValueRepresentation::Copy,
            .aggregate_kind = aggregate_kind,
            .type_id = rep_id};
  }

  // Makes a value representation that uses pass-by-address with the given
  // pointee type.
  auto MakePointerRepresentation(
      Parse::Node parse_node, SemIR::TypeId pointee_id,
      SemIR::ValueRepresentation::AggregateKind aggregate_kind =
          SemIR::ValueRepresentation::NotAggregate) const
      -> SemIR::ValueRepresentation {
    // TODO: Should we add `const` qualification to `pointee_id`?
    return {.kind = SemIR::ValueRepresentation::Pointer,
            .aggregate_kind = aggregate_kind,
            .type_id = context_.GetPointerType(parse_node, pointee_id)};
  }

  // Gets the value representation of a nested type, which should already be
  // complete.
  auto GetNestedValueRepresentation(SemIR::TypeId nested_type_id) const
      -> SemIR::ValueRepresentation {
    CARBON_CHECK(context_.sem_ir().IsTypeComplete(nested_type_id))
        << "Nested type should already be complete";
    auto value_rep = context_.sem_ir().GetValueRepresentation(nested_type_id);
    CARBON_CHECK(value_rep.kind != SemIR::ValueRepresentation::Unknown)
        << "Complete type should have a value representation";
    return value_rep;
  }

  auto BuildCrossReferenceValueRepresentation(SemIR::TypeId type_id,
                                              SemIR::CrossReference xref) const
      -> SemIR::ValueRepresentation {
    auto xref_inst = context_.sem_ir()
                         .GetCrossReferenceIR(xref.ir_id)
                         .insts()
                         .Get(xref.inst_id);

    // The canonical description of a type should only have cross-references
    // for entities owned by another File, such as builtins, which are owned
    // by the prelude, and named entities like classes and interfaces, which
    // we don't support yet.
    CARBON_CHECK(xref_inst.kind() == SemIR::Builtin::Kind)
        << "TODO: Handle other kinds of inst cross-references";

    // clang warns on unhandled enum values; clang-tidy is incorrect here.
    // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
    switch (xref_inst.As<SemIR::Builtin>().builtin_kind) {
      case SemIR::BuiltinKind::TypeType:
      case SemIR::BuiltinKind::Error:
      case SemIR::BuiltinKind::Invalid:
      case SemIR::BuiltinKind::BoolType:
      case SemIR::BuiltinKind::IntegerType:
      case SemIR::BuiltinKind::FloatingPointType:
      case SemIR::BuiltinKind::NamespaceType:
      case SemIR::BuiltinKind::FunctionType:
      case SemIR::BuiltinKind::BoundMethodType:
        return MakeCopyRepresentation(type_id);
      case SemIR::BuiltinKind::StringType:
        // TODO: Decide on string value semantics. This should probably be a
        // custom value representation carrying a pointer and size or
        // similar.
        return MakePointerRepresentation(Parse::Node::Invalid, type_id);
    }
    llvm_unreachable("All builtin kinds were handled above");
  }

  auto BuildStructOrTupleValueRepresentation(Parse::Node parse_node,
                                             std::size_t num_elements,
                                             SemIR::TypeId elementwise_rep,
                                             bool same_as_object_rep) const
      -> SemIR::ValueRepresentation {
    SemIR::ValueRepresentation::AggregateKind aggregate_kind =
        same_as_object_rep ? SemIR::ValueRepresentation::ValueAndObjectAggregate
                           : SemIR::ValueRepresentation::ValueAggregate;
    if (num_elements == 1) {
      // The value representation for a struct or tuple with a single element
      // is a struct or tuple containing the value representation of the
      // element.
      // TODO: Consider doing the same whenever `elementwise_rep` is
      // sufficiently small.
      return MakeCopyRepresentation(elementwise_rep, aggregate_kind);
    }
    // For a struct or tuple with multiple fields, we use a pointer
    // to the elementwise value representation.
    return MakePointerRepresentation(parse_node, elementwise_rep,
                                     aggregate_kind);
  }

  auto BuildStructTypeValueRepresentation(SemIR::TypeId type_id,
                                          SemIR::StructType struct_type) const
      -> SemIR::ValueRepresentation {
    // TODO: Share more code with tuples.
    auto fields = context_.inst_blocks().Get(struct_type.fields_id);
    if (fields.empty()) {
      return MakeEmptyRepresentation(struct_type.parse_node);
    }

    // Find the value representation for each field, and construct a struct
    // of value representations.
    llvm::SmallVector<SemIR::InstId> value_rep_fields;
    value_rep_fields.reserve(fields.size());
    bool same_as_object_rep = true;
    for (auto field_id : fields) {
      auto field = context_.insts().GetAs<SemIR::StructTypeField>(field_id);
      auto field_value_rep = GetNestedValueRepresentation(field.field_type_id);
      if (field_value_rep.type_id != field.field_type_id) {
        same_as_object_rep = false;
        field.field_type_id = field_value_rep.type_id;
        field_id = context_.AddConstantInst(field);
      }
      value_rep_fields.push_back(field_id);
    }

    auto value_rep = same_as_object_rep
                         ? type_id
                         : context_.CanonicalizeStructType(
                               struct_type.parse_node,
                               context_.inst_blocks().Add(value_rep_fields));
    return BuildStructOrTupleValueRepresentation(
        struct_type.parse_node, fields.size(), value_rep, same_as_object_rep);
  }

  auto BuildTupleTypeValueRepresentation(SemIR::TypeId type_id,
                                         SemIR::TupleType tuple_type) const
      -> SemIR::ValueRepresentation {
    // TODO: Share more code with structs.
    auto elements = context_.type_blocks().Get(tuple_type.elements_id);
    if (elements.empty()) {
      return MakeEmptyRepresentation(tuple_type.parse_node);
    }

    // Find the value representation for each element, and construct a tuple
    // of value representations.
    llvm::SmallVector<SemIR::TypeId> value_rep_elements;
    value_rep_elements.reserve(elements.size());
    bool same_as_object_rep = true;
    for (auto element_type_id : elements) {
      auto element_value_rep = GetNestedValueRepresentation(element_type_id);
      if (element_value_rep.type_id != element_type_id) {
        same_as_object_rep = false;
      }
      value_rep_elements.push_back(element_value_rep.type_id);
    }

    auto value_rep = same_as_object_rep
                         ? type_id
                         : context_.CanonicalizeTupleType(tuple_type.parse_node,
                                                          value_rep_elements);
    return BuildStructOrTupleValueRepresentation(
        tuple_type.parse_node, elements.size(), value_rep, same_as_object_rep);
  }

  // Builds and returns the value representation for the given type. All nested
  // types, as found by AddNestedIncompleteTypes, are known to be complete.
  auto BuildValueRepresentation(SemIR::TypeId type_id, SemIR::Inst inst) const
      -> SemIR::ValueRepresentation {
    // TODO: This can emit new SemIR instructions. Consider emitting them into a
    // dedicated file-scope instruction block where possible, or somewhere else
    // that better reflects the definition of the type, rather than wherever the
    // type happens to first be required to be complete.

    // clang warns on unhandled enum values; clang-tidy is incorrect here.
    // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
    switch (inst.kind()) {
      case SemIR::AddressOf::Kind:
      case SemIR::ArrayIndex::Kind:
      case SemIR::ArrayInit::Kind:
      case SemIR::Assign::Kind:
      case SemIR::BinaryOperatorAdd::Kind:
      case SemIR::BindName::Kind:
      case SemIR::BindValue::Kind:
      case SemIR::BlockArg::Kind:
      case SemIR::BoolLiteral::Kind:
      case SemIR::BoundMethod::Kind:
      case SemIR::Branch::Kind:
      case SemIR::BranchIf::Kind:
      case SemIR::BranchWithArg::Kind:
      case SemIR::Call::Kind:
      case SemIR::ClassDeclaration::Kind:
      case SemIR::ClassFieldAccess::Kind:
      case SemIR::ClassInit::Kind:
      case SemIR::Dereference::Kind:
      case SemIR::Field::Kind:
      case SemIR::FunctionDeclaration::Kind:
      case SemIR::InitializeFrom::Kind:
      case SemIR::IntegerLiteral::Kind:
      case SemIR::NameReference::Kind:
      case SemIR::Namespace::Kind:
      case SemIR::NoOp::Kind:
      case SemIR::Parameter::Kind:
      case SemIR::RealLiteral::Kind:
      case SemIR::Return::Kind:
      case SemIR::ReturnExpression::Kind:
      case SemIR::SelfParameter::Kind:
      case SemIR::SpliceBlock::Kind:
      case SemIR::StringLiteral::Kind:
      case SemIR::StructAccess::Kind:
      case SemIR::StructTypeField::Kind:
      case SemIR::StructLiteral::Kind:
      case SemIR::StructInit::Kind:
      case SemIR::StructValue::Kind:
      case SemIR::Temporary::Kind:
      case SemIR::TemporaryStorage::Kind:
      case SemIR::TupleAccess::Kind:
      case SemIR::TupleIndex::Kind:
      case SemIR::TupleLiteral::Kind:
      case SemIR::TupleInit::Kind:
      case SemIR::TupleValue::Kind:
      case SemIR::UnaryOperatorNot::Kind:
      case SemIR::ValueAsReference::Kind:
      case SemIR::ValueOfInitializer::Kind:
      case SemIR::VarStorage::Kind:
        CARBON_FATAL() << "Type refers to non-type inst " << inst;

      case SemIR::CrossReference::Kind:
        return BuildCrossReferenceValueRepresentation(
            type_id, inst.As<SemIR::CrossReference>());

      case SemIR::ArrayType::Kind: {
        // For arrays, it's convenient to always use a pointer representation,
        // even when the array has zero or one element, in order to support
        // indexing.
        return MakePointerRepresentation(
            inst.parse_node(), type_id,
            SemIR::ValueRepresentation::ObjectAggregate);
      }

      case SemIR::StructType::Kind:
        return BuildStructTypeValueRepresentation(type_id,
                                                  inst.As<SemIR::StructType>());

      case SemIR::TupleType::Kind:
        return BuildTupleTypeValueRepresentation(type_id,
                                                 inst.As<SemIR::TupleType>());

      case SemIR::ClassType::Kind:
        // The value representation for a class is a pointer to the object
        // representation.
        // TODO: Support customized value representations for classes.
        // TODO: Pick a better value representation when possible.
        return MakePointerRepresentation(
            inst.parse_node(),
            context_.classes()
                .Get(inst.As<SemIR::ClassType>().class_id)
                .object_representation_id,
            SemIR::ValueRepresentation::ObjectAggregate);

      case SemIR::Builtin::Kind:
        CARBON_FATAL() << "Builtins should be named as cross-references";

      case SemIR::PointerType::Kind:
      case SemIR::UnboundFieldType::Kind:
        return MakeCopyRepresentation(type_id);

      case SemIR::ConstType::Kind:
        // The value representation of `const T` is the same as that of `T`.
        // Objects are not modifiable through their value representations.
        return GetNestedValueRepresentation(
            inst.As<SemIR::ConstType>().inner_id);
    }
  }

  enum class Phase : int8_t {
    // The next step is to add nested types to the list of types to complete.
    AddNestedIncompleteTypes,
    // The next step is to build the value representation for the type.
    BuildValueRepresentation,
  };

  struct WorkItem {
    SemIR::TypeId type_id;
    Phase phase;
  };

  Context& context_;
  llvm::SmallVector<WorkItem> work_list_;
  std::optional<llvm::function_ref<auto()->Context::DiagnosticBuilder>>
      diagnoser_;
};
}  // namespace

auto Context::TryToCompleteType(
    SemIR::TypeId type_id,
    std::optional<llvm::function_ref<auto()->DiagnosticBuilder>> diagnoser)
    -> bool {
  return TypeCompleter(*this, diagnoser).Complete(type_id);
}

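// Shared implementation of type canonicalization: profiles the type into a
// folding set key via `profile_type`, reuses an existing canonical TypeId if
// one matches, and otherwise calls `make_inst` to produce the type's
// instruction and registers the new canonical type.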
auto Context::CanonicalizeTypeImpl(
    SemIR::InstKind kind,
    llvm::function_ref<void(llvm::FoldingSetNodeID& canonical_id)> profile_type,
    llvm::function_ref<SemIR::InstId()> make_inst) -> SemIR::TypeId {
  llvm::FoldingSetNodeID canonical_id;
  kind.Profile(canonical_id);
  profile_type(canonical_id);

  void* insert_pos;
  auto* node =
      canonical_type_nodes_.FindNodeOrInsertPos(canonical_id, insert_pos);
  if (node != nullptr) {
    return node->type_id();
  }

  auto inst_id = make_inst();
  auto type_id = types().Add({.inst_id = inst_id});
  CARBON_CHECK(canonical_types_.insert({inst_id, type_id}).second);
  type_node_storage_.push_back(
      std::make_unique<TypeNode>(canonical_id, type_id));

  // In a debug build, check that our insertion position is still valid. It
  // could have been invalidated by a misbehaving `make_inst`.
  CARBON_DCHECK([&] {
    void* check_insert_pos;
    auto* check_node = canonical_type_nodes_.FindNodeOrInsertPos(
        canonical_id, check_insert_pos);
    return !check_node && insert_pos == check_insert_pos;
  }()) << "Type was created recursively during canonicalization";

  canonical_type_nodes_.InsertNode(type_node_storage_.back().get(), insert_pos);
  return type_id;
}

// Compute a fingerprint for a tuple type, for use as a key in a folding set.
static auto ProfileTupleType(llvm::ArrayRef<SemIR::TypeId> type_ids,
                             llvm::FoldingSetNodeID& canonical_id) -> void {
  for (auto type_id : type_ids) {
    canonical_id.AddInteger(type_id.index);
  }
}

// Compute a fingerprint for a type, for use as a key in a folding set.
static auto ProfileType(Context& semantics_context, SemIR::Inst inst,
                        llvm::FoldingSetNodeID& canonical_id) -> void {
  switch (inst.kind()) {
    case SemIR::ArrayType::Kind: {
      auto array_type = inst.As<SemIR::ArrayType>();
      canonical_id.AddInteger(
          semantics_context.sem_ir().GetArrayBoundValue(array_type.bound_id));
      canonical_id.AddInteger(array_type.element_type_id.index);
      break;
    }
    case SemIR::Builtin::Kind:
      canonical_id.AddInteger(inst.As<SemIR::Builtin>().builtin_kind.AsInt());
      break;
    case SemIR::ClassType::Kind:
      canonical_id.AddInteger(inst.As<SemIR::ClassType>().class_id.index);
      break;
    case SemIR::CrossReference::Kind: {
      // TODO: Cross-references should be canonicalized by looking at their
      // target rather than treating them as new unique types.
      auto xref = inst.As<SemIR::CrossReference>();
      canonical_id.AddInteger(xref.ir_id.index);
      canonical_id.AddInteger(xref.inst_id.index);
      break;
    }
    case SemIR::ConstType::Kind:
      canonical_id.AddInteger(
          semantics_context
              .GetUnqualifiedType(inst.As<SemIR::ConstType>().inner_id)
              .index);
      break;
    case SemIR::PointerType::Kind:
      canonical_id.AddInteger(inst.As<SemIR::PointerType>().pointee_id.index);
      break;
    case SemIR::StructType::Kind: {
      auto fields = semantics_context.inst_blocks().Get(
          inst.As<SemIR::StructType>().fields_id);
      for (const auto& field_id : fields) {
        auto field =
            semantics_context.insts().GetAs<SemIR::StructTypeField>(field_id);
        canonical_id.AddInteger(field.name_id.index);
        canonical_id.AddInteger(field.field_type_id.index);
      }
      break;
    }
    case SemIR::TupleType::Kind:
      ProfileTupleType(semantics_context.type_blocks().Get(
                           inst.As<SemIR::TupleType>().elements_id),
                       canonical_id);
      break;
    case SemIR::UnboundFieldType::Kind: {
      auto unbound_field_type = inst.As<SemIR::UnboundFieldType>();
      canonical_id.AddInteger(unbound_field_type.class_type_id.index);
      canonical_id.AddInteger(unbound_field_type.field_type_id.index);
      break;
    }
    default:
      CARBON_FATAL() << "Unexpected type inst " << inst;
  }
}

auto Context::CanonicalizeTypeAndAddInstIfNew(SemIR::Inst inst)
    -> SemIR::TypeId {
  auto profile_node = [&](llvm::FoldingSetNodeID& canonical_id) {
    ProfileType(*this, inst, canonical_id);
  };
  auto make_inst = [&] { return AddConstantInst(inst); };
  return CanonicalizeTypeImpl(inst.kind(), profile_node, make_inst);
}

auto Context::CanonicalizeType(SemIR::InstId inst_id) -> SemIR::TypeId {
  inst_id = FollowNameReferences(inst_id);

  auto it = canonical_types_.find(inst_id);
  if (it != canonical_types_.end()) {
    return it->second;
  }

  auto inst = insts().Get(inst_id);
  auto profile_node = [&](llvm::FoldingSetNodeID& canonical_id) {
    ProfileType(*this, inst, canonical_id);
  };
  auto make_inst = [&] { return inst_id; };
  return CanonicalizeTypeImpl(inst.kind(), profile_node, make_inst);
}

auto Context::CanonicalizeStructType(Parse::Node parse_node,
                                     SemIR::InstBlockId refs_id)
    -> SemIR::TypeId {
  return CanonicalizeTypeAndAddInstIfNew(
      SemIR::StructType{parse_node, SemIR::TypeId::TypeType, refs_id});
}

auto Context::CanonicalizeTupleType(Parse::Node parse_node,
                                    llvm::ArrayRef<SemIR::TypeId> type_ids)
    -> SemIR::TypeId {
  // Defer allocating a SemIR::TypeBlockId until we know this is a new type.
  auto profile_tuple = [&](llvm::FoldingSetNodeID& canonical_id) {
    ProfileTupleType(type_ids, canonical_id);
  };
  auto make_tuple_inst = [&] {
    return AddConstantInst(SemIR::TupleType{parse_node, SemIR::TypeId::TypeType,
                                            type_blocks().Add(type_ids)});
  };
  return CanonicalizeTypeImpl(SemIR::TupleType::Kind, profile_tuple,
                              make_tuple_inst);
}

auto Context::GetBuiltinType(SemIR::BuiltinKind kind) -> SemIR::TypeId {
  CARBON_CHECK(kind != SemIR::BuiltinKind::Invalid);
  auto type_id = CanonicalizeType(SemIR::InstId::ForBuiltin(kind));
  // To keep client code simpler, complete builtin types before returning them.
  bool complete = TryToCompleteType(type_id);
  CARBON_CHECK(complete) << "Failed to complete builtin type";
  return type_id;
}

auto Context::GetPointerType(Parse::Node parse_node,
                             SemIR::TypeId pointee_type_id) -> SemIR::TypeId {
  return CanonicalizeTypeAndAddInstIfNew(
      SemIR::PointerType{parse_node, SemIR::TypeId::TypeType, pointee_type_id});
}

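// Removes any top-level `const` qualifier from a type.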
auto Context::GetUnqualifiedType(SemIR::TypeId type_id) -> SemIR::TypeId {
  SemIR::Inst type_inst =
      insts().Get(sem_ir_->GetTypeAllowBuiltinTypes(type_id));
  if (auto const_type = type_inst.TryAs<SemIR::ConstType>()) {
    return const_type->inner_id;
  }
  return type_id;
}

auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  node_stack_.PrintForStackDump(output);
  inst_block_stack_.PrintForStackDump(output);
  params_or_args_stack_.PrintForStackDump(output);
  args_type_info_stack_.PrintForStackDump(output);
}

}  // namespace Carbon::Check