inst_namer.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/sem_ir/inst_namer.h"
  5. #include "common/ostream.h"
  6. #include "toolchain/base/kind_switch.h"
  7. #include "toolchain/base/value_store.h"
  8. #include "toolchain/lex/tokenized_buffer.h"
  9. #include "toolchain/parse/tree.h"
  10. #include "toolchain/sem_ir/builtin_function_kind.h"
  11. #include "toolchain/sem_ir/function.h"
  12. #include "toolchain/sem_ir/ids.h"
  13. #include "toolchain/sem_ir/inst_kind.h"
  14. #include "toolchain/sem_ir/typed_insts.h"
  15. namespace Carbon::SemIR {
  16. InstNamer::InstNamer(const Lex::TokenizedBuffer& tokenized_buffer,
  17. const Parse::Tree& parse_tree, const File& sem_ir)
  18. : tokenized_buffer_(tokenized_buffer),
  19. parse_tree_(parse_tree),
  20. sem_ir_(sem_ir) {
  21. insts_.resize(sem_ir.insts().size());
  22. labels_.resize(sem_ir.inst_blocks().size());
  23. scopes_.resize(static_cast<size_t>(GetScopeFor(NumberOfScopesTag())));
  24. generic_scopes_.resize(sem_ir.generics().size(), ScopeId::None);
  25. // Build the constants scope.
  26. CollectNamesInBlock(ScopeId::Constants, sem_ir.constants().array_ref());
  27. // Build the ImportRef scope.
  28. CollectNamesInBlock(ScopeId::ImportRefs,
  29. sem_ir.inst_blocks().Get(SemIR::InstBlockId::ImportRefs));
  30. // Build the file scope.
  31. CollectNamesInBlock(ScopeId::File, sem_ir.top_inst_block_id());
  32. // Build each function scope.
  33. for (auto [i, fn] : llvm::enumerate(sem_ir.functions().array_ref())) {
  34. FunctionId fn_id(i);
  35. auto fn_scope = GetScopeFor(fn_id);
  36. // TODO: Provide a location for the function for use as a
  37. // disambiguator.
  38. auto fn_loc = Parse::NodeId::Invalid;
  39. GetScopeInfo(fn_scope).name = globals_.AllocateName(
  40. *this, fn_loc, sem_ir.names().GetIRBaseName(fn.name_id).str());
  41. CollectNamesInBlock(fn_scope, fn.implicit_param_refs_id);
  42. CollectNamesInBlock(fn_scope, fn.param_refs_id);
  43. if (fn.return_storage_id.is_valid()) {
  44. insts_[fn.return_storage_id.index] = {
  45. fn_scope,
  46. GetScopeInfo(fn_scope).insts.AllocateName(
  47. *this, sem_ir.insts().GetLocId(fn.return_storage_id), "return")};
  48. }
  49. if (!fn.body_block_ids.empty()) {
  50. AddBlockLabel(fn_scope, fn.body_block_ids.front(), "entry", fn_loc);
  51. }
  52. for (auto block_id : fn.body_block_ids) {
  53. CollectNamesInBlock(fn_scope, block_id);
  54. }
  55. for (auto block_id : fn.body_block_ids) {
  56. AddBlockLabel(fn_scope, block_id);
  57. }
  58. CollectNamesInGeneric(fn_scope, fn.generic_id);
  59. }
  60. // Build each class scope.
  61. for (auto [i, class_info] : llvm::enumerate(sem_ir.classes().array_ref())) {
  62. ClassId class_id(i);
  63. auto class_scope = GetScopeFor(class_id);
  64. // TODO: Provide a location for the class for use as a disambiguator.
  65. auto class_loc = Parse::NodeId::Invalid;
  66. GetScopeInfo(class_scope).name = globals_.AllocateName(
  67. *this, class_loc,
  68. sem_ir.names().GetIRBaseName(class_info.name_id).str());
  69. AddBlockLabel(class_scope, class_info.body_block_id, "class", class_loc);
  70. CollectNamesInBlock(class_scope, class_info.body_block_id);
  71. CollectNamesInGeneric(class_scope, class_info.generic_id);
  72. }
  73. // Build each interface scope.
  74. for (auto [i, interface_info] :
  75. llvm::enumerate(sem_ir.interfaces().array_ref())) {
  76. InterfaceId interface_id(i);
  77. auto interface_scope = GetScopeFor(interface_id);
  78. // TODO: Provide a location for the interface for use as a disambiguator.
  79. auto interface_loc = Parse::NodeId::Invalid;
  80. GetScopeInfo(interface_scope).name = globals_.AllocateName(
  81. *this, interface_loc,
  82. sem_ir.names().GetIRBaseName(interface_info.name_id).str());
  83. AddBlockLabel(interface_scope, interface_info.body_block_id, "interface",
  84. interface_loc);
  85. CollectNamesInBlock(interface_scope, interface_info.body_block_id);
  86. CollectNamesInGeneric(interface_scope, interface_info.generic_id);
  87. }
  88. // Build each impl scope.
  89. for (auto [i, impl_info] : llvm::enumerate(sem_ir.impls().array_ref())) {
  90. ImplId impl_id(i);
  91. auto impl_scope = GetScopeFor(impl_id);
  92. // TODO: Provide a location for the impl for use as a disambiguator.
  93. auto impl_loc = Parse::NodeId::Invalid;
  94. // TODO: Invent a name based on the self and constraint types.
  95. GetScopeInfo(impl_scope).name =
  96. globals_.AllocateName(*this, impl_loc, "impl");
  97. AddBlockLabel(impl_scope, impl_info.body_block_id, "impl", impl_loc);
  98. CollectNamesInBlock(impl_scope, impl_info.body_block_id);
  99. // TODO: Collect names from the generic once we support generic impls.
  100. }
  101. }
  102. auto InstNamer::GetScopeName(ScopeId scope) const -> std::string {
  103. switch (scope) {
  104. case ScopeId::None:
  105. return "<invalid scope>";
  106. // These are treated as SemIR keywords.
  107. case ScopeId::File:
  108. return "file";
  109. case ScopeId::ImportRefs:
  110. return "imports";
  111. case ScopeId::Constants:
  112. return "constants";
  113. // For everything else, use an @ prefix.
  114. default:
  115. return ("@" + GetScopeInfo(scope).name.str()).str();
  116. }
  117. }
  118. auto InstNamer::GetUnscopedNameFor(InstId inst_id) const -> llvm::StringRef {
  119. if (!inst_id.is_valid()) {
  120. return "";
  121. }
  122. const auto& inst_name = insts_[inst_id.index].second;
  123. return inst_name ? inst_name.str() : "";
  124. }
  125. auto InstNamer::GetNameFor(ScopeId scope_id, InstId inst_id) const
  126. -> std::string {
  127. if (!inst_id.is_valid()) {
  128. return "invalid";
  129. }
  130. // Check for a builtin.
  131. if (inst_id.is_builtin()) {
  132. return inst_id.builtin_inst_kind().label().str();
  133. }
  134. if (inst_id == InstId::PackageNamespace) {
  135. return "package";
  136. }
  137. const auto& [inst_scope, inst_name] = insts_[inst_id.index];
  138. if (!inst_name) {
  139. // This should not happen in valid IR.
  140. std::string str;
  141. llvm::raw_string_ostream str_stream(str);
  142. str_stream << "<unexpected>." << inst_id;
  143. auto loc_id = sem_ir_.insts().GetLocId(inst_id);
  144. // TODO: Consider handling inst_id cases.
  145. if (loc_id.is_node_id()) {
  146. auto token = parse_tree_.node_token(loc_id.node_id());
  147. str_stream << ".loc" << tokenized_buffer_.GetLineNumber(token) << "_"
  148. << tokenized_buffer_.GetColumnNumber(token);
  149. }
  150. return str;
  151. }
  152. if (inst_scope == scope_id) {
  153. return ("%" + inst_name.str()).str();
  154. }
  155. return (GetScopeName(inst_scope) + ".%" + inst_name.str()).str();
  156. }
  157. auto InstNamer::GetUnscopedLabelFor(InstBlockId block_id) const
  158. -> llvm::StringRef {
  159. if (!block_id.is_valid()) {
  160. return "";
  161. }
  162. const auto& label_name = labels_[block_id.index].second;
  163. return label_name ? label_name.str() : "";
  164. }
  165. // Returns the IR name to use for a label, when referenced from a given scope.
  166. auto InstNamer::GetLabelFor(ScopeId scope_id, InstBlockId block_id) const
  167. -> std::string {
  168. if (!block_id.is_valid()) {
  169. return "!invalid";
  170. }
  171. const auto& [label_scope, label_name] = labels_[block_id.index];
  172. if (!label_name) {
  173. // This should not happen in valid IR.
  174. std::string str;
  175. llvm::raw_string_ostream(str)
  176. << "<unexpected instblockref " << block_id << ">";
  177. return str;
  178. }
  179. if (label_scope == scope_id) {
  180. return ("!" + label_name.str()).str();
  181. }
  182. return (GetScopeName(label_scope) + ".!" + label_name.str()).str();
  183. }
  184. auto InstNamer::Namespace::Name::str() const -> llvm::StringRef {
  185. llvm::StringMapEntry<NameResult>* value = value_;
  186. CARBON_CHECK(value, "cannot print a null name");
  187. while (value->second.ambiguous && value->second.fallback) {
  188. value = value->second.fallback.value_;
  189. }
  190. return value->first();
  191. }
  192. auto InstNamer::Namespace::AllocateName(const InstNamer& inst_namer,
  193. SemIR::LocId loc_id, std::string name)
  194. -> Name {
  195. // The best (shortest) name for this instruction so far, and the current
  196. // name for it.
  197. Name best;
  198. Name current;
  199. // Add `name` as a name for this entity.
  200. auto add_name = [&](bool mark_ambiguous = true) {
  201. auto [it, added] = allocated.insert({name, NameResult()});
  202. Name new_name = Name(it);
  203. if (!added) {
  204. if (mark_ambiguous) {
  205. // This name was allocated for a different instruction. Mark it as
  206. // ambiguous and keep looking for a name for this instruction.
  207. new_name.SetAmbiguous();
  208. }
  209. } else {
  210. if (!best) {
  211. best = new_name;
  212. } else {
  213. CARBON_CHECK(current);
  214. current.SetFallback(new_name);
  215. }
  216. current = new_name;
  217. }
  218. return added;
  219. };
  220. // Use the given name if it's available.
  221. if (!name.empty()) {
  222. add_name();
  223. }
  224. // Append location information to try to disambiguate.
  225. // TODO: Consider handling inst_id cases.
  226. if (loc_id.is_node_id()) {
  227. auto token = inst_namer.parse_tree_.node_token(loc_id.node_id());
  228. llvm::raw_string_ostream(name)
  229. << ".loc" << inst_namer.tokenized_buffer_.GetLineNumber(token);
  230. add_name();
  231. llvm::raw_string_ostream(name)
  232. << "_" << inst_namer.tokenized_buffer_.GetColumnNumber(token);
  233. add_name();
  234. }
  235. // Append numbers until we find an available name.
  236. name += ".";
  237. auto name_size_without_counter = name.size();
  238. for (int counter = 1;; ++counter) {
  239. name.resize(name_size_without_counter);
  240. llvm::raw_string_ostream(name) << counter;
  241. if (add_name(/*mark_ambiguous=*/false)) {
  242. return best;
  243. }
  244. }
  245. }
  246. auto InstNamer::AddBlockLabel(ScopeId scope_id, InstBlockId block_id,
  247. std::string name, SemIR::LocId loc_id) -> void {
  248. if (!block_id.is_valid() || labels_[block_id.index].second) {
  249. return;
  250. }
  251. if (!loc_id.is_valid()) {
  252. if (const auto& block = sem_ir_.inst_blocks().Get(block_id);
  253. !block.empty()) {
  254. loc_id = sem_ir_.insts().GetLocId(block.front());
  255. }
  256. }
  257. labels_[block_id.index] = {
  258. scope_id, GetScopeInfo(scope_id).labels.AllocateName(*this, loc_id,
  259. std::move(name))};
  260. }
  261. // Finds and adds a suitable block label for the given SemIR instruction that
  262. // represents some kind of branch.
  263. auto InstNamer::AddBlockLabel(ScopeId scope_id, SemIR::LocId loc_id,
  264. AnyBranch branch) -> void {
  265. llvm::StringRef name;
  266. switch (parse_tree_.node_kind(loc_id.node_id())) {
  267. case Parse::NodeKind::IfExprIf:
  268. switch (branch.kind) {
  269. case BranchIf::Kind:
  270. name = "if.expr.then";
  271. break;
  272. case Branch::Kind:
  273. name = "if.expr.else";
  274. break;
  275. case BranchWithArg::Kind:
  276. name = "if.expr.result";
  277. break;
  278. default:
  279. break;
  280. }
  281. break;
  282. case Parse::NodeKind::IfCondition:
  283. switch (branch.kind) {
  284. case BranchIf::Kind:
  285. name = "if.then";
  286. break;
  287. case Branch::Kind:
  288. name = "if.else";
  289. break;
  290. default:
  291. break;
  292. }
  293. break;
  294. case Parse::NodeKind::IfStatement:
  295. name = "if.done";
  296. break;
  297. case Parse::NodeKind::ShortCircuitOperandAnd:
  298. name = branch.kind == BranchIf::Kind ? "and.rhs" : "and.result";
  299. break;
  300. case Parse::NodeKind::ShortCircuitOperandOr:
  301. name = branch.kind == BranchIf::Kind ? "or.rhs" : "or.result";
  302. break;
  303. case Parse::NodeKind::WhileConditionStart:
  304. name = "while.cond";
  305. break;
  306. case Parse::NodeKind::WhileCondition:
  307. switch (branch.kind) {
  308. case BranchIf::Kind:
  309. name = "while.body";
  310. break;
  311. case Branch::Kind:
  312. name = "while.done";
  313. break;
  314. default:
  315. break;
  316. }
  317. break;
  318. default:
  319. break;
  320. }
  321. AddBlockLabel(scope_id, branch.target_id, name.str(), loc_id);
  322. }
  323. auto InstNamer::CollectNamesInBlock(ScopeId scope_id, InstBlockId block_id)
  324. -> void {
  325. if (block_id.is_valid()) {
  326. CollectNamesInBlock(scope_id, sem_ir_.inst_blocks().Get(block_id));
  327. }
  328. }
  329. auto InstNamer::CollectNamesInBlock(ScopeId scope_id,
  330. llvm::ArrayRef<InstId> block) -> void {
  331. Scope& scope = GetScopeInfo(scope_id);
  332. // Use bound names where available. Otherwise, assign a backup name.
  333. for (auto inst_id : block) {
  334. if (!inst_id.is_valid()) {
  335. continue;
  336. }
  337. auto untyped_inst = sem_ir_.insts().Get(inst_id);
  338. auto add_inst_name = [&](std::string name) {
  339. insts_[inst_id.index] = {
  340. scope_id, scope.insts.AllocateName(
  341. *this, sem_ir_.insts().GetLocId(inst_id), name)};
  342. };
  343. auto add_inst_name_id = [&](NameId name_id, llvm::StringRef suffix = "") {
  344. add_inst_name(
  345. (sem_ir_.names().GetIRBaseName(name_id).str() + suffix).str());
  346. };
  347. if (auto branch = untyped_inst.TryAs<AnyBranch>()) {
  348. AddBlockLabel(scope_id, sem_ir_.insts().GetLocId(inst_id), *branch);
  349. }
  350. CARBON_KIND_SWITCH(untyped_inst) {
  351. case CARBON_KIND(AddrPattern inst): {
  352. // TODO: We need to assign names to parameters that appear in
  353. // function declarations, which may be nested within a pattern. For
  354. // now, just look through `addr`, but we should find a better way to
  355. // visit parameters.
  356. CollectNamesInBlock(scope_id, inst.inner_id);
  357. break;
  358. }
  359. case CARBON_KIND(AssociatedConstantDecl inst): {
  360. add_inst_name_id(inst.name_id);
  361. continue;
  362. }
  363. case BindAlias::Kind:
  364. case BindName::Kind:
  365. case BindSymbolicName::Kind:
  366. case ExportDecl::Kind: {
  367. auto inst = untyped_inst.As<AnyBindNameOrExportDecl>();
  368. add_inst_name_id(
  369. sem_ir_.entity_names().Get(inst.entity_name_id).name_id);
  370. continue;
  371. }
  372. case CARBON_KIND(Call inst): {
  373. auto callee_function =
  374. SemIR::GetCalleeFunction(sem_ir_, inst.callee_id);
  375. if (!callee_function.function_id.is_valid()) {
  376. break;
  377. }
  378. const auto& function =
  379. sem_ir_.functions().Get(callee_function.function_id);
  380. // Name the call's result based on the callee.
  381. if (function.builtin_function_kind !=
  382. SemIR::BuiltinFunctionKind::None) {
  383. // For a builtin, use the builtin name. Otherwise, we'd typically pick
  384. // the name `Op` below, which is probably not very useful.
  385. add_inst_name(function.builtin_function_kind.name().str());
  386. continue;
  387. }
  388. add_inst_name_id(function.name_id, ".call");
  389. continue;
  390. }
  391. case CARBON_KIND(ClassDecl inst): {
  392. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".decl");
  393. CollectNamesInBlock(scope_id, inst.decl_block_id);
  394. continue;
  395. }
  396. case CARBON_KIND(ClassType inst): {
  397. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id);
  398. continue;
  399. }
  400. case CARBON_KIND(FunctionDecl inst): {
  401. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  402. ".decl");
  403. CollectNamesInBlock(scope_id, inst.decl_block_id);
  404. continue;
  405. }
  406. case CARBON_KIND(FunctionType inst): {
  407. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  408. ".type");
  409. continue;
  410. }
  411. case CARBON_KIND(GenericClassType inst): {
  412. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".type");
  413. continue;
  414. }
  415. case CARBON_KIND(GenericInterfaceType inst): {
  416. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  417. ".type");
  418. continue;
  419. }
  420. case CARBON_KIND(ImplDecl inst): {
  421. CollectNamesInBlock(scope_id, inst.decl_block_id);
  422. break;
  423. }
  424. case CARBON_KIND(ImportDecl inst): {
  425. if (inst.package_id.is_valid()) {
  426. add_inst_name_id(inst.package_id, ".import");
  427. } else {
  428. add_inst_name("default.import");
  429. }
  430. break;
  431. }
  432. case ImportRefUnloaded::Kind:
  433. case ImportRefLoaded::Kind: {
  434. add_inst_name("import_ref");
  435. // When building import refs, we frequently add instructions without
  436. // a block. Constants that refer to them need to be separately
  437. // named.
  438. auto const_id = sem_ir_.constant_values().Get(inst_id);
  439. if (const_id.is_valid() && const_id.is_template()) {
  440. auto const_inst_id = sem_ir_.constant_values().GetInstId(const_id);
  441. if (!insts_[const_inst_id.index].second) {
  442. CollectNamesInBlock(ScopeId::ImportRefs, const_inst_id);
  443. }
  444. }
  445. continue;
  446. }
  447. case CARBON_KIND(InterfaceDecl inst): {
  448. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  449. ".decl");
  450. CollectNamesInBlock(scope_id, inst.decl_block_id);
  451. continue;
  452. }
  453. case CARBON_KIND(NameRef inst): {
  454. add_inst_name_id(inst.name_id, ".ref");
  455. continue;
  456. }
  457. // The namespace is specified here due to the name conflict.
  458. case CARBON_KIND(SemIR::Namespace inst): {
  459. add_inst_name_id(sem_ir_.name_scopes().Get(inst.name_scope_id).name_id);
  460. continue;
  461. }
  462. case CARBON_KIND(Param inst): {
  463. add_inst_name_id(inst.name_id);
  464. continue;
  465. }
  466. case CARBON_KIND(SpliceBlock inst): {
  467. CollectNamesInBlock(scope_id, inst.block_id);
  468. break;
  469. }
  470. case CARBON_KIND(StructValue inst): {
  471. if (auto fn_ty = sem_ir_.types().TryGetAs<FunctionType>(inst.type_id)) {
  472. add_inst_name_id(sem_ir_.functions().Get(fn_ty->function_id).name_id);
  473. } else if (auto generic_class_ty =
  474. sem_ir_.types().TryGetAs<GenericClassType>(
  475. inst.type_id)) {
  476. add_inst_name_id(
  477. sem_ir_.classes().Get(generic_class_ty->class_id).name_id);
  478. } else if (auto generic_interface_ty =
  479. sem_ir_.types().TryGetAs<GenericInterfaceType>(
  480. inst.type_id)) {
  481. add_inst_name_id(sem_ir_.interfaces()
  482. .Get(generic_interface_ty->interface_id)
  483. .name_id);
  484. } else {
  485. add_inst_name("struct");
  486. }
  487. continue;
  488. }
  489. case CARBON_KIND(TupleValue inst): {
  490. if (sem_ir_.types().Is<ArrayType>(inst.type_id)) {
  491. add_inst_name("array");
  492. } else {
  493. add_inst_name("tuple");
  494. }
  495. continue;
  496. }
  497. case CARBON_KIND(VarStorage inst): {
  498. add_inst_name_id(inst.name_id, ".var");
  499. continue;
  500. }
  501. default: {
  502. break;
  503. }
  504. }
  505. // Sequentially number all remaining values.
  506. if (untyped_inst.kind().value_kind() != InstValueKind::None) {
  507. add_inst_name("");
  508. }
  509. }
  510. }
  511. auto InstNamer::CollectNamesInGeneric(ScopeId scope_id, GenericId generic_id)
  512. -> void {
  513. if (!generic_id.is_valid()) {
  514. return;
  515. }
  516. generic_scopes_[generic_id.index] = scope_id;
  517. const auto& generic = sem_ir_.generics().Get(generic_id);
  518. CollectNamesInBlock(scope_id, generic.decl_block_id);
  519. CollectNamesInBlock(scope_id, generic.definition_block_id);
  520. }
  521. } // namespace Carbon::SemIR