inst_namer.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/sem_ir/inst_namer.h"
  5. #include "common/ostream.h"
  6. #include "toolchain/base/kind_switch.h"
  7. #include "toolchain/base/value_store.h"
  8. #include "toolchain/lex/tokenized_buffer.h"
  9. #include "toolchain/parse/tree.h"
  10. #include "toolchain/sem_ir/builtin_function_kind.h"
  11. #include "toolchain/sem_ir/function.h"
  12. #include "toolchain/sem_ir/ids.h"
  13. #include "toolchain/sem_ir/inst_kind.h"
  14. #include "toolchain/sem_ir/typed_insts.h"
  15. namespace Carbon::SemIR {
  16. InstNamer::InstNamer(const Lex::TokenizedBuffer& tokenized_buffer,
  17. const Parse::Tree& parse_tree, const File& sem_ir)
  18. : tokenized_buffer_(tokenized_buffer),
  19. parse_tree_(parse_tree),
  20. sem_ir_(sem_ir) {
  21. insts.resize(sem_ir.insts().size());
  22. labels.resize(sem_ir.inst_blocks().size());
  23. scopes.resize(static_cast<size_t>(GetScopeFor(NumberOfScopesTag())));
  24. // Build the constants scope.
  25. CollectNamesInBlock(ScopeId::Constants, sem_ir.constants().array_ref());
  26. // Build the file scope.
  27. CollectNamesInBlock(ScopeId::File, sem_ir.top_inst_block_id());
  28. // Build each function scope.
  29. for (auto [i, fn] : llvm::enumerate(sem_ir.functions().array_ref())) {
  30. auto fn_id = FunctionId(i);
  31. auto fn_scope = GetScopeFor(fn_id);
  32. // TODO: Provide a location for the function for use as a
  33. // disambiguator.
  34. auto fn_loc = Parse::NodeId::Invalid;
  35. GetScopeInfo(fn_scope).name = globals.AllocateName(
  36. *this, fn_loc, sem_ir.names().GetIRBaseName(fn.name_id).str());
  37. CollectNamesInBlock(fn_scope, fn.implicit_param_refs_id);
  38. CollectNamesInBlock(fn_scope, fn.param_refs_id);
  39. if (fn.return_storage_id.is_valid()) {
  40. insts[fn.return_storage_id.index] = {
  41. fn_scope,
  42. GetScopeInfo(fn_scope).insts.AllocateName(
  43. *this, sem_ir.insts().GetLocId(fn.return_storage_id), "return")};
  44. }
  45. if (!fn.body_block_ids.empty()) {
  46. AddBlockLabel(fn_scope, fn.body_block_ids.front(), "entry", fn_loc);
  47. }
  48. for (auto block_id : fn.body_block_ids) {
  49. CollectNamesInBlock(fn_scope, block_id);
  50. }
  51. for (auto block_id : fn.body_block_ids) {
  52. AddBlockLabel(fn_scope, block_id);
  53. }
  54. }
  55. // Build each class scope.
  56. for (auto [i, class_info] : llvm::enumerate(sem_ir.classes().array_ref())) {
  57. auto class_id = ClassId(i);
  58. auto class_scope = GetScopeFor(class_id);
  59. // TODO: Provide a location for the class for use as a disambiguator.
  60. auto class_loc = Parse::NodeId::Invalid;
  61. GetScopeInfo(class_scope).name = globals.AllocateName(
  62. *this, class_loc,
  63. sem_ir.names().GetIRBaseName(class_info.name_id).str());
  64. AddBlockLabel(class_scope, class_info.body_block_id, "class", class_loc);
  65. CollectNamesInBlock(class_scope, class_info.body_block_id);
  66. }
  67. // Build each interface scope.
  68. for (auto [i, interface_info] :
  69. llvm::enumerate(sem_ir.interfaces().array_ref())) {
  70. auto interface_id = InterfaceId(i);
  71. auto interface_scope = GetScopeFor(interface_id);
  72. // TODO: Provide a location for the interface for use as a disambiguator.
  73. auto interface_loc = Parse::NodeId::Invalid;
  74. GetScopeInfo(interface_scope).name = globals.AllocateName(
  75. *this, interface_loc,
  76. sem_ir.names().GetIRBaseName(interface_info.name_id).str());
  77. AddBlockLabel(interface_scope, interface_info.body_block_id, "interface",
  78. interface_loc);
  79. CollectNamesInBlock(interface_scope, interface_info.body_block_id);
  80. }
  81. // Build each impl scope.
  82. for (auto [i, impl_info] : llvm::enumerate(sem_ir.impls().array_ref())) {
  83. auto impl_id = ImplId(i);
  84. auto impl_scope = GetScopeFor(impl_id);
  85. // TODO: Provide a location for the impl for use as a disambiguator.
  86. auto impl_loc = Parse::NodeId::Invalid;
  87. // TODO: Invent a name based on the self and constraint types.
  88. GetScopeInfo(impl_scope).name =
  89. globals.AllocateName(*this, impl_loc, "impl");
  90. AddBlockLabel(impl_scope, impl_info.body_block_id, "impl", impl_loc);
  91. CollectNamesInBlock(impl_scope, impl_info.body_block_id);
  92. }
  93. }
  94. auto InstNamer::GetScopeName(ScopeId scope) const -> std::string {
  95. switch (scope) {
  96. case ScopeId::None:
  97. return "<invalid scope>";
  98. // These are treated as SemIR keywords.
  99. case ScopeId::File:
  100. return "file";
  101. case ScopeId::ImportRef:
  102. return "imports";
  103. case ScopeId::Constants:
  104. return "constants";
  105. // For everything else, use an @ prefix.
  106. default:
  107. return ("@" + GetScopeInfo(scope).name.str()).str();
  108. }
  109. }
  110. auto InstNamer::GetUnscopedNameFor(InstId inst_id) const -> llvm::StringRef {
  111. if (!inst_id.is_valid()) {
  112. return "";
  113. }
  114. const auto& inst_name = insts[inst_id.index].second;
  115. return inst_name ? inst_name.str() : "";
  116. }
  117. auto InstNamer::GetNameFor(ScopeId scope_id, InstId inst_id) const
  118. -> std::string {
  119. if (!inst_id.is_valid()) {
  120. return "invalid";
  121. }
  122. // Check for a builtin.
  123. if (inst_id.is_builtin()) {
  124. return inst_id.builtin_kind().label().str();
  125. }
  126. if (inst_id == InstId::PackageNamespace) {
  127. return "package";
  128. }
  129. const auto& [inst_scope, inst_name] = insts[inst_id.index];
  130. if (!inst_name) {
  131. // This should not happen in valid IR.
  132. std::string str;
  133. llvm::raw_string_ostream(str) << "<unexpected instref " << inst_id << ">";
  134. return str;
  135. }
  136. if (inst_scope == scope_id) {
  137. return ("%" + inst_name.str()).str();
  138. }
  139. return (GetScopeName(inst_scope) + ".%" + inst_name.str()).str();
  140. }
  141. auto InstNamer::GetUnscopedLabelFor(InstBlockId block_id) const
  142. -> llvm::StringRef {
  143. if (!block_id.is_valid()) {
  144. return "";
  145. }
  146. const auto& label_name = labels[block_id.index].second;
  147. return label_name ? label_name.str() : "";
  148. }
  149. // Returns the IR name to use for a label, when referenced from a given scope.
  150. auto InstNamer::GetLabelFor(ScopeId scope_id, InstBlockId block_id) const
  151. -> std::string {
  152. if (!block_id.is_valid()) {
  153. return "!invalid";
  154. }
  155. const auto& [label_scope, label_name] = labels[block_id.index];
  156. if (!label_name) {
  157. // This should not happen in valid IR.
  158. std::string str;
  159. llvm::raw_string_ostream(str)
  160. << "<unexpected instblockref " << block_id << ">";
  161. return str;
  162. }
  163. if (label_scope == scope_id) {
  164. return ("!" + label_name.str()).str();
  165. }
  166. return (GetScopeName(label_scope) + ".!" + label_name.str()).str();
  167. }
  168. auto InstNamer::Namespace::Name::str() const -> llvm::StringRef {
  169. llvm::StringMapEntry<NameResult>* value = value_;
  170. CARBON_CHECK(value) << "cannot print a null name";
  171. while (value->second.ambiguous && value->second.fallback) {
  172. value = value->second.fallback.value_;
  173. }
  174. return value->first();
  175. }
  176. auto InstNamer::Namespace::AllocateName(const InstNamer& inst_namer,
  177. SemIR::LocId loc_id, std::string name)
  178. -> Name {
  179. // The best (shortest) name for this instruction so far, and the current
  180. // name for it.
  181. Name best;
  182. Name current;
  183. // Add `name` as a name for this entity.
  184. auto add_name = [&](bool mark_ambiguous = true) {
  185. auto [it, added] = allocated.insert({name, NameResult()});
  186. Name new_name = Name(it);
  187. if (!added) {
  188. if (mark_ambiguous) {
  189. // This name was allocated for a different instruction. Mark it as
  190. // ambiguous and keep looking for a name for this instruction.
  191. new_name.SetAmbiguous();
  192. }
  193. } else {
  194. if (!best) {
  195. best = new_name;
  196. } else {
  197. CARBON_CHECK(current);
  198. current.SetFallback(new_name);
  199. }
  200. current = new_name;
  201. }
  202. return added;
  203. };
  204. // Use the given name if it's available.
  205. if (!name.empty()) {
  206. add_name();
  207. }
  208. // Append location information to try to disambiguate.
  209. // TODO: Consider handling inst_id cases.
  210. if (loc_id.is_node_id()) {
  211. auto token = inst_namer.parse_tree_.node_token(loc_id.node_id());
  212. llvm::raw_string_ostream(name)
  213. << ".loc" << inst_namer.tokenized_buffer_.GetLineNumber(token);
  214. add_name();
  215. llvm::raw_string_ostream(name)
  216. << "_" << inst_namer.tokenized_buffer_.GetColumnNumber(token);
  217. add_name();
  218. }
  219. // Append numbers until we find an available name.
  220. name += ".";
  221. auto name_size_without_counter = name.size();
  222. for (int counter = 1;; ++counter) {
  223. name.resize(name_size_without_counter);
  224. llvm::raw_string_ostream(name) << counter;
  225. if (add_name(/*mark_ambiguous=*/false)) {
  226. return best;
  227. }
  228. }
  229. }
  230. auto InstNamer::AddBlockLabel(ScopeId scope_id, InstBlockId block_id,
  231. std::string name, SemIR::LocId loc_id) -> void {
  232. if (!block_id.is_valid() || labels[block_id.index].second) {
  233. return;
  234. }
  235. if (!loc_id.is_valid()) {
  236. if (const auto& block = sem_ir_.inst_blocks().Get(block_id);
  237. !block.empty()) {
  238. loc_id = sem_ir_.insts().GetLocId(block.front());
  239. }
  240. }
  241. labels[block_id.index] = {
  242. scope_id, GetScopeInfo(scope_id).labels.AllocateName(*this, loc_id,
  243. std::move(name))};
  244. }
  245. // Finds and adds a suitable block label for the given SemIR instruction that
  246. // represents some kind of branch.
  247. auto InstNamer::AddBlockLabel(ScopeId scope_id, SemIR::LocId loc_id,
  248. AnyBranch branch) -> void {
  249. llvm::StringRef name;
  250. switch (parse_tree_.node_kind(loc_id.node_id())) {
  251. case Parse::NodeKind::IfExprIf:
  252. switch (branch.kind) {
  253. case BranchIf::Kind:
  254. name = "if.expr.then";
  255. break;
  256. case Branch::Kind:
  257. name = "if.expr.else";
  258. break;
  259. case BranchWithArg::Kind:
  260. name = "if.expr.result";
  261. break;
  262. default:
  263. break;
  264. }
  265. break;
  266. case Parse::NodeKind::IfCondition:
  267. switch (branch.kind) {
  268. case BranchIf::Kind:
  269. name = "if.then";
  270. break;
  271. case Branch::Kind:
  272. name = "if.else";
  273. break;
  274. default:
  275. break;
  276. }
  277. break;
  278. case Parse::NodeKind::IfStatement:
  279. name = "if.done";
  280. break;
  281. case Parse::NodeKind::ShortCircuitOperandAnd:
  282. name = branch.kind == BranchIf::Kind ? "and.rhs" : "and.result";
  283. break;
  284. case Parse::NodeKind::ShortCircuitOperandOr:
  285. name = branch.kind == BranchIf::Kind ? "or.rhs" : "or.result";
  286. break;
  287. case Parse::NodeKind::WhileConditionStart:
  288. name = "while.cond";
  289. break;
  290. case Parse::NodeKind::WhileCondition:
  291. switch (branch.kind) {
  292. case BranchIf::Kind:
  293. name = "while.body";
  294. break;
  295. case Branch::Kind:
  296. name = "while.done";
  297. break;
  298. default:
  299. break;
  300. }
  301. break;
  302. default:
  303. break;
  304. }
  305. AddBlockLabel(scope_id, branch.target_id, name.str(), loc_id);
  306. }
  307. auto InstNamer::CollectNamesInBlock(ScopeId scope_id, InstBlockId block_id)
  308. -> void {
  309. if (block_id.is_valid()) {
  310. CollectNamesInBlock(scope_id, sem_ir_.inst_blocks().Get(block_id));
  311. }
  312. }
  313. auto InstNamer::CollectNamesInBlock(ScopeId scope_id,
  314. llvm::ArrayRef<InstId> block) -> void {
  315. Scope& scope = GetScopeInfo(scope_id);
  316. // Use bound names where available. Otherwise, assign a backup name.
  317. for (auto inst_id : block) {
  318. if (!inst_id.is_valid()) {
  319. continue;
  320. }
  321. auto untyped_inst = sem_ir_.insts().Get(inst_id);
  322. auto add_inst_name = [&](std::string name) {
  323. insts[inst_id.index] = {
  324. scope_id, scope.insts.AllocateName(
  325. *this, sem_ir_.insts().GetLocId(inst_id), name)};
  326. };
  327. auto add_inst_name_id = [&](NameId name_id, llvm::StringRef suffix = "") {
  328. add_inst_name(
  329. (sem_ir_.names().GetIRBaseName(name_id).str() + suffix).str());
  330. };
  331. if (auto branch = untyped_inst.TryAs<AnyBranch>()) {
  332. AddBlockLabel(scope_id, sem_ir_.insts().GetLocId(inst_id), *branch);
  333. }
  334. CARBON_KIND_SWITCH(untyped_inst) {
  335. case CARBON_KIND(AddrPattern inst): {
  336. // TODO: We need to assign names to parameters that appear in
  337. // function declarations, which may be nested within a pattern. For
  338. // now, just look through `addr`, but we should find a better way to
  339. // visit parameters.
  340. CollectNamesInBlock(scope_id, inst.inner_id);
  341. break;
  342. }
  343. case CARBON_KIND(AssociatedConstantDecl inst): {
  344. add_inst_name_id(inst.name_id);
  345. continue;
  346. }
  347. case BindAlias::Kind:
  348. case BindName::Kind:
  349. case BindSymbolicName::Kind:
  350. case ExportDecl::Kind: {
  351. auto inst = untyped_inst.As<AnyBindNameOrExportDecl>();
  352. add_inst_name_id(sem_ir_.bind_names().Get(inst.bind_name_id).name_id);
  353. continue;
  354. }
  355. case CARBON_KIND(Call inst): {
  356. auto callee_function =
  357. SemIR::GetCalleeFunction(sem_ir_, inst.callee_id);
  358. if (!callee_function.function_id.is_valid()) {
  359. break;
  360. }
  361. const auto& function =
  362. sem_ir_.functions().Get(callee_function.function_id);
  363. // Name the call's result based on the callee.
  364. if (function.builtin_kind != SemIR::BuiltinFunctionKind::None) {
  365. // For a builtin, use the builtin name. Otherwise, we'd typically pick
  366. // the name `Op` below, which is probably not very useful.
  367. add_inst_name(function.builtin_kind.name().str());
  368. continue;
  369. }
  370. add_inst_name_id(function.name_id, ".call");
  371. continue;
  372. }
  373. case CARBON_KIND(ClassDecl inst): {
  374. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".decl");
  375. CollectNamesInBlock(scope_id, inst.decl_block_id);
  376. continue;
  377. }
  378. case CARBON_KIND(ClassType inst): {
  379. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id);
  380. continue;
  381. }
  382. case CARBON_KIND(FunctionDecl inst): {
  383. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  384. ".decl");
  385. CollectNamesInBlock(scope_id, inst.decl_block_id);
  386. continue;
  387. }
  388. case CARBON_KIND(FunctionType inst): {
  389. add_inst_name_id(sem_ir_.functions().Get(inst.function_id).name_id,
  390. ".type");
  391. continue;
  392. }
  393. case CARBON_KIND(GenericClassType inst): {
  394. add_inst_name_id(sem_ir_.classes().Get(inst.class_id).name_id, ".type");
  395. continue;
  396. }
  397. case CARBON_KIND(GenericInterfaceType inst): {
  398. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  399. ".type");
  400. continue;
  401. }
  402. case CARBON_KIND(ImplDecl inst): {
  403. CollectNamesInBlock(scope_id, inst.decl_block_id);
  404. break;
  405. }
  406. case ImportRefUnloaded::Kind:
  407. case ImportRefLoaded::Kind: {
  408. add_inst_name("import_ref");
  409. // When building import refs, we frequently add instructions without
  410. // a block. Constants that refer to them need to be separately
  411. // named.
  412. auto const_id = sem_ir_.constant_values().Get(inst_id);
  413. if (const_id.is_valid() && const_id.is_template()) {
  414. auto const_inst_id = sem_ir_.constant_values().GetInstId(const_id);
  415. if (!insts[const_inst_id.index].second) {
  416. CollectNamesInBlock(ScopeId::ImportRef, const_inst_id);
  417. }
  418. }
  419. continue;
  420. }
  421. case CARBON_KIND(InterfaceDecl inst): {
  422. add_inst_name_id(sem_ir_.interfaces().Get(inst.interface_id).name_id,
  423. ".decl");
  424. CollectNamesInBlock(scope_id, inst.decl_block_id);
  425. continue;
  426. }
  427. case CARBON_KIND(NameRef inst): {
  428. add_inst_name_id(inst.name_id, ".ref");
  429. continue;
  430. }
  431. // The namespace is specified here due to the name conflict.
  432. case CARBON_KIND(SemIR::Namespace inst): {
  433. add_inst_name_id(sem_ir_.name_scopes().Get(inst.name_scope_id).name_id);
  434. continue;
  435. }
  436. case CARBON_KIND(Param inst): {
  437. add_inst_name_id(inst.name_id);
  438. continue;
  439. }
  440. case CARBON_KIND(SpliceBlock inst): {
  441. CollectNamesInBlock(scope_id, inst.block_id);
  442. break;
  443. }
  444. case CARBON_KIND(StructValue inst): {
  445. if (auto fn_ty = sem_ir_.types().TryGetAs<FunctionType>(inst.type_id)) {
  446. add_inst_name_id(sem_ir_.functions().Get(fn_ty->function_id).name_id);
  447. } else if (auto generic_class_ty =
  448. sem_ir_.types().TryGetAs<GenericClassType>(
  449. inst.type_id)) {
  450. add_inst_name_id(
  451. sem_ir_.classes().Get(generic_class_ty->class_id).name_id);
  452. } else if (auto generic_interface_ty =
  453. sem_ir_.types().TryGetAs<GenericInterfaceType>(
  454. inst.type_id)) {
  455. add_inst_name_id(sem_ir_.interfaces()
  456. .Get(generic_interface_ty->interface_id)
  457. .name_id);
  458. } else {
  459. add_inst_name("struct");
  460. }
  461. continue;
  462. }
  463. case CARBON_KIND(TupleValue inst): {
  464. if (sem_ir_.types().Is<ArrayType>(inst.type_id)) {
  465. add_inst_name("array");
  466. } else {
  467. add_inst_name("tuple");
  468. }
  469. continue;
  470. }
  471. case CARBON_KIND(VarStorage inst): {
  472. add_inst_name_id(inst.name_id, ".var");
  473. continue;
  474. }
  475. default: {
  476. break;
  477. }
  478. }
  479. // Sequentially number all remaining values.
  480. if (untyped_inst.kind().value_kind() != InstValueKind::None) {
  481. add_inst_name("");
  482. }
  483. }
  484. }
  485. } // namespace Carbon::SemIR