Просмотр исходного кода

Include a fingerprint of the specific arguments in mangled names. (#4771)

Instead of including the raw index of the specific, which is unstable
across files and across unrelated changes, use a fingerprint of the
constant values of the specific arguments. This is a placeholder until
we decide on how we want to mangle specific functions.
Richard Smith 1 год назад
Родитель
Commit
9a5f2d734b

+ 9 - 3
toolchain/lower/mangler.cpp

@@ -139,10 +139,16 @@ auto Mangler::Mangle(SemIR::FunctionId function_id,
 
   MangleInverseQualifiedNameScope(os, function.parent_scope_id);
 
-  // TODO: Add proper support for generic entities. The ID we emit here will not
-  // be consistent across object files.
+  // TODO: Add proper support for mangling generic entities. For now we use a
+  // fingerprint of the specific arguments, which should be stable across files,
+  // but isn't necessarily stable across toolchain changes.
   if (specific_id.is_valid()) {
-    os << "." << specific_id.index;
+    os << ".";
+    llvm::write_hex(
+        os,
+        fingerprinter_.GetOrCompute(
+            &sem_ir(), sem_ir().specifics().Get(specific_id).args_id),
+        llvm::HexPrintStyle::Lower, 16);
   }
 
   return os.str();

+ 5 - 0
toolchain/lower/mangler.h

@@ -10,6 +10,7 @@
 #include "toolchain/lower/file_context.h"
 #include "toolchain/sem_ir/constant.h"
 #include "toolchain/sem_ir/ids.h"
+#include "toolchain/sem_ir/inst_fingerprinter.h"
 
 namespace Carbon::Lower {
 
@@ -49,6 +50,10 @@ class Mangler {
   }
 
   FileContext& file_context_;
+
+  // TODO: If `file_context_` has an `InstNamer`, we could share its
+  // fingerprinter.
+  SemIR::InstFingerprinter fingerprinter_;
 };
 
 }  // namespace Carbon::Lower

+ 8 - 8
toolchain/lower/testdata/function/generic/call.carbon

@@ -44,11 +44,11 @@ fn G() {
 // CHECK:STDOUT:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %d.var, ptr align 1 @D.val.loc19_16, i64 0, i1 false), !dbg !9
 // CHECK:STDOUT:   call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7
 // CHECK:STDOUT:   store i32 0, ptr %n.var, align 4, !dbg !10
-// CHECK:STDOUT:   call void @_CF.Main.129(ptr %c.var), !dbg !11
-// CHECK:STDOUT:   call void @_CF.Main.130(ptr %d.var), !dbg !12
+// CHECK:STDOUT:   call void @_CF.Main.15b1f98bd9cc0c5b(ptr %c.var), !dbg !11
+// CHECK:STDOUT:   call void @_CF.Main.2cc450fc05045897(ptr %d.var), !dbg !12
 // CHECK:STDOUT:   %.loc24 = load i32, ptr %n.var, align 4, !dbg !13
-// CHECK:STDOUT:   call void @_CF.Main.131(i32 %.loc24), !dbg !14
-// CHECK:STDOUT:   call void @_CF.Main.132(%type zeroinitializer), !dbg !15
+// CHECK:STDOUT:   call void @_CF.Main.9b813875c98e31f1(i32 %.loc24), !dbg !14
+// CHECK:STDOUT:   call void @_CF.Main.5754c7a55c7cbe4a(%type zeroinitializer), !dbg !15
 // CHECK:STDOUT:   ret void, !dbg !16
 // CHECK:STDOUT: }
 // CHECK:STDOUT:
@@ -58,13 +58,13 @@ fn G() {
 // CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
 // CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
 // CHECK:STDOUT:
-// CHECK:STDOUT: declare void @_CF.Main.129(ptr)
+// CHECK:STDOUT: declare void @_CF.Main.15b1f98bd9cc0c5b(ptr)
 // CHECK:STDOUT:
-// CHECK:STDOUT: declare void @_CF.Main.130(ptr)
+// CHECK:STDOUT: declare void @_CF.Main.2cc450fc05045897(ptr)
 // CHECK:STDOUT:
-// CHECK:STDOUT: declare void @_CF.Main.131(i32)
+// CHECK:STDOUT: declare void @_CF.Main.9b813875c98e31f1(i32)
 // CHECK:STDOUT:
-// CHECK:STDOUT: declare void @_CF.Main.132(%type)
+// CHECK:STDOUT: declare void @_CF.Main.5754c7a55c7cbe4a(%type)
 // CHECK:STDOUT:
 // CHECK:STDOUT: ; uselistorder directives
 // CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 2, 1, 0 }

+ 2 - 2
toolchain/lower/testdata/function/generic/call_method.carbon

@@ -34,7 +34,7 @@ fn CallF() -> i32 {
 // CHECK:STDOUT:   call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7
 // CHECK:STDOUT:   store i32 0, ptr %n.var, align 4, !dbg !9
 // CHECK:STDOUT:   %.loc20_14 = load i32, ptr %n.var, align 4, !dbg !10
-// CHECK:STDOUT:   %F.call = call i32 @_CF.C.Main.129(ptr %c.var, i32 %.loc20_14), !dbg !11
+// CHECK:STDOUT:   %F.call = call i32 @_CF.C.Main.9b813875c98e31f1(ptr %c.var, i32 %.loc20_14), !dbg !11
 // CHECK:STDOUT:   ret i32 %F.call, !dbg !12
 // CHECK:STDOUT: }
 // CHECK:STDOUT:
@@ -44,7 +44,7 @@ fn CallF() -> i32 {
 // CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
 // CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
 // CHECK:STDOUT:
-// CHECK:STDOUT: declare i32 @_CF.C.Main.129(ptr, i32)
+// CHECK:STDOUT: declare i32 @_CF.C.Main.9b813875c98e31f1(ptr, i32)
 // CHECK:STDOUT:
 // CHECK:STDOUT: ; uselistorder directives
 // CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 1, 0 }

+ 43 - 12
toolchain/sem_ir/inst_fingerprinter.cpp

@@ -17,7 +17,7 @@ namespace Carbon::SemIR {
 namespace {
 struct Worklist {
   // The file containing the instruction we're currently processing.
-  const File* sem_ir;
+  const File* sem_ir = nullptr;
   // The instructions we need to compute fingerprints for.
   llvm::SmallVector<std::pair<const File*, InstId>> todo;
   // The contents of the current instruction as accumulated so far. This is used
@@ -28,6 +28,15 @@ struct Worklist {
   // the cache if not already present.
   Map<std::pair<const File*, InstId>, uint64_t>* fingerprints;
 
+  // Prepare to fingerprint a new instruction.
+  auto Prepare(const File* file) -> void {
+    sem_ir = file;
+    contents.clear();
+  }
+
+  // Finish fingerprinting and compute the fingerprint.
+  auto Finish() -> uint64_t { return llvm::stable_hash_combine(contents); }
+
   // Add an invalid marker to the contents. This is used when the entity
   // contains an invalid ID. This uses an arbitrary fixed value that is assumed
   // to be unlikely to collide with a valid value.
@@ -286,15 +295,22 @@ struct Worklist {
     Table[kind.ToIndex()](*this, arg);
   }
 
-  // Ensure all the instructions on the todo list have fingerprints.
-  auto Run() -> void {
-    while (!todo.empty()) {
+  // Ensure all the instructions on the todo list have fingerprints. To avoid a
+  // re-lookup, returns the fingerprint of the first instruction on the todo
+  // list, and requires the todo list to be non-empty.
+  auto Run() -> uint64_t {
+    CARBON_CHECK(!todo.empty());
+    while (true) {
       auto [next_sem_ir, next_inst_id] = todo.back();
 
       // If we already have a fingerprint for this instruction, we have nothing
       // to do. Just pop it from `todo`.
-      if (fingerprints->Contains(std::pair(next_sem_ir, next_inst_id))) {
+      if (auto lookup =
+              fingerprints->Lookup(std::pair(next_sem_ir, next_inst_id))) {
         todo.pop_back();
+        if (todo.empty()) {
+          return lookup.value();
+        }
         continue;
       }
 
@@ -306,8 +322,7 @@ struct Worklist {
       auto [arg0_kind, arg1_kind] = inst.ArgKinds();
 
       // Prepare to fingerprint this instruction.
-      sem_ir = next_sem_ir;
-      contents.clear();
+      Prepare(next_sem_ir);
 
       // Add the instruction's fields to the contents.
       Add(inst.kind());
@@ -326,9 +341,12 @@ struct Worklist {
       // pop it from the todo list. Otherwise, we leave it on the todo list so
       // we can compute its fingerprint once we've finished the work we added.
       if (todo.size() == init_size) {
-        auto fingerprint = llvm::stable_hash_combine(contents);
+        uint64_t fingerprint = Finish();
         fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), fingerprint);
         todo.pop_back();
+        if (todo.empty()) {
+          return fingerprint;
+        }
       }
     }
   }
@@ -337,11 +355,24 @@ struct Worklist {
 
 auto InstFingerprinter::GetOrCompute(const File* file, InstId inst_id)
     -> uint64_t {
-  Worklist worklist = {.sem_ir = nullptr,
-                       .todo = {{file, inst_id}},
+  Worklist worklist = {.todo = {{file, inst_id}},
                        .fingerprints = &fingerprints_};
-  worklist.Run();
-  return fingerprints_.Lookup(std::pair(file, inst_id)).value();
+  return worklist.Run();
+}
+
+auto InstFingerprinter::GetOrCompute(const File* file,
+                                     InstBlockId inst_block_id) -> uint64_t {
+  Worklist worklist = {.todo = {}, .fingerprints = &fingerprints_};
+  worklist.Prepare(file);
+  worklist.Add(inst_block_id);
+  if (!worklist.todo.empty()) {
+    worklist.Run();
+    worklist.Prepare(file);
+    worklist.Add(inst_block_id);
+  }
+  CARBON_CHECK(worklist.todo.empty(),
+               "Should not require more than two passes.");
+  return worklist.Finish();
 }
 
 }  // namespace Carbon::SemIR

+ 9 - 0
toolchain/sem_ir/inst_fingerprinter.h

@@ -17,9 +17,18 @@ class InstFingerprinter {
   // Gets or computes a fingerprint for the given instruction.
   auto GetOrCompute(const File* file, InstId inst_id) -> uint64_t;
 
+  // Gets or computes a fingerprint for the given instruction block.
+  auto GetOrCompute(const File* file, InstBlockId inst_block_id) -> uint64_t;
+
  private:
   // The fingerprint for each instruction that has had its fingerprint computed,
   // indexed by the InstId's index.
+  //
+  // TODO: Experiment with also caching fingerprints for instruction blocks once
+  // we can get realistic performance measurements for this. This would simplify
+  // the `GetOrCompute` overload for `InstBlockId`s, and may save some work if
+  // the same canonical inst block is used by multiple instructions, for example
+  // as a specific argument list.
   Map<std::pair<const File*, InstId>, uint64_t> fingerprints_;
 };