Przeglądaj źródła

Refactor coalescing logic out of the file context. (#5723)

Move coalescing logic outside of the file context.
This is intended to be pure refactoring / NFC.
Alina Sbirlea 10 miesięcy temu
rodzic
commit
dd0905ccbb

+ 2 - 0
toolchain/lower/BUILD

@@ -35,6 +35,7 @@ cc_library(
         "function_context.cpp",
         "function_context.cpp",
         "mangler.cpp",
         "mangler.cpp",
         "mangler.h",
         "mangler.h",
+        "specific_coalescer.cpp",
     ] +
     ] +
     # Glob handler files to avoid missing any.
     # Glob handler files to avoid missing any.
     glob([
     glob([
@@ -44,6 +45,7 @@ cc_library(
         "context.h",
         "context.h",
         "file_context.h",
         "file_context.h",
         "function_context.h",
         "function_context.h",
+        "specific_coalescer.h",
     ],
     ],
     deps = [
     deps = [
         "//common:check",
         "//common:check",

+ 6 - 248
toolchain/lower/file_context.cpp

@@ -15,7 +15,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Linker/Linker.h"
 #include "llvm/Linker/Linker.h"
-#include "llvm/Support/BLAKE3.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "toolchain/base/kind_switch.h"
 #include "toolchain/base/kind_switch.h"
@@ -23,6 +22,7 @@
 #include "toolchain/lower/constant.h"
 #include "toolchain/lower/constant.h"
 #include "toolchain/lower/function_context.h"
 #include "toolchain/lower/function_context.h"
 #include "toolchain/lower/mangler.h"
 #include "toolchain/lower/mangler.h"
+#include "toolchain/lower/specific_coalescer.h"
 #include "toolchain/sem_ir/absolute_node_id.h"
 #include "toolchain/sem_ir/absolute_node_id.h"
 #include "toolchain/sem_ir/diagnostic_loc_converter.h"
 #include "toolchain/sem_ir/diagnostic_loc_converter.h"
 #include "toolchain/sem_ir/entry_point.h"
 #include "toolchain/sem_ir/entry_point.h"
@@ -54,9 +54,7 @@ FileContext::FileContext(Context& context, const SemIR::File& sem_ir,
       constants_(LoweredConstantStore::MakeWithExplicitSize(
       constants_(LoweredConstantStore::MakeWithExplicitSize(
           sem_ir.insts().size(), nullptr)),
           sem_ir.insts().size(), nullptr)),
       lowered_specifics_(sem_ir.generics(), {}),
       lowered_specifics_(sem_ir.generics(), {}),
-      lowered_specifics_type_fingerprint_(sem_ir.specifics(), {}),
-      lowered_specific_fingerprint_(sem_ir.specifics(), {}),
-      equivalent_specifics_(sem_ir.specifics(), SemIR::SpecificId::None) {
+      coalescer_(vlog_stream_, sem_ir.specifics()) {
   // Initialization that relies on invariants of the class.
   // Initialization that relies on invariants of the class.
   cpp_code_generator_ = CreateCppCodeGenerator();
   cpp_code_generator_ = CreateCppCodeGenerator();
   CARBON_CHECK(!sem_ir.has_errors(),
   CARBON_CHECK(!sem_ir.has_errors(),
@@ -161,243 +159,8 @@ auto FileContext::Finalize() -> void {
 
 
   // Find equivalent specifics (from the same generic), replace all uses and
   // Find equivalent specifics (from the same generic), replace all uses and
   // remove duplicately lowered function definitions.
   // remove duplicately lowered function definitions.
-  CoalesceEquivalentSpecifics();
-}
-
-auto FileContext::InsertPair(
-    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-    Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
-    -> bool {
-  if (specific_id1.index > specific_id2.index) {
-    std::swap(specific_id1.index, specific_id2.index);
-  }
-  auto insert_result =
-      set_of_pairs.Insert(std::make_pair(specific_id1, specific_id2));
-  return insert_result.is_inserted();
-}
-
-auto FileContext::ContainsPair(
-    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-    const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
-    -> bool {
-  if (specific_id1.index > specific_id2.index) {
-    std::swap(specific_id1.index, specific_id2.index);
-  }
-  return set_of_pairs.Contains(std::make_pair(specific_id1, specific_id2));
-}
-
-auto FileContext::CoalesceEquivalentSpecifics() -> void {
-  for (auto& specifics : lowered_specifics_.values()) {
-    // Collect specifics to delete for each generic. Replace and remove each
-    // after processing all specifics for a generic. Note, we could also
-    // replace and remove all specifics after processing all generics.
-    llvm::SmallVector<SemIR::SpecificId> specifics_to_delete;
-    // i cannot be unsigned due to the comparison with a negative number when
-    // the specifics vector is empty.
-    for (int i = 0; i < static_cast<int>(specifics.size()) - 1; ++i) {
-      // This specific was already replaced, skip it.
-      if (equivalent_specifics_.Get(specifics[i]).has_value() &&
-          equivalent_specifics_.Get(specifics[i]) != specifics[i]) {
-        specifics_to_delete.push_back(specifics[i]);
-        specifics[i] = specifics[specifics.size() - 1];
-        specifics.pop_back();
-        --i;
-        continue;
-      }
-      // TODO: Improve quadratic behavior by using a single hash based on
-      // `lowered_specifics_type_fingerprint_` and `common_fingerprint`.
-      for (int j = i + 1; j < static_cast<int>(specifics.size()); ++j) {
-        // When the specific was already replaced, skip it.
-        if (equivalent_specifics_.Get(specifics[j]).has_value() &&
-            equivalent_specifics_.Get(specifics[j]) != specifics[j]) {
-          specifics_to_delete.push_back(specifics[j]);
-          specifics[j] = specifics[specifics.size() - 1];
-          specifics.pop_back();
-          --j;
-          continue;
-        }
-
-        // When the two specifics are not equivalent due to the function type
-        // info stored in lowered_specifics_types, mark non-equivalance. This
-        // can be reused to short-cut another path and continue the search for
-        // other equivalences.
-        if (!AreFunctionTypesEquivalent(specifics[i], specifics[j])) {
-          InsertPair(specifics[i], specifics[j], non_equivalent_specifics_);
-          continue;
-        }
-
-        Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
-            visited_equivalent_specifics;
-        InsertPair(specifics[i], specifics[j], visited_equivalent_specifics);
-        // Function type information matches; check usages inside the function
-        // body that are dependent on the specific. This information has been
-        // stored in lowered_states while lowering each function body.
-        if (AreFunctionBodiesEquivalent(specifics[i], specifics[j],
-                                        visited_equivalent_specifics)) {
-          // When processing equivalences, we may change the canonical specific
-          // multiple times, so we don't delete replaced specifics until the
-          // end.
-          visited_equivalent_specifics.ForEach(
-              [&](std::pair<SemIR::SpecificId, SemIR::SpecificId>
-                      equivalent_entry) {
-                CARBON_VLOG("Found equivalent specifics: {0}, {1}",
-                            equivalent_entry.first, equivalent_entry.second);
-                ProcessSpecificEquivalence(equivalent_entry);
-              });
-
-          // Removed the replaced specific from the list of emitted specifics.
-          // Only the top level, since the others are somewhere else in the
-          // vector, they will be found and removed during processing.
-          specifics_to_delete.push_back(specifics[j]);
-          specifics[j] = specifics[specifics.size() - 1];
-          specifics.pop_back();
-          --j;
-        } else {
-          // Only mark non-equivalence based on state for starting specifics.
-          InsertPair(specifics[i], specifics[j], non_equivalent_specifics_);
-        }
-      }
-    }
-
-    // Once all equivalences are found for a generic, update and delete up
-    // equivalent specifics.
-    for (auto specific_id : specifics_to_delete) {
-      UpdateAndDeleteLLVMFunction(specific_id);
-    }
-  }
-}
-
-auto FileContext::ProcessSpecificEquivalence(
-    std::pair<SemIR::SpecificId, SemIR::SpecificId> pair) -> void {
-  auto [specific_id1, specific_id2] = pair;
-  CARBON_CHECK(specific_id1.has_value() && specific_id2.has_value(),
-               "Expected values in equivalence check");
-
-  auto get_canon = [&](SemIR::SpecificId specific_id) {
-    auto equiv_id = equivalent_specifics_.Get(specific_id);
-    return equiv_id.has_value() ? equiv_id : specific_id;
-  };
-  auto canon_id1 = get_canon(specific_id1);
-  auto canon_id2 = get_canon(specific_id2);
-
-  if (canon_id1 == canon_id2) {
-    // Already equivalent, there was a previous replacement.
-    return;
-  }
-
-  if (canon_id1.index >= canon_id2.index) {
-    // Prefer the earlier index for canonical values.
-    std::swap(canon_id1, canon_id2);
-  }
-
-  // Update equivalent_specifics_ for all. This is used as an indicator that
-  // this specific_id may be the canonical one when reducing the equivalence
-  // chains in `IsKnownEquivalence`.
-  equivalent_specifics_.Set(specific_id1, canon_id1);
-  equivalent_specifics_.Set(specific_id2, canon_id1);
-  equivalent_specifics_.Set(canon_id1, canon_id1);
-  equivalent_specifics_.Set(canon_id2, canon_id1);
-}
-
-auto FileContext::UpdateEquivalentSpecific(SemIR::SpecificId specific_id)
-    -> void {
-  if (!equivalent_specifics_.Get(specific_id).has_value()) {
-    return;
-  }
-
-  llvm::SmallVector<SemIR::SpecificId> stack;
-  SemIR::SpecificId specific_to_update = specific_id;
-  SemIR::SpecificId equivalent = equivalent_specifics_.Get(specific_to_update);
-  SemIR::SpecificId equivalent_next = equivalent_specifics_.Get(equivalent);
-  while (equivalent != equivalent_next) {
-    stack.push_back(specific_to_update);
-    specific_to_update = equivalent;
-    equivalent = equivalent_next;
-    equivalent_next = equivalent_specifics_.Get(equivalent_next);
-  }
-
-  for (auto specific : stack) {
-    equivalent_specifics_.Set(specific, equivalent);
-  }
-}
-
-auto FileContext::UpdateAndDeleteLLVMFunction(SemIR::SpecificId specific_id)
-    -> void {
-  UpdateEquivalentSpecific(specific_id);
-  auto* old_function = specific_functions_.Get(specific_id);
-  auto* new_function =
-      specific_functions_.Get(equivalent_specifics_.Get(specific_id));
-  old_function->replaceAllUsesWith(new_function);
-  old_function->eraseFromParent();
-  specific_functions_.Set(specific_id, new_function);
-}
-
-auto FileContext::IsKnownEquivalence(SemIR::SpecificId specific_id1,
-                                     SemIR::SpecificId specific_id2) -> bool {
-  if (!equivalent_specifics_.Get(specific_id1).has_value() ||
-      !equivalent_specifics_.Get(specific_id2).has_value()) {
-    return false;
-  }
-
-  UpdateEquivalentSpecific(specific_id1);
-  UpdateEquivalentSpecific(specific_id2);
-
-  return equivalent_specifics_.Get(specific_id1) ==
-         equivalent_specifics_.Get(specific_id2);
-}
-
-auto FileContext::AreFunctionTypesEquivalent(SemIR::SpecificId specific_id1,
-                                             SemIR::SpecificId specific_id2)
-    -> bool {
-  CARBON_CHECK(specific_id1.has_value() && specific_id2.has_value());
-  return lowered_specifics_type_fingerprint_.Get(specific_id1) ==
-         lowered_specifics_type_fingerprint_.Get(specific_id2);
-}
-
-auto FileContext::AreFunctionBodiesEquivalent(
-    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-    Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
-        visited_equivalent_specifics) -> bool {
-  llvm::SmallVector<std::pair<SemIR::SpecificId, SemIR::SpecificId>> worklist;
-  worklist.push_back({specific_id1, specific_id2});
-
-  while (!worklist.empty()) {
-    auto outer_pair = worklist.pop_back_val();
-    auto [specific_id1, specific_id2] = outer_pair;
-
-    auto state1 = lowered_specific_fingerprint_.Get(specific_id1);
-    auto state2 = lowered_specific_fingerprint_.Get(specific_id2);
-    if (state1.common_fingerprint != state2.common_fingerprint) {
-      InsertPair(specific_id1, specific_id2, non_equivalent_specifics_);
-      return false;
-    }
-    if (state1.specific_fingerprint == state2.specific_fingerprint) {
-      continue;
-    }
-
-    // A size difference should have been detected by the common fingerprint.
-    CARBON_CHECK(state1.calls.size() == state2.calls.size(),
-                 "Number of specific calls expected to be the same.");
-
-    for (auto [state1_call, state2_call] :
-         llvm::zip(state1.calls, state2.calls)) {
-      if (state1_call != state2_call) {
-        if (ContainsPair(state1_call, state2_call, non_equivalent_specifics_)) {
-          return false;
-        }
-        if (IsKnownEquivalence(state1_call, state2_call)) {
-          continue;
-        }
-        if (!InsertPair(state1_call, state2_call,
-                        visited_equivalent_specifics)) {
-          continue;
-        }
-        // Leave the added equivalence pair in place and continue.
-        worklist.push_back({state1_call, state2_call});
-      }
-    }
-  }
-  return true;
+  coalescer_.CoalesceEquivalentSpecifics(lowered_specifics_,
+                                         specific_functions_);
 }
 }
 
 
 auto FileContext::CreateCppCodeGenerator()
 auto FileContext::CreateCppCodeGenerator()
@@ -634,12 +397,7 @@ auto FileContext::HandleReferencedSpecificFunction(
   // For now, we compute the function type fingerprint only for specifics,
   // For now, we compute the function type fingerprint only for specifics,
   // though we might need it for all functions in order to create a canonical
   // though we might need it for all functions in order to create a canonical
   // fingerprint across translation units.
   // fingerprint across translation units.
-  llvm::BLAKE3 function_type_fingerprint;
-  RawStringOstream os;
-  llvm_type->print(os);
-  function_type_fingerprint.update(os.TakeStr());
-  function_type_fingerprint.final(
-      lowered_specifics_type_fingerprint_.Get(specific_id));
+  coalescer_.CreateTypeFingerprint(specific_id, llvm_type);
 }
 }
 
 
 auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id,
 auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id,
@@ -817,7 +575,7 @@ auto FileContext::BuildFunctionBody(SemIR::FunctionId function_id,
 
 
   FunctionContext function_lowering(
   FunctionContext function_lowering(
       definition_context, llvm_function, *this, specific_id,
       definition_context, llvm_function, *this, specific_id,
-      InitializeFingerprintForSpecific(specific_id),
+      coalescer_.InitializeFingerprintForSpecific(specific_id),
       definition_context.BuildDISubprogram(definition_function, llvm_function),
       definition_context.BuildDISubprogram(definition_function, llvm_function),
       vlog_stream_);
       vlog_stream_);
 
 

+ 2 - 109
toolchain/lower/file_context.h

@@ -8,8 +8,8 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/ModuleBuilder.h"
 #include "clang/CodeGen/ModuleBuilder.h"
 #include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/PreprocessorOptions.h"
-#include "llvm/Support/BLAKE3.h"
 #include "toolchain/lower/context.h"
 #include "toolchain/lower/context.h"
+#include "toolchain/lower/specific_coalescer.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/ids.h"
 #include "toolchain/sem_ir/ids.h"
@@ -23,22 +23,6 @@ class FileContext {
   using LoweredConstantStore =
   using LoweredConstantStore =
       FixedSizeValueStore<SemIR::InstId, llvm::Constant*>;
       FixedSizeValueStore<SemIR::InstId, llvm::Constant*>;
 
 
-  // Describes a specific function's body fingerprint.
-  struct SpecificFunctionFingerprint {
-    // Fingerprint with all specific-dependent instructions, except specific
-    // calls. This is built by the `FunctionContext` while lowering each
-    // instruction in the definition of a specific function.
-    // TODO: This can be merged with the function type fingerprint, for a
-    // single upfront non-equivalence check, and hash bucketing for deeper
-    // equivalence evaluation.
-    llvm::BLAKE3Result<32> common_fingerprint;
-    // Fingerprint for all calls to specific functions (hashes all calls to
-    // other specifics). This is built by the `FunctionContext` while lowering.
-    llvm::BLAKE3Result<32> specific_fingerprint;
-    // All non-hashed specific_ids of functions called.
-    llvm::SmallVector<SemIR::SpecificId> calls;
-  };
-
   explicit FileContext(Context& context, const SemIR::File& sem_ir,
   explicit FileContext(Context& context, const SemIR::File& sem_ir,
                        const SemIR::InstNamer* inst_namer,
                        const SemIR::InstNamer* inst_namer,
                        llvm::raw_ostream* vlog_stream);
                        llvm::raw_ostream* vlog_stream);
@@ -201,74 +185,6 @@ class FileContext {
     lowered_specifics_.Get(generic_id).push_back(specific_id);
     lowered_specifics_.Get(generic_id).push_back(specific_id);
   }
   }
 
 
-  // Initializes and returns a SpecificFunctionFingerprint* instance for a
-  // specific. The internal of the fingerprint are populated during and after
-  // lowering the function body of that specific.
-  auto InitializeFingerprintForSpecific(SemIR::SpecificId specific_id)
-      -> SpecificFunctionFingerprint* {
-    if (!specific_id.has_value()) {
-      return nullptr;
-    }
-    return &lowered_specific_fingerprint_.Get(specific_id);
-  }
-
-  // Entry point for coalescing equivalent specifics. Two function definitions,
-  // from the same generic, with different specific_ids are considered
-  // equivalent if, at the LLVM level, one can be replaced with the other, with
-  // no change in behavior. All LLVM types and instructions must be equivalent.
-  auto CoalesceEquivalentSpecifics() -> void;
-
-  // While coalescing specifics, returns whether the function types for two
-  // specifics are equivalent. This uses a fingerprint generated for each
-  // function type.
-  auto AreFunctionTypesEquivalent(SemIR::SpecificId specific_id1,
-                                  SemIR::SpecificId specific_id2) -> bool;
-
-  // While coalescing specifics, compare the function bodies for two specifics.
-  // This uses fingerprints generated during lowering of the function body.
-  // The `visited_equivalent_specifics` parameter is used to track cycles in
-  // the function callgraph, and will also return equivalent pairs of specifics
-  // found, if the two specifics given as arguments are found to be equivalent.
-  auto AreFunctionBodiesEquivalent(
-      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-      Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
-          visited_equivalent_specifics) -> bool;
-
-  // Given an equivalent pair of specifics, updates the canonical specific to
-  // use for each of the two Specifics found to be equivalent.
-  auto ProcessSpecificEquivalence(
-      std::pair<SemIR::SpecificId, SemIR::SpecificId> pair) -> void;
-
-  // Checks if two specific_ids are equivalent and also reduces the equivalence
-  // chains/paths. This update ensures the canonical specific is always "one
-  // hop away".
-  auto IsKnownEquivalence(SemIR::SpecificId specific_id1,
-                          SemIR::SpecificId specific_id2) -> bool;
-
-  // Update the tracked equivalent specific for the `SpecificId`. This may
-  // occur a replacement was performed and a chain of such replacements needs
-  // to be followed to discover the canonical specific for the given argument.
-  auto UpdateEquivalentSpecific(SemIR::SpecificId specific_id) -> void;
-
-  // Update the LLVM function to use for a `SpecificId` that has been found to
-  // have another equivalent LLVM function. Replace all uses of the original
-  // LLVM function with the equivalent one found, and delete the previous LLVM
-  // function body.
-  auto UpdateAndDeleteLLVMFunction(SemIR::SpecificId specific_id) -> void;
-
-  // Inserts a pair into a set of pairs in canonical form. Also implicitly
-  // checks entry already existed if it cannot be inserted.
-  auto InsertPair(
-      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-      Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
-      -> bool;
-
-  // Checks if a pair is contained into a set of pairs, in canonical form.
-  auto ContainsPair(
-      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
-      const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
-      -> bool;
-
   // The overall lowering context.
   // The overall lowering context.
   Context* context_;
   Context* context_;
 
 
@@ -317,30 +233,7 @@ class FileContext {
   FixedSizeValueStore<SemIR::GenericId, llvm::SmallVector<SemIR::SpecificId>>
   FixedSizeValueStore<SemIR::GenericId, llvm::SmallVector<SemIR::SpecificId>>
       lowered_specifics_;
       lowered_specifics_;
 
 
-  // For specifics that exist in lowered_specifics, a hash of their function
-  // type information: return and parameter types.
-  // TODO: Hashing all members of `FunctionTypeInfo` may not be necessary.
-  FixedSizeValueStore<SemIR::SpecificId, llvm::BLAKE3Result<32>>
-      lowered_specifics_type_fingerprint_;
-
-  // This is initialized and populated while lowering a specific.
-  FixedSizeValueStore<SemIR::SpecificId, SpecificFunctionFingerprint>
-      lowered_specific_fingerprint_;
-
-  // Equivalent specifics that have been found. For each specific, this points
-  // to the canonical equivalent specific, which may also be self. We currently
-  // define the canonical specific as the one with the lowest
-  // `SpecificId.index`.
-  //
-  // Entries are initialized to `SpecificId::None`, which defines that there is
-  // no other equivalent specific to this `SpecificId`.
-  FixedSizeValueStore<SemIR::SpecificId, SemIR::SpecificId>
-      equivalent_specifics_;
-
-  // Non-equivalent specifics found.
-  // TODO: Revisit this due to its quadratic space growth.
-  Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
-      non_equivalent_specifics_;
+  SpecificCoalescer coalescer_;
 };
 };
 
 
 }  // namespace Carbon::Lower
 }  // namespace Carbon::Lower

+ 1 - 1
toolchain/lower/function_context.cpp

@@ -16,7 +16,7 @@ namespace Carbon::Lower {
 FunctionContext::FunctionContext(
 FunctionContext::FunctionContext(
     FileContext& file_context, llvm::Function* function,
     FileContext& file_context, llvm::Function* function,
     FileContext& specific_file_context, SemIR::SpecificId specific_id,
     FileContext& specific_file_context, SemIR::SpecificId specific_id,
-    FileContext::SpecificFunctionFingerprint* function_fingerprint,
+    SpecificCoalescer::SpecificFunctionFingerprint* function_fingerprint,
     llvm::DISubprogram* di_subprogram, llvm::raw_ostream* vlog_stream)
     llvm::DISubprogram* di_subprogram, llvm::raw_ostream* vlog_stream)
     : file_context_(&file_context),
     : file_context_(&file_context),
       function_(function),
       function_(function),

+ 3 - 2
toolchain/lower/function_context.h

@@ -13,6 +13,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Module.h"
 #include "toolchain/lower/file_context.h"
 #include "toolchain/lower/file_context.h"
+#include "toolchain/lower/specific_coalescer.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/ids.h"
 #include "toolchain/sem_ir/ids.h"
 
 
@@ -27,7 +28,7 @@ class FunctionContext {
   explicit FunctionContext(
   explicit FunctionContext(
       FileContext& file_context, llvm::Function* function,
       FileContext& file_context, llvm::Function* function,
       FileContext& specific_file_context, SemIR::SpecificId specific_id,
       FileContext& specific_file_context, SemIR::SpecificId specific_id,
-      FileContext::SpecificFunctionFingerprint* function_fingerprint,
+      SpecificCoalescer::SpecificFunctionFingerprint* function_fingerprint,
       llvm::DISubprogram* di_subprogram, llvm::raw_ostream* vlog_stream);
       llvm::DISubprogram* di_subprogram, llvm::raw_ostream* vlog_stream);
 
 
   // Describes a function's body fingerprint while creating the function body.
   // Describes a function's body fingerprint while creating the function body.
@@ -349,7 +350,7 @@ class FunctionContext {
   // The accumulated fingerprint is owned by the FileContext and passed into
   // The accumulated fingerprint is owned by the FileContext and passed into
   // the FunctionContext. The function fingerprint is currently only built for
   // the FunctionContext. The function fingerprint is currently only built for
   // specific functions, otherwise, this will be nullptr.
   // specific functions, otherwise, this will be nullptr.
-  FileContext::SpecificFunctionFingerprint* function_fingerprint_;
+  SpecificCoalescer::SpecificFunctionFingerprint* function_fingerprint_;
 
 
   // Maps a function's SemIR::File blocks to lowered blocks.
   // Maps a function's SemIR::File blocks to lowered blocks.
   Map<SemIR::InstBlockId, llvm::BasicBlock*> blocks_;
   Map<SemIR::InstBlockId, llvm::BasicBlock*> blocks_;

+ 258 - 0
toolchain/lower/specific_coalescer.cpp

@@ -0,0 +1,258 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/lower/specific_coalescer.h"
+
+#include "common/check.h"
+#include "common/vlog.h"
+
+namespace Carbon::Lower {
+
+SpecificCoalescer::SpecificCoalescer(llvm::raw_ostream* vlog_stream,
+                                     const SemIR::SpecificStore& specifics)
+    : vlog_stream_(vlog_stream),
+      lowered_specifics_type_fingerprint_(specifics, {}),
+      lowered_specific_fingerprint_(specifics, {}),
+      equivalent_specifics_(specifics, SemIR::SpecificId::None) {}
+
+auto SpecificCoalescer::CoalesceEquivalentSpecifics(
+    LoweredSpecificsStore& lowered_specifics,
+    LoweredLlvmFunctionStore& lowered_llvm_functions) -> void {
+  for (auto& specifics : lowered_specifics.values()) {
+    // Collect specifics to delete for each generic. Replace and remove each
+    // after processing all specifics for a generic. Note, we could also
+    // replace and remove all specifics after processing all generics.
+    llvm::SmallVector<SemIR::SpecificId> specifics_to_delete;
+    // i cannot be unsigned due to the comparison with a negative number when
+    // the specifics vector is empty.
+    for (int i = 0; i < static_cast<int>(specifics.size()) - 1; ++i) {
+      // This specific was already replaced, skip it.
+      if (equivalent_specifics_.Get(specifics[i]).has_value() &&
+          equivalent_specifics_.Get(specifics[i]) != specifics[i]) {
+        specifics_to_delete.push_back(specifics[i]);
+        specifics[i] = specifics[specifics.size() - 1];
+        specifics.pop_back();
+        --i;
+        continue;
+      }
+      // TODO: Improve quadratic behavior by using a single hash based on
+      // `lowered_specifics_type_fingerprint_` and `common_fingerprint`.
+      for (int j = i + 1; j < static_cast<int>(specifics.size()); ++j) {
+        // When the specific was already replaced, skip it.
+        if (equivalent_specifics_.Get(specifics[j]).has_value() &&
+            equivalent_specifics_.Get(specifics[j]) != specifics[j]) {
+          specifics_to_delete.push_back(specifics[j]);
+          specifics[j] = specifics[specifics.size() - 1];
+          specifics.pop_back();
+          --j;
+          continue;
+        }
+
+        // When the two specifics are not equivalent due to the function type
+        // info stored in lowered_specifics_types, mark non-equivalence. This
+        // can be reused to short-cut another path and continue the search for
+        // other equivalences.
+        if (!AreFunctionTypesEquivalent(specifics[i], specifics[j])) {
+          InsertPair(specifics[i], specifics[j], non_equivalent_specifics_);
+          continue;
+        }
+
+        Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
+            visited_equivalent_specifics;
+        InsertPair(specifics[i], specifics[j], visited_equivalent_specifics);
+        // Function type information matches; check usages inside the function
+        // body that are dependent on the specific. This information has been
+        // stored in lowered_states while lowering each function body.
+        if (AreFunctionBodiesEquivalent(specifics[i], specifics[j],
+                                        visited_equivalent_specifics)) {
+          // When processing equivalences, we may change the canonical specific
+          // multiple times, so we don't delete replaced specifics until the
+          // end.
+          visited_equivalent_specifics.ForEach(
+              [&](std::pair<SemIR::SpecificId, SemIR::SpecificId>
+                      equivalent_entry) {
+                CARBON_VLOG("Found equivalent specifics: {0}, {1}",
+                            equivalent_entry.first, equivalent_entry.second);
+                ProcessSpecificEquivalence(equivalent_entry);
+              });
+
+          // Remove the replaced specific from the list of emitted specifics.
+          // Only the top-level one is removed here; the others are elsewhere
+          // in the vector and will be found and removed during processing.
+          specifics_to_delete.push_back(specifics[j]);
+          specifics[j] = specifics[specifics.size() - 1];
+          specifics.pop_back();
+          --j;
+        } else {
+          // Only mark non-equivalence based on state for starting specifics.
+          InsertPair(specifics[i], specifics[j], non_equivalent_specifics_);
+        }
+      }
+    }
+
+    // Once all equivalences are found for a generic, update and delete the
+    // equivalent specifics.
+    for (auto specific_id : specifics_to_delete) {
+      UpdateAndDeleteLLVMFunction(lowered_llvm_functions, specific_id);
+    }
+  }
+}
+
+auto SpecificCoalescer::ProcessSpecificEquivalence(
+    std::pair<SemIR::SpecificId, SemIR::SpecificId> pair) -> void {
+  auto [specific_id1, specific_id2] = pair;
+  CARBON_CHECK(specific_id1.has_value() && specific_id2.has_value(),
+               "Expected values in equivalence check");
+
+  auto get_canon = [&](SemIR::SpecificId specific_id) {
+    auto equiv_id = equivalent_specifics_.Get(specific_id);
+    return equiv_id.has_value() ? equiv_id : specific_id;
+  };
+  auto canon_id1 = get_canon(specific_id1);
+  auto canon_id2 = get_canon(specific_id2);
+
+  if (canon_id1 == canon_id2) {
+    // Already equivalent, there was a previous replacement.
+    return;
+  }
+
+  if (canon_id1.index >= canon_id2.index) {
+    // Prefer the earlier index for canonical values.
+    std::swap(canon_id1, canon_id2);
+  }
+
+  // Update equivalent_specifics_ for all. This is used as an indicator that
+  // this specific_id may be the canonical one when reducing the equivalence
+  // chains in `IsKnownEquivalence`.
+  equivalent_specifics_.Set(specific_id1, canon_id1);
+  equivalent_specifics_.Set(specific_id2, canon_id1);
+  equivalent_specifics_.Set(canon_id1, canon_id1);
+  equivalent_specifics_.Set(canon_id2, canon_id1);
+}
+
+// Follows the chain of recorded equivalences starting at `specific_id` until
+// it reaches an entry that maps to itself, then re-points the specifics
+// visited along the way directly at that final entry. Does nothing if no
+// equivalence is recorded for `specific_id`.
+auto SpecificCoalescer::UpdateEquivalentSpecific(SemIR::SpecificId specific_id)
+    -> void {
+  SemIR::SpecificId current = specific_id;
+  SemIR::SpecificId target = equivalent_specifics_.Get(current);
+  if (!target.has_value()) {
+    return;
+  }
+
+  // Specifics whose recorded equivalent is not the end of the chain.
+  llvm::SmallVector<SemIR::SpecificId> stale;
+  for (SemIR::SpecificId next = equivalent_specifics_.Get(target);
+       target != next; next = equivalent_specifics_.Get(next)) {
+    stale.push_back(current);
+    current = target;
+    target = next;
+  }
+
+  // `target` now maps to itself; shorten every visited entry to one hop.
+  for (SemIR::SpecificId stale_id : stale) {
+    equivalent_specifics_.Set(stale_id, target);
+  }
+}
+
+// Redirects all uses of the LLVM function lowered for `specific_id` to the
+// LLVM function of its equivalent specific, erases the now-unused function
+// from its parent, and records the replacement function for `specific_id`.
+auto SpecificCoalescer::UpdateAndDeleteLLVMFunction(
+    LoweredLlvmFunctionStore& lowered_llvm_functions,
+    SemIR::SpecificId specific_id) -> void {
+  // Reduce the equivalence chain first so the lookup below is up to date.
+  UpdateEquivalentSpecific(specific_id);
+
+  llvm::Function* duplicate = lowered_llvm_functions.Get(specific_id);
+  const SemIR::SpecificId canonical_id = equivalent_specifics_.Get(specific_id);
+  llvm::Function* replacement = lowered_llvm_functions.Get(canonical_id);
+
+  duplicate->replaceAllUsesWith(replacement);
+  duplicate->eraseFromParent();
+  lowered_llvm_functions.Set(specific_id, replacement);
+}
+
+// Returns whether the two specifics are already recorded as equivalent. As a
+// side effect, reduces the equivalence chain of each specific before
+// comparing. Returns false if either specific has no recorded equivalence.
+auto SpecificCoalescer::IsKnownEquivalence(SemIR::SpecificId specific_id1,
+                                           SemIR::SpecificId specific_id2)
+    -> bool {
+  const bool both_tracked =
+      equivalent_specifics_.Get(specific_id1).has_value() &&
+      equivalent_specifics_.Get(specific_id2).has_value();
+  if (!both_tracked) {
+    return false;
+  }
+
+  UpdateEquivalentSpecific(specific_id1);
+  UpdateEquivalentSpecific(specific_id2);
+
+  const SemIR::SpecificId canonical1 = equivalent_specifics_.Get(specific_id1);
+  const SemIR::SpecificId canonical2 = equivalent_specifics_.Get(specific_id2);
+  return canonical1 == canonical2;
+}
+
+// Returns whether the stored function type fingerprints of the two specifics
+// are equal. Both ids must have a value.
+auto SpecificCoalescer::AreFunctionTypesEquivalent(
+    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2) -> bool {
+  CARBON_CHECK(specific_id1.has_value() && specific_id2.has_value());
+  const auto& type_fingerprint1 =
+      lowered_specifics_type_fingerprint_.Get(specific_id1);
+  const auto& type_fingerprint2 =
+      lowered_specifics_type_fingerprint_.Get(specific_id2);
+  return type_fingerprint1 == type_fingerprint2;
+}
+
+// Compares the lowered bodies of two specifics using the fingerprints recorded
+// for them, following pairs of differing specific calls with a worklist.
+//
+// Returns false as soon as a pair is found whose common fingerprints differ
+// (that pair is recorded in `non_equivalent_specifics_`), or when a pair of
+// calls is already known to be non-equivalent. Returns true once the worklist
+// is exhausted.
+//
+// `visited_equivalent_specifics` collects every pair of differing calls that
+// was queued for comparison; a pair that is already present is not queued
+// again, which stops the walk from looping on call cycles.
+auto SpecificCoalescer::AreFunctionBodiesEquivalent(
+    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+    Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
+        visited_equivalent_specifics) -> bool {
+  llvm::SmallVector<std::pair<SemIR::SpecificId, SemIR::SpecificId>> worklist;
+  worklist.push_back({specific_id1, specific_id2});
+
+  while (!worklist.empty()) {
+    auto [lhs_id, rhs_id] = worklist.pop_back_val();
+
+    // Bind by reference: a fingerprint owns a vector of calls, and nothing in
+    // this loop modifies the fingerprint store, so copying it is pure waste.
+    const auto& lhs_state = lowered_specific_fingerprint_.Get(lhs_id);
+    const auto& rhs_state = lowered_specific_fingerprint_.Get(rhs_id);
+    if (lhs_state.common_fingerprint != rhs_state.common_fingerprint) {
+      InsertPair(lhs_id, rhs_id, non_equivalent_specifics_);
+      return false;
+    }
+    // Identical call fingerprints: nothing further to compare for this pair.
+    if (lhs_state.specific_fingerprint == rhs_state.specific_fingerprint) {
+      continue;
+    }
+
+    // A size difference should have been detected by the common fingerprint.
+    CARBON_CHECK(lhs_state.calls.size() == rhs_state.calls.size(),
+                 "Number of specific calls expected to be the same.");
+
+    for (auto [lhs_call, rhs_call] :
+         llvm::zip(lhs_state.calls, rhs_state.calls)) {
+      if (lhs_call == rhs_call) {
+        continue;
+      }
+      if (ContainsPair(lhs_call, rhs_call, non_equivalent_specifics_)) {
+        return false;
+      }
+      if (IsKnownEquivalence(lhs_call, rhs_call)) {
+        continue;
+      }
+      // Skip pairs that were already queued.
+      if (!InsertPair(lhs_call, rhs_call, visited_equivalent_specifics)) {
+        continue;
+      }
+      // Leave the added equivalence pair in place and continue.
+      worklist.push_back({lhs_call, rhs_call});
+    }
+  }
+  return true;
+}
+
+// Inserts the pair into `set_of_pairs` with the lower-indexed specific first.
+// Returns true if the pair was newly inserted.
+auto SpecificCoalescer::InsertPair(
+    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+    Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
+    -> bool {
+  // Order the pair so that (a, b) and (b, a) share a single entry.
+  if (specific_id2.index < specific_id1.index) {
+    std::swap(specific_id1, specific_id2);
+  }
+  return set_of_pairs.Insert(std::make_pair(specific_id1, specific_id2))
+      .is_inserted();
+}
+
+// Returns whether `set_of_pairs` contains the pair, looked up with the
+// lower-indexed specific first.
+auto SpecificCoalescer::ContainsPair(
+    SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+    const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
+    -> bool {
+  // Use the same ordering that `InsertPair` applies before inserting.
+  if (specific_id2.index < specific_id1.index) {
+    std::swap(specific_id1, specific_id2);
+  }
+  const auto ordered_pair = std::make_pair(specific_id1, specific_id2);
+  return set_of_pairs.Contains(ordered_pair);
+}
+
+}  // namespace Carbon::Lower

+ 156 - 0
toolchain/lower/specific_coalescer.h

@@ -0,0 +1,156 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_LOWER_SPECIFIC_COALESCER_H_
+#define CARBON_TOOLCHAIN_LOWER_SPECIFIC_COALESCER_H_
+
+#include "llvm/Support/BLAKE3.h"
+#include "toolchain/lower/context.h"
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::Lower {
+
+// Coalescing functionality for lowering fewer specifics of the same generic.
+class SpecificCoalescer {
+ public:
+  using LoweredSpecificsStore =
+      FixedSizeValueStore<SemIR::GenericId,
+                          llvm::SmallVector<SemIR::SpecificId>>;
+  using LoweredLlvmFunctionStore =
+      FixedSizeValueStore<SemIR::SpecificId, llvm::Function*>;
+
+  // Describes a specific function's body fingerprint.
+  struct SpecificFunctionFingerprint {
+    // Fingerprint with all specific-dependent instructions, except specific
+    // calls. This is built by the `FunctionContext` while lowering each
+    // instruction in the definition of a specific function.
+    // TODO: This can be merged with the function type fingerprint, for a
+    // single upfront non-equivalence check, and hash bucketing for deeper
+    // equivalence evaluation.
+    llvm::BLAKE3Result<32> common_fingerprint;
+    // Fingerprint for all calls to specific functions (hashes all calls to
+    // other specifics). This is built by the `FunctionContext` while lowering.
+    llvm::BLAKE3Result<32> specific_fingerprint;
+    // All non-hashed specific_ids of functions called.
+    llvm::SmallVector<SemIR::SpecificId> calls;
+  };
+
+  // Takes a `SpecificStore` to help initialize related `FixedSizeValueStore`s.
+  explicit SpecificCoalescer(llvm::raw_ostream* vlog_stream,
+                             const SemIR::SpecificStore& specifics);
+
+  // Entry point for coalescing equivalent specifics. Two function definitions,
+  // from the same generic, with different specific_ids are considered
+  // equivalent if, at the LLVM level, one can be replaced with the other, with
+  // no change in behavior. All LLVM types and instructions must be equivalent.
+  auto CoalesceEquivalentSpecifics(
+      LoweredSpecificsStore& lowered_specifics,
+      LoweredLlvmFunctionStore& lowered_llvm_functions) -> void;
+
+  // Returns a pointer to the stored `SpecificFunctionFingerprint` for a
+  // specific, or null if `specific_id` has no value. The internals of the
+  // fingerprint are populated during and after lowering the function body of
+  // that specific.
+  auto InitializeFingerprintForSpecific(SemIR::SpecificId specific_id)
+      -> SpecificFunctionFingerprint* {
+    if (!specific_id.has_value()) {
+      return nullptr;
+    }
+    return &lowered_specific_fingerprint_.Get(specific_id);
+  }
+
+  // Computes a BLAKE3 hash of the printed form of `llvm_type` and stores it
+  // as the function type fingerprint for `specific_id`.
+  auto CreateTypeFingerprint(SemIR::SpecificId specific_id,
+                             llvm::Type* llvm_type) -> void {
+    llvm::BLAKE3 function_type_fingerprint;
+    RawStringOstream os;
+    llvm_type->print(os);
+    function_type_fingerprint.update(os.TakeStr());
+    function_type_fingerprint.final(
+        lowered_specifics_type_fingerprint_.Get(specific_id));
+  }
+
+ private:
+  // While coalescing specifics, returns whether the function types for two
+  // specifics are equivalent. This uses a fingerprint generated for each
+  // function type.
+  auto AreFunctionTypesEquivalent(SemIR::SpecificId specific_id1,
+                                  SemIR::SpecificId specific_id2) -> bool;
+
+  // While coalescing specifics, compare the function bodies for two specifics.
+  // This uses fingerprints generated during lowering of the function body.
+  // The `visited_equivalent_specifics` parameter is used to track cycles in
+  // the function callgraph, and will also return equivalent pairs of specifics
+  // found, if the two specifics given as arguments are found to be equivalent.
+  auto AreFunctionBodiesEquivalent(
+      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+      Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>&
+          visited_equivalent_specifics) -> bool;
+
+  // Given an equivalent pair of specifics, updates the canonical specific to
+  // use for each of the two specifics found to be equivalent.
+  auto ProcessSpecificEquivalence(
+      std::pair<SemIR::SpecificId, SemIR::SpecificId> pair) -> void;
+
+  // Checks if two specific_ids are equivalent and also reduces the equivalence
+  // chains/paths. This update ensures the canonical specific is always "one
+  // hop away".
+  auto IsKnownEquivalence(SemIR::SpecificId specific_id1,
+                          SemIR::SpecificId specific_id2) -> bool;
+
+  // Update the tracked equivalent specific for the `SpecificId`. This is
+  // needed when a replacement was performed and a chain of such replacements
+  // has to be followed to discover the canonical specific for the argument.
+  auto UpdateEquivalentSpecific(SemIR::SpecificId specific_id) -> void;
+
+  // Update the LLVM function to use for a `SpecificId` that has been found to
+  // have another equivalent LLVM function. Replace all uses of the original
+  // LLVM function with the equivalent one found, and delete the previous LLVM
+  // function body.
+  auto UpdateAndDeleteLLVMFunction(
+      LoweredLlvmFunctionStore& lowered_llvm_functions,
+      SemIR::SpecificId specific_id) -> void;
+
+  // Inserts a pair into a set of pairs in canonical form. Returns true if the
+  // pair was inserted, and false if the entry already existed.
+  auto InsertPair(
+      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+      Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
+      -> bool;
+
+  // Checks if a pair is contained in a set of pairs, in canonical form.
+  auto ContainsPair(
+      SemIR::SpecificId specific_id1, SemIR::SpecificId specific_id2,
+      const Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>& set_of_pairs)
+      -> bool;
+
+  // The optional vlog stream.
+  llvm::raw_ostream* vlog_stream_;
+
+  // For specifics that exist in lowered_specifics, a hash of their function
+  // type information.
+  FixedSizeValueStore<SemIR::SpecificId, llvm::BLAKE3Result<32>>
+      lowered_specifics_type_fingerprint_;
+
+  // This is initialized and populated while lowering a specific.
+  FixedSizeValueStore<SemIR::SpecificId, SpecificFunctionFingerprint>
+      lowered_specific_fingerprint_;
+
+  // Equivalent specifics that have been found. For each specific, this points
+  // to the canonical equivalent specific, which may also be self. We currently
+  // define the canonical specific as the one with the lowest
+  // `SpecificId.index`.
+  //
+  // Entries are initialized to `SpecificId::None`, which defines that there is
+  // no other equivalent specific to this `SpecificId`.
+  FixedSizeValueStore<SemIR::SpecificId, SemIR::SpecificId>
+      equivalent_specifics_;
+
+  // Non-equivalent specifics found.
+  // TODO: Revisit this due to its quadratic space growth.
+  Set<std::pair<SemIR::SpecificId, SemIR::SpecificId>>
+      non_equivalent_specifics_;
+};
+
+}  // namespace Carbon::Lower
+
+#endif  // CARBON_TOOLCHAIN_LOWER_SPECIFIC_COALESCER_H_