Ver código fonte

Refactor single-unit checking out of check.cpp (#4649)

This is primarily moving code around, to try to create a logical split
of the code in check.cpp, making the API boundaries clearer.

There's one small, deliberate logic change around false returns from
`HandleParseNode`, where before there was a `CARBON_CHECK` instantiated
by the `#define` (per `NodeKind`), and now it's outside the `#define`
(done mainly because the message didn't keep up with the `Handle##Name`
-> `HandleParseNode` rename).
Jon Ross-Perkins 1 ano atrás
pai
commit
87b3671330

+ 2 - 0
toolchain/check/BUILD

@@ -96,6 +96,8 @@ cc_library(
     name = "check",
     srcs = [
         "check.cpp",
+        "check_unit.cpp",
+        "check_unit.h",
         "deferred_definition_worklist.cpp",
         "deferred_definition_worklist.h",
         "handle.h",

+ 18 - 525
toolchain/check/check.cpp

@@ -6,536 +6,28 @@
 
 #include "common/check.h"
 #include "common/map.h"
-#include "toolchain/base/kind_switch.h"
-#include "toolchain/base/pretty_stack_trace_function.h"
+#include "toolchain/check/check_unit.h"
 #include "toolchain/check/context.h"
 #include "toolchain/check/diagnostic_helpers.h"
-#include "toolchain/check/generic.h"
-#include "toolchain/check/handle.h"
-#include "toolchain/check/import.h"
-#include "toolchain/check/import_ref.h"
-#include "toolchain/check/node_id_traversal.h"
 #include "toolchain/check/sem_ir_diagnostic_converter.h"
 #include "toolchain/diagnostics/diagnostic.h"
 #include "toolchain/diagnostics/format_providers.h"
 #include "toolchain/lex/token_kind.h"
 #include "toolchain/parse/node_ids.h"
 #include "toolchain/parse/tree.h"
-#include "toolchain/parse/tree_node_diagnostic_converter.h"
 #include "toolchain/sem_ir/file.h"
-#include "toolchain/sem_ir/ids.h"
 #include "toolchain/sem_ir/typed_insts.h"
 
 namespace Carbon::Check {
 
-namespace {
-struct UnitInfo {
-  // A given import within the file, with its destination.
-  struct Import {
-    Parse::Tree::PackagingNames names;
-    UnitInfo* unit_info;
-  };
-  // A file's imports corresponding to a single package, for the map.
-  struct PackageImports {
-    // Use the constructor so that the SmallVector is only constructed
-    // as-needed.
-    explicit PackageImports(IdentifierId package_id,
-                            Parse::ImportDeclId node_id)
-        : package_id(package_id), node_id(node_id) {}
-
-    // The identifier of the imported package.
-    IdentifierId package_id;
-    // The first `import` declaration in the file, which declared the package's
-    // identifier (even if the import failed). Used for associating diagnostics
-    // not specific to a single import.
-    Parse::ImportDeclId node_id;
-    // The associated `import` instruction. Only valid once a file is checked.
-    SemIR::InstId import_decl_id = SemIR::InstId::Invalid;
-    // Whether there's an import that failed to load.
-    bool has_load_error = false;
-    // The list of valid imports.
-    llvm::SmallVector<Import> imports;
-  };
-
-  explicit UnitInfo(SemIR::CheckIRId check_ir_id, Unit& unit)
-      : check_ir_id(check_ir_id),
-        unit(&unit),
-        err_tracker(*unit.consumer),
-        emitter(*unit.node_converter, err_tracker) {}
-
-  auto parse_tree() -> const Parse::Tree& { return unit->sem_ir->parse_tree(); }
-  auto source() -> const SourceBuffer& {
-    return parse_tree().tokens().source();
-  }
-
-  SemIR::CheckIRId check_ir_id;
-  Unit* unit;
-
-  // Emitter information.
-  ErrorTrackingDiagnosticConsumer err_tracker;
-  DiagnosticEmitter<Parse::NodeLoc> emitter;
-
-  // List of the outgoing imports. If a package includes unavailable library
-  // imports, it has an entry with has_load_error set. Invalid imports (for
-  // example, `import Main;`) aren't added because they won't add identifiers to
-  // name lookup.
-  llvm::SmallVector<PackageImports> package_imports;
-
-  // A map of the package names to the outgoing imports above.
-  Map<IdentifierId, int32_t> package_imports_map;
-
-  // The remaining number of imports which must be checked before this unit can
-  // be processed.
-  int32_t imports_remaining = 0;
-
-  // A list of incoming imports. This will be empty for `impl` files, because
-  // imports only touch `api` files.
-  llvm::SmallVector<UnitInfo*> incoming_imports;
-
-  // The corresponding `api` unit if this is an `impl` file. The entry should
-  // also be in the corresponding `PackageImports`.
-  UnitInfo* api_for_impl = nullptr;
-
-  // Whether the unit has been checked.
-  bool is_checked = false;
-};
-}  // namespace
-
-// Collects direct imports, for CollectTransitiveImports.
-static auto CollectDirectImports(llvm::SmallVector<SemIR::ImportIR>& results,
-                                 llvm::MutableArrayRef<int> ir_to_result_index,
-                                 SemIR::InstId import_decl_id,
-                                 const UnitInfo::PackageImports& imports,
-                                 bool is_local) -> void {
-  for (const auto& import : imports.imports) {
-    const auto& direct_ir = *import.unit_info->unit->sem_ir;
-    auto& index = ir_to_result_index[direct_ir.check_ir_id().index];
-    if (index != -1) {
-      // This should only happen when doing API imports for an implementation
-      // file. Don't change the entry; is_export doesn't matter.
-      continue;
-    }
-    index = results.size();
-    results.push_back({.decl_id = import_decl_id,
-                       // Only tag exports in API files, ignoring the value in
-                       // implementation files.
-                       .is_export = is_local && import.names.is_export,
-                       .sem_ir = &direct_ir});
-  }
-}
-
-// Collects transitive imports, handling deduplication. These will be unified
-// between local_imports and api_imports.
-static auto CollectTransitiveImports(
-    SemIR::InstId import_decl_id, const UnitInfo::PackageImports* local_imports,
-    const UnitInfo::PackageImports* api_imports, int total_ir_count)
-    -> llvm::SmallVector<SemIR::ImportIR> {
-  llvm::SmallVector<SemIR::ImportIR> results;
-
-  // Track whether an IR was imported in full, including `export import`. This
-  // distinguishes from IRs that are indirectly added without all names being
-  // exported to this IR.
-  llvm::SmallVector<int> ir_to_result_index(total_ir_count, -1);
-
-  // First add direct imports. This means that if an entity is imported both
-  // directly and indirectly, the import path will reflect the direct import.
-  if (local_imports) {
-    CollectDirectImports(results, ir_to_result_index, import_decl_id,
-                         *local_imports,
-                         /*is_local=*/true);
-  }
-  if (api_imports) {
-    CollectDirectImports(results, ir_to_result_index, import_decl_id,
-                         *api_imports,
-                         /*is_local=*/false);
-  }
-
-  // Loop through direct imports for any indirect exports. The underlying vector
-  // is appended during iteration, so take the size first.
-  const int direct_imports = results.size();
-  for (int direct_index : llvm::seq(direct_imports)) {
-    bool is_export = results[direct_index].is_export;
-
-    for (const auto& indirect_ir :
-         results[direct_index].sem_ir->import_irs().array_ref()) {
-      if (!indirect_ir.is_export) {
-        continue;
-      }
-
-      auto& indirect_index =
-          ir_to_result_index[indirect_ir.sem_ir->check_ir_id().index];
-      if (indirect_index == -1) {
-        indirect_index = results.size();
-        // TODO: In the case of a recursive `export import`, this only points at
-        // the outermost import. May want something that better reflects the
-        // recursion.
-        results.push_back({.decl_id = results[direct_index].decl_id,
-                           .is_export = is_export,
-                           .sem_ir = indirect_ir.sem_ir});
-      } else if (is_export) {
-        results[indirect_index].is_export = true;
-      }
-    }
-  }
-
-  return results;
-}
-
-// Imports the current package.
-static auto ImportCurrentPackage(Context& context, UnitInfo& unit_info,
-                                 int total_ir_count,
-                                 SemIR::InstId package_inst_id,
-                                 SemIR::TypeId namespace_type_id) -> void {
-  // Add imports from the current package.
-  auto import_map_lookup =
-      unit_info.package_imports_map.Lookup(IdentifierId::Invalid);
-  if (!import_map_lookup) {
-    // Push the scope; there are no names to add.
-    context.scope_stack().Push(package_inst_id, SemIR::NameScopeId::Package);
-    return;
-  }
-  UnitInfo::PackageImports& self_import =
-      unit_info.package_imports[import_map_lookup.value()];
-
-  if (self_import.has_load_error) {
-    context.name_scopes().Get(SemIR::NameScopeId::Package).set_has_error();
-  }
-
-  ImportLibrariesFromCurrentPackage(
-      context, namespace_type_id,
-      CollectTransitiveImports(self_import.import_decl_id, &self_import,
-                               /*api_imports=*/nullptr, total_ir_count));
-
-  context.scope_stack().Push(
-      package_inst_id, SemIR::NameScopeId::Package, SemIR::SpecificId::Invalid,
-      context.name_scopes().Get(SemIR::NameScopeId::Package).has_error());
-}
-
-// Imports all other packages (excluding the current package).
-static auto ImportOtherPackages(Context& context, UnitInfo& unit_info,
-                                int total_ir_count,
-                                SemIR::TypeId namespace_type_id) -> void {
-  // api_imports_list is initially the size of the current file's imports,
-  // including for API files, for simplicity in iteration. It's only really used
-  // when processing an implementation file, in order to combine the API file
-  // imports.
-  //
-  // For packages imported by the API file, the IdentifierId is the package name
-  // and the index is into the API's import list. Otherwise, the initial
-  // {Invalid, -1} state remains.
-  llvm::SmallVector<std::pair<IdentifierId, int32_t>> api_imports_list;
-  api_imports_list.resize(unit_info.package_imports.size(),
-                          {IdentifierId::Invalid, -1});
-
-  // When there's an API file, add the mapping to api_imports_list.
-  if (unit_info.api_for_impl) {
-    const auto& api_identifiers =
-        unit_info.api_for_impl->unit->value_stores->identifiers();
-    auto& impl_identifiers = unit_info.unit->value_stores->identifiers();
-
-    for (auto [api_imports_index, api_imports] :
-         llvm::enumerate(unit_info.api_for_impl->package_imports)) {
-      // Skip the current package.
-      if (!api_imports.package_id.is_valid()) {
-        continue;
-      }
-      // Translate the package ID from the API file to the implementation file.
-      auto impl_package_id =
-          impl_identifiers.Add(api_identifiers.Get(api_imports.package_id));
-      if (auto lookup = unit_info.package_imports_map.Lookup(impl_package_id)) {
-        // On a hit, replace the entry to unify the API and implementation
-        // imports.
-        api_imports_list[lookup.value()] = {impl_package_id, api_imports_index};
-      } else {
-        // On a miss, add the package as API-only.
-        api_imports_list.push_back({impl_package_id, api_imports_index});
-      }
-    }
-  }
-
-  for (auto [i, api_imports_entry] : llvm::enumerate(api_imports_list)) {
-    // These variables are updated after figuring out which imports are present.
-    auto import_decl_id = SemIR::InstId::Invalid;
-    IdentifierId package_id = IdentifierId::Invalid;
-    bool has_load_error = false;
-
-    // Identify the local package imports if present.
-    UnitInfo::PackageImports* local_imports = nullptr;
-    if (i < unit_info.package_imports.size()) {
-      local_imports = &unit_info.package_imports[i];
-      if (!local_imports->package_id.is_valid()) {
-        // Skip the current package.
-        continue;
-      }
-      import_decl_id = local_imports->import_decl_id;
-
-      package_id = local_imports->package_id;
-      has_load_error |= local_imports->has_load_error;
-    }
-
-    // Identify the API package imports if present.
-    UnitInfo::PackageImports* api_imports = nullptr;
-    if (api_imports_entry.second != -1) {
-      api_imports =
-          &unit_info.api_for_impl->package_imports[api_imports_entry.second];
-
-      if (local_imports) {
-        CARBON_CHECK(package_id == api_imports_entry.first);
-      } else {
-        auto import_ir_inst_id = context.import_ir_insts().Add(
-            {.ir_id = SemIR::ImportIRId::ApiForImpl,
-             .inst_id = api_imports->import_decl_id});
-        import_decl_id =
-            context.AddInst(context.MakeImportedLocAndInst<SemIR::ImportDecl>(
-                import_ir_inst_id, {.package_id = SemIR::NameId::ForIdentifier(
-                                        api_imports_entry.first)}));
-        package_id = api_imports_entry.first;
-      }
-      has_load_error |= api_imports->has_load_error;
-    }
-
-    // Do the actual import.
-    ImportLibrariesFromOtherPackage(
-        context, namespace_type_id, import_decl_id, package_id,
-        CollectTransitiveImports(import_decl_id, local_imports, api_imports,
-                                 total_ir_count),
-        has_load_error);
-  }
-}
-
-// Add imports to the root block.
-static auto InitPackageScopeAndImports(Context& context, UnitInfo& unit_info,
-                                       int total_ir_count) -> void {
-  // First create the constant values map for all imported IRs. We'll populate
-  // these with mappings for namespaces as we go.
-  size_t num_irs = 0;
-  for (auto& package_imports : unit_info.package_imports) {
-    num_irs += package_imports.imports.size();
-  }
-  if (!unit_info.api_for_impl) {
-    // Leave an empty slot for ImportIRId::ApiForImpl.
-    ++num_irs;
-  }
-
-  context.import_irs().Reserve(num_irs);
-  context.import_ir_constant_values().reserve(num_irs);
-
-  context.SetTotalIRCount(total_ir_count);
-
-  // Importing makes many namespaces, so only canonicalize the type once.
-  auto namespace_type_id =
-      context.GetSingletonType(SemIR::NamespaceType::SingletonInstId);
-
-  // Define the package scope, with an instruction for `package` expressions to
-  // reference.
-  auto package_scope_id = context.name_scopes().Add(
-      SemIR::Namespace::PackageInstId, SemIR::NameId::PackageNamespace,
-      SemIR::NameScopeId::Invalid);
-  CARBON_CHECK(package_scope_id == SemIR::NameScopeId::Package);
-
-  auto package_inst_id = context.AddInst<SemIR::Namespace>(
-      Parse::NodeId::Invalid, {.type_id = namespace_type_id,
-                               .name_scope_id = SemIR::NameScopeId::Package,
-                               .import_id = SemIR::InstId::Invalid});
-  CARBON_CHECK(package_inst_id == SemIR::Namespace::PackageInstId);
-
-  // If there is an implicit `api` import, set it first so that it uses the
-  // ImportIRId::ApiForImpl when processed for imports.
-  if (unit_info.api_for_impl) {
-    const auto& names = context.parse_tree().packaging_decl()->names;
-    auto import_decl_id = context.AddInst<SemIR::ImportDecl>(
-        names.node_id,
-        {.package_id = SemIR::NameId::ForIdentifier(names.package_id)});
-    SetApiImportIR(context, {.decl_id = import_decl_id,
-                             .is_export = false,
-                             .sem_ir = unit_info.api_for_impl->unit->sem_ir});
-  } else {
-    SetApiImportIR(context,
-                   {.decl_id = SemIR::InstId::Invalid, .sem_ir = nullptr});
-  }
-
-  // Add import instructions for everything directly imported. Implicit imports
-  // are handled separately.
-  for (auto& package_imports : unit_info.package_imports) {
-    CARBON_CHECK(!package_imports.import_decl_id.is_valid());
-    package_imports.import_decl_id = context.AddInst<SemIR::ImportDecl>(
-        package_imports.node_id, {.package_id = SemIR::NameId::ForIdentifier(
-                                      package_imports.package_id)});
-  }
-
-  // Process the imports.
-  if (unit_info.api_for_impl) {
-    ImportApiFile(context, namespace_type_id,
-                  *unit_info.api_for_impl->unit->sem_ir);
-  }
-  ImportCurrentPackage(context, unit_info, total_ir_count, package_inst_id,
-                       namespace_type_id);
-  CARBON_CHECK(context.scope_stack().PeekIndex() == ScopeIndex::Package);
-  ImportOtherPackages(context, unit_info, total_ir_count, namespace_type_id);
-}
-
-// Checks that each required definition is available. If the definition can be
-// generated by resolving a specific, does so, otherwise emits a diagnostic for
-// each declaration in context.definitions_required() that doesn't have a
-// definition.
-static auto CheckRequiredDefinitions(Context& context,
-                                     Context::DiagnosticEmitter& emitter)
-    -> void {
-  CARBON_DIAGNOSTIC(MissingDefinitionInImpl, Error,
-                    "no definition found for declaration in impl file");
-  // Note that more required definitions can be added during this loop.
-  for (size_t i = 0; i != context.definitions_required().size(); ++i) {
-    SemIR::InstId decl_inst_id = context.definitions_required()[i];
-    SemIR::Inst decl_inst = context.insts().Get(decl_inst_id);
-    CARBON_KIND_SWITCH(context.insts().Get(decl_inst_id)) {
-      case CARBON_KIND(SemIR::ClassDecl class_decl): {
-        if (!context.classes().Get(class_decl.class_id).is_defined()) {
-          emitter.Emit(decl_inst_id, MissingDefinitionInImpl);
-        }
-        break;
-      }
-      case CARBON_KIND(SemIR::FunctionDecl function_decl): {
-        if (context.functions().Get(function_decl.function_id).definition_id ==
-            SemIR::InstId::Invalid) {
-          emitter.Emit(decl_inst_id, MissingDefinitionInImpl);
-        }
-        break;
-      }
-      case CARBON_KIND(SemIR::ImplDecl impl_decl): {
-        if (!context.impls().Get(impl_decl.impl_id).is_defined()) {
-          emitter.Emit(decl_inst_id, MissingDefinitionInImpl);
-        }
-        break;
-      }
-      case SemIR::InterfaceDecl::Kind: {
-        // TODO: Handle `interface` as well, once we can test it without
-        // triggering
-        // https://github.com/carbon-language/carbon-lang/issues/4071.
-        CARBON_FATAL("TODO: Support interfaces in DiagnoseMissingDefinitions");
-      }
-      case CARBON_KIND(SemIR::SpecificFunction specific_function): {
-        if (!ResolveSpecificDefinition(context,
-                                       specific_function.specific_id)) {
-          CARBON_DIAGNOSTIC(MissingGenericFunctionDefinition, Error,
-                            "use of undefined generic function");
-          CARBON_DIAGNOSTIC(MissingGenericFunctionDefinitionHere, Note,
-                            "generic function declared here");
-          auto generic_decl_id =
-              context.generics()
-                  .Get(context.specifics()
-                           .Get(specific_function.specific_id)
-                           .generic_id)
-                  .decl_id;
-          emitter.Build(decl_inst_id, MissingGenericFunctionDefinition)
-              .Note(generic_decl_id, MissingGenericFunctionDefinitionHere)
-              .Emit();
-        }
-        break;
-      }
-      default: {
-        CARBON_FATAL("Unexpected inst in definitions_required: {0}", decl_inst);
-      }
-    }
-  }
-}
-
-// Loops over all nodes in the tree. On some errors, this may return early,
-// for example if an unrecoverable state is encountered.
-// NOLINTNEXTLINE(readability-function-size)
-static auto ProcessNodeIds(Context& context, llvm::raw_ostream* vlog_stream,
-                           ErrorTrackingDiagnosticConsumer& err_tracker,
-                           Parse::NodeLocConverter& converter) -> bool {
-  NodeIdTraversal traversal(context, vlog_stream);
-
-  Parse::NodeId node_id = Parse::NodeId::Invalid;
-
-  // On crash, report which token we were handling.
-  PrettyStackTraceFunction node_dumper([&](llvm::raw_ostream& output) {
-    auto loc = converter.ConvertLoc(
-        node_id, [](DiagnosticLoc, const DiagnosticBase<>&) {});
-    loc.FormatLocation(output);
-    output << ": checking " << context.parse_tree().node_kind(node_id) << "\n";
-    // Crash output has a tab indent; try to indent slightly past that.
-    loc.FormatSnippet(output, /*indent=*/10);
-  });
-
-  while (auto maybe_node_id = traversal.Next()) {
-    node_id = *maybe_node_id;
-    auto parse_kind = context.parse_tree().node_kind(node_id);
-
-    switch (parse_kind) {
-#define CARBON_PARSE_NODE_KIND(Name)                                 \
-  case Parse::NodeKind::Name: {                                      \
-    if (!HandleParseNode(context, Parse::Name##Id(node_id))) {       \
-      CARBON_CHECK(err_tracker.seen_error(),                         \
-                   "Handle" #Name                                    \
-                   " returned false without printing a diagnostic"); \
-      return false;                                                  \
-    }                                                                \
-    break;                                                           \
-  }
-#include "toolchain/parse/node_kind.def"
-    }
-
-    traversal.Handle(parse_kind);
-  }
-  return true;
-}
-
-// Produces and checks the IR for the provided Parse::Tree.
-static auto CheckParseTree(UnitInfo& unit_info, int total_ir_count,
-                           llvm::raw_ostream* vlog_stream) -> void {
-  Timings::ScopedTiming timing(unit_info.unit->timings, "check");
-
-  // We can safely mark this as checked at the start.
-  unit_info.is_checked = true;
-
-  SemIR::File* sem_ir = unit_info.unit->sem_ir;
-  Context::DiagnosticEmitter emitter(*unit_info.unit->sem_ir_converter,
-                                     unit_info.err_tracker);
-  Context context(&emitter, unit_info.unit->get_parse_tree_and_subtrees, sem_ir,
-                  vlog_stream);
-  PrettyStackTraceFunction context_dumper(
-      [&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });
-
-  // Add a block for the file.
-  context.inst_block_stack().Push();
-
-  InitPackageScopeAndImports(context, unit_info, total_ir_count);
-
-  // Eagerly import the impls declared in the api file to prepare to redeclare
-  // them.
-  ImportImplsFromApiFile(context);
-
-  if (!ProcessNodeIds(context, vlog_stream, unit_info.err_tracker,
-                      *unit_info.unit->node_converter)) {
-    context.sem_ir().set_has_errors(true);
-    return;
-  }
-
-  CheckRequiredDefinitions(context, emitter);
-
-  context.Finalize();
-
-  context.VerifyOnFinish();
-
-  sem_ir->set_has_errors(unit_info.err_tracker.seen_error());
-
-#ifndef NDEBUG
-  if (auto verify = sem_ir->Verify(); !verify.ok()) {
-    CARBON_FATAL("{0}Built invalid semantics IR: {1}\n", *sem_ir,
-                 verify.error());
-  }
-#endif
-}
-
 // The package and library names, used as map keys.
 using ImportKey = std::pair<llvm::StringRef, llvm::StringRef>;
 
 // Returns a key form of the package object. file_package_id is only used for
 // imports, not the main package declaration; as a consequence, it will be
 // invalid for the main package declaration.
-static auto GetImportKey(UnitInfo& unit_info, IdentifierId file_package_id,
+static auto GetImportKey(UnitAndImports& unit_info,
+                         IdentifierId file_package_id,
                          Parse::Tree::PackagingNames names) -> ImportKey {
   auto* stores = unit_info.unit->value_stores;
   llvm::StringRef package_name =
@@ -566,10 +58,10 @@ static auto RenderImportKey(ImportKey import_key) -> std::string {
 //
 // The ID comparisons between the import and unit are okay because they both
 // come from the same file.
-static auto TrackImport(Map<ImportKey, UnitInfo*>& api_map,
+static auto TrackImport(Map<ImportKey, UnitAndImports*>& api_map,
                         Map<ImportKey, Parse::NodeId>* explicit_import_map,
-                        UnitInfo& unit_info, Parse::Tree::PackagingNames import)
-    -> void {
+                        UnitAndImports& unit_info,
+                        Parse::Tree::PackagingNames import) -> void {
   const auto& packaging = unit_info.parse_tree().packaging_decl();
 
   IdentifierId file_package_id =
@@ -670,17 +162,17 @@ static auto TrackImport(Map<ImportKey, UnitInfo*>& api_map,
   auto create_imports = [&]() -> int32_t {
     int32_t index = unit_info.package_imports.size();
     unit_info.package_imports.push_back(
-        UnitInfo::PackageImports(import.package_id, import.node_id));
+        PackageImports(import.package_id, import.node_id));
     return index;
   };
   auto insert_result =
       unit_info.package_imports_map.Insert(import.package_id, create_imports);
-  UnitInfo::PackageImports& package_imports =
+  PackageImports& package_imports =
       unit_info.package_imports[insert_result.value()];
 
   if (auto api_lookup = api_map.Lookup(import_key)) {
     // Add references between the file and imported api.
-    UnitInfo* api = api_lookup.value();
+    UnitAndImports* api = api_lookup.value();
     package_imports.imports.push_back({import, api});
     ++unit_info.imports_remaining;
     api->incoming_imports.push_back(&unit_info);
@@ -713,8 +205,9 @@ static auto TrackImport(Map<ImportKey, UnitInfo*>& api_map,
 // related to the packaging because the strings are loaded as part of getting
 // the ImportKey (which we then do for `impl` files too).
 static auto BuildApiMapAndDiagnosePackaging(
-    llvm::MutableArrayRef<UnitInfo> unit_infos) -> Map<ImportKey, UnitInfo*> {
-  Map<ImportKey, UnitInfo*> api_map;
+    llvm::MutableArrayRef<UnitAndImports> unit_infos)
+    -> Map<ImportKey, UnitAndImports*> {
+  Map<ImportKey, UnitAndImports*> api_map;
   for (auto& unit_info : unit_infos) {
     const auto& packaging = unit_info.parse_tree().packaging_decl();
     // An import key formed from the `package` or `library` declaration. Or, for
@@ -803,19 +296,19 @@ static auto BuildApiMapAndDiagnosePackaging(
 
 auto CheckParseTrees(llvm::MutableArrayRef<Unit> units, bool prelude_import,
                      llvm::raw_ostream* vlog_stream) -> void {
-  // UnitInfo is big due to its SmallVectors, so we default to 0 on the
+  // UnitAndImports is big due to its SmallVectors, so we default to 0 on the
   // stack.
-  llvm::SmallVector<UnitInfo, 0> unit_infos;
+  llvm::SmallVector<UnitAndImports, 0> unit_infos;
   unit_infos.reserve(units.size());
   for (auto [i, unit] : llvm::enumerate(units)) {
     unit_infos.emplace_back(SemIR::CheckIRId(i), unit);
   }
 
-  Map<ImportKey, UnitInfo*> api_map =
+  Map<ImportKey, UnitAndImports*> api_map =
       BuildApiMapAndDiagnosePackaging(unit_infos);
 
   // Mark down imports for all files.
-  llvm::SmallVector<UnitInfo*> ready_to_check;
+  llvm::SmallVector<UnitAndImports*> ready_to_check;
   ready_to_check.reserve(units.size());
   for (auto& unit_info : unit_infos) {
     const auto& packaging = unit_info.parse_tree().packaging_decl();
@@ -857,7 +350,7 @@ auto CheckParseTrees(llvm::MutableArrayRef<Unit> units, bool prelude_import,
   for (int check_index = 0;
        check_index < static_cast<int>(ready_to_check.size()); ++check_index) {
     auto* unit_info = ready_to_check[check_index];
-    CheckParseTree(*unit_info, units.size(), vlog_stream);
+    CheckUnit(unit_info, units.size(), vlog_stream).Run();
     for (auto* incoming_import : unit_info->incoming_imports) {
       --incoming_import->imports_remaining;
       if (incoming_import->imports_remaining == 0) {
@@ -904,7 +397,7 @@ auto CheckParseTrees(llvm::MutableArrayRef<Unit> units, bool prelude_import,
     // incomplete imports.
     for (auto& unit_info : unit_infos) {
       if (unit_info.imports_remaining > 0) {
-        CheckParseTree(unit_info, units.size(), vlog_stream);
+        CheckUnit(&unit_info, units.size(), vlog_stream).Run();
       }
     }
   }

+ 0 - 2
toolchain/check/check.h

@@ -10,8 +10,6 @@
 #include "toolchain/base/timings.h"
 #include "toolchain/check/sem_ir_diagnostic_converter.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
-#include "toolchain/lex/tokenized_buffer.h"
-#include "toolchain/parse/tree.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
 

+ 430 - 0
toolchain/check/check_unit.cpp

@@ -0,0 +1,430 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/check/check_unit.h"
+
+#include "toolchain/base/kind_switch.h"
+#include "toolchain/base/pretty_stack_trace_function.h"
+#include "toolchain/check/generic.h"
+#include "toolchain/check/handle.h"
+#include "toolchain/check/import.h"
+#include "toolchain/check/import_ref.h"
+#include "toolchain/check/node_id_traversal.h"
+
+namespace Carbon::Check {
+
+// Computes how many imported IRs this unit has, which the CheckUnit
+// constructor forwards to Context.
+static auto GetImportedIRCount(UnitAndImports* unit_and_imports) -> int {
+  // Without an `api_for_impl`, one empty slot is still left for
+  // ImportIRId::ApiForImpl.
+  int total = unit_and_imports->api_for_impl ? 0 : 1;
+  for (const auto& per_package : unit_and_imports->package_imports) {
+    total += per_package.imports.size();
+  }
+  return total;
+}
+
+// Stores the inputs, then builds the diagnostic emitter (from the unit's
+// SemIR converter and error tracker) and the checking context that uses it.
+CheckUnit::CheckUnit(UnitAndImports* unit_and_imports, int total_ir_count,
+                     llvm::raw_ostream* vlog_stream)
+    : unit_and_imports_(unit_and_imports),
+      total_ir_count_(total_ir_count),
+      vlog_stream_(vlog_stream),
+      emitter_(*unit_and_imports->unit->sem_ir_converter,
+               unit_and_imports->err_tracker),
+      context_(&emitter_, unit_and_imports->unit->get_parse_tree_and_subtrees,
+               unit_and_imports->unit->sem_ir,
+               GetImportedIRCount(unit_and_imports), total_ir_count,
+               vlog_stream) {}
+
+// Checks the unit: sets up the package scope and imports, handles the parse
+// nodes, checks required definitions, and finalizes the IR. If node
+// processing fails, the IR is marked as having errors and the remaining
+// steps are skipped.
+auto CheckUnit::Run() -> void {
+  Timings::ScopedTiming timer(unit_and_imports_->unit->timings, "check");
+
+  // It's safe to flag the unit as checked before doing the work.
+  unit_and_imports_->is_checked = true;
+
+  // Print the context as part of a stack dump.
+  PrettyStackTraceFunction dump_context(
+      [this](llvm::raw_ostream& out) { context_.PrintForStackDump(out); });
+
+  // The file gets its own instruction block.
+  context_.inst_block_stack().Push();
+
+  InitPackageScopeAndImports();
+
+  // Impls declared in the api file are imported eagerly, in preparation for
+  // redeclaring them.
+  ImportImplsFromApiFile(context_);
+
+  const bool processed_all_nodes = ProcessNodeIds();
+  if (!processed_all_nodes) {
+    context_.sem_ir().set_has_errors(true);
+    return;
+  }
+
+  CheckRequiredDefinitions();
+  context_.Finalize();
+  context_.VerifyOnFinish();
+
+  const bool saw_error = unit_and_imports_->err_tracker.seen_error();
+  context_.sem_ir().set_has_errors(saw_error);
+
+#ifndef NDEBUG
+  // Builds without NDEBUG additionally verify the IR that was produced.
+  auto verify_result = context_.sem_ir().Verify();
+  if (!verify_result.ok()) {
+    CARBON_FATAL("{0}Built invalid semantics IR: {1}\n", context_.sem_ir(),
+                 verify_result.error());
+  }
+#endif
+}
+
+// Defines the package scope and its `package` instruction, adds the import
+// declarations, and then processes the API file, current-package, and
+// other-package imports in that order.
+auto CheckUnit::InitPackageScopeAndImports() -> void {
+  // The corresponding `api` unit, if this unit is an `impl` file.
+  UnitAndImports* api_unit = unit_and_imports_->api_for_impl;
+
+  // Many namespaces get made while importing; canonicalize the type just once.
+  auto namespace_type_id =
+      context_.GetSingletonType(SemIR::NamespaceType::SingletonInstId);
+
+  // Create the package scope, along with the instruction that `package`
+  // expressions reference. Both are checked against their expected IDs.
+  auto package_scope_id = context_.name_scopes().Add(
+      SemIR::Namespace::PackageInstId, SemIR::NameId::PackageNamespace,
+      SemIR::NameScopeId::Invalid);
+  CARBON_CHECK(package_scope_id == SemIR::NameScopeId::Package);
+
+  auto package_inst_id = context_.AddInst<SemIR::Namespace>(
+      Parse::NodeId::Invalid, {.type_id = namespace_type_id,
+                               .name_scope_id = SemIR::NameScopeId::Package,
+                               .import_id = SemIR::InstId::Invalid});
+  CARBON_CHECK(package_inst_id == SemIR::Namespace::PackageInstId);
+
+  // The API import IR is set before anything else is imported, so that an
+  // implicit `api` import uses ImportIRId::ApiForImpl when imports are
+  // processed.
+  if (!api_unit) {
+    SetApiImportIR(context_,
+                   {.decl_id = SemIR::InstId::Invalid, .sem_ir = nullptr});
+  } else {
+    const auto& decl_names = context_.parse_tree().packaging_decl()->names;
+    auto api_import_decl_id = context_.AddInst<SemIR::ImportDecl>(
+        decl_names.node_id,
+        {.package_id = SemIR::NameId::ForIdentifier(decl_names.package_id)});
+    SetApiImportIR(context_, {.decl_id = api_import_decl_id,
+                              .is_export = false,
+                              .sem_ir = api_unit->unit->sem_ir});
+  }
+
+  // Every directly imported package gets an import instruction. Implicit
+  // imports are handled separately.
+  for (auto& package_imports : unit_and_imports_->package_imports) {
+    CARBON_CHECK(!package_imports.import_decl_id.is_valid());
+    auto package_name_id =
+        SemIR::NameId::ForIdentifier(package_imports.package_id);
+    package_imports.import_decl_id = context_.AddInst<SemIR::ImportDecl>(
+        package_imports.node_id, {.package_id = package_name_id});
+  }
+
+  // Now process the imports themselves.
+  if (api_unit) {
+    ImportApiFile(context_, namespace_type_id, *api_unit->unit->sem_ir);
+  }
+  ImportCurrentPackage(package_inst_id, namespace_type_id);
+  CARBON_CHECK(context_.scope_stack().PeekIndex() == ScopeIndex::Package);
+  ImportOtherPackages(namespace_type_id);
+}
+
+// Adds an ImportIR to `results` for each import in `imports` whose IR has no
+// entry yet in `ir_to_result_index`, recording the index it was added at.
+auto CheckUnit::CollectDirectImports(
+    llvm::SmallVector<SemIR::ImportIR>& results,
+    llvm::MutableArrayRef<int> ir_to_result_index, SemIR::InstId import_decl_id,
+    const PackageImports& imports, bool is_local) -> void {
+  for (const auto& import : imports.imports) {
+    const auto& imported_ir = *import.unit_info->unit->sem_ir;
+    auto& result_index = ir_to_result_index[imported_ir.check_ir_id().index];
+
+    // An existing entry should only happen when doing API imports for an
+    // implementation file. It is left untouched; is_export doesn't matter.
+    if (result_index == -1) {
+      result_index = results.size();
+      // Exports are only tagged in API files; the value in implementation
+      // files is ignored.
+      const bool tag_as_export = is_local && import.names.is_export;
+      results.push_back({.decl_id = import_decl_id,
+                         .is_export = tag_as_export,
+                         .sem_ir = &imported_ir});
+    }
+  }
+}
+
+// Builds the list of IRs to import: the direct imports from `local_imports`
+// and `api_imports` (either may be null) come first, followed by the IRs which
+// those direct imports mark as exported. Each IR appears at most once.
+auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id,
+                                         const PackageImports* local_imports,
+                                         const PackageImports* api_imports)
+    -> llvm::SmallVector<SemIR::ImportIR> {
+  llvm::SmallVector<SemIR::ImportIR> results;
+
+  // For each IR (by CheckIRId index), the position of its entry in `results`,
+  // or -1 if it has none. Together with the entry's is_export, this tracks
+  // whether an IR was imported in full, including `export import`, as opposed
+  // to being indirectly added without all names being exported to this IR.
+  llvm::SmallVector<int> ir_to_result_index(total_ir_count_, -1);
+
+  // Direct imports are added first, so that an entity imported both directly
+  // and indirectly has an import path reflecting the direct import.
+  if (local_imports) {
+    CollectDirectImports(results, ir_to_result_index, import_decl_id,
+                         *local_imports, /*is_local=*/true);
+  }
+  if (api_imports) {
+    CollectDirectImports(results, ir_to_result_index, import_decl_id,
+                         *api_imports, /*is_local=*/false);
+  }
+
+  // Walk only the direct imports looking for indirect exports. `results` grows
+  // during the walk, so the count is captured up front.
+  const int direct_import_count = results.size();
+  for (int direct_index : llvm::seq(direct_import_count)) {
+    // Read the direct entry's fields before anything is appended.
+    const bool direct_is_export = results[direct_index].is_export;
+    const auto direct_decl_id = results[direct_index].decl_id;
+    const auto* direct_sem_ir = results[direct_index].sem_ir;
+
+    for (const auto& indirect_ir : direct_sem_ir->import_irs().array_ref()) {
+      if (indirect_ir.is_export) {
+        auto& indirect_index =
+            ir_to_result_index[indirect_ir.sem_ir->check_ir_id().index];
+        if (indirect_index != -1) {
+          // Already present; it can only be upgraded to an export.
+          if (direct_is_export) {
+            results[indirect_index].is_export = true;
+          }
+        } else {
+          indirect_index = results.size();
+          // TODO: In the case of a recursive `export import`, this only points
+          // at the outermost import. May want something that better reflects
+          // the recursion.
+          results.push_back({.decl_id = direct_decl_id,
+                             .is_export = direct_is_export,
+                             .sem_ir = indirect_ir.sem_ir});
+        }
+      }
+    }
+  }
+
+  return results;
+}
+
+// Imports libraries from the current package, if the unit has any such
+// imports, and pushes the package scope onto the scope stack.
+auto CheckUnit::ImportCurrentPackage(SemIR::InstId package_inst_id,
+                                     SemIR::TypeId namespace_type_id) -> void {
+  // Imports from the current package are keyed by the invalid identifier.
+  auto self_lookup =
+      unit_and_imports_->package_imports_map.Lookup(IdentifierId::Invalid);
+  if (!self_lookup) {
+    // Nothing to import, so there are no names to add; just push the scope.
+    context_.scope_stack().Push(package_inst_id, SemIR::NameScopeId::Package);
+    return;
+  }
+
+  PackageImports& self_import =
+      unit_and_imports_->package_imports[self_lookup.value()];
+
+  // A failed load is recorded as an error on the package scope.
+  if (self_import.has_load_error) {
+    context_.name_scopes().Get(SemIR::NameScopeId::Package).set_has_error();
+  }
+
+  ImportLibrariesFromCurrentPackage(
+      context_, namespace_type_id,
+      CollectTransitiveImports(self_import.import_decl_id, &self_import,
+                               /*api_imports=*/nullptr));
+
+  // The pushed scope carries the package scope's error state, read after the
+  // import.
+  const bool package_has_error =
+      context_.name_scopes().Get(SemIR::NameScopeId::Package).has_error();
+  context_.scope_stack().Push(package_inst_id, SemIR::NameScopeId::Package,
+                              SemIR::SpecificId::Invalid, package_has_error);
+}
+
+auto CheckUnit::ImportOtherPackages(SemIR::TypeId namespace_type_id) -> void {
+  // api_imports_list is initially the size of the current file's imports,
+  // including for API files, for simplicity in iteration. It's only really used
+  // when processing an implementation file, in order to combine the API file
+  // imports.
+  //
+  // For packages imported by the API file, the IdentifierId is the package name
+  // and the index is into the API's import list. Otherwise, the initial
+  // {Invalid, -1} state remains.
+  llvm::SmallVector<std::pair<IdentifierId, int32_t>> api_imports_list;
+  api_imports_list.resize(unit_and_imports_->package_imports.size(),
+                          {IdentifierId::Invalid, -1});
+
+  // When there's an API file, add the mapping to api_imports_list.
+  if (unit_and_imports_->api_for_impl) {
+    const auto& api_identifiers =
+        unit_and_imports_->api_for_impl->unit->value_stores->identifiers();
+    auto& impl_identifiers =
+        unit_and_imports_->unit->value_stores->identifiers();
+
+    for (auto [api_imports_index, api_imports] :
+         llvm::enumerate(unit_and_imports_->api_for_impl->package_imports)) {
+      // Skip the current package.
+      if (!api_imports.package_id.is_valid()) {
+        continue;
+      }
+      // Translate the package ID from the API file to the implementation file.
+      auto impl_package_id =
+          impl_identifiers.Add(api_identifiers.Get(api_imports.package_id));
+      if (auto lookup =
+              unit_and_imports_->package_imports_map.Lookup(impl_package_id)) {
+        // On a hit, replace the entry to unify the API and implementation
+        // imports.
+        api_imports_list[lookup.value()] = {impl_package_id, api_imports_index};
+      } else {
+        // On a miss, add the package as API-only.
+        api_imports_list.push_back({impl_package_id, api_imports_index});
+      }
+    }
+  }
+
+  for (auto [i, api_imports_entry] : llvm::enumerate(api_imports_list)) {
+    // These variables are updated after figuring out which imports are present.
+    auto import_decl_id = SemIR::InstId::Invalid;
+    IdentifierId package_id = IdentifierId::Invalid;
+    bool has_load_error = false;
+
+    // Identify the local package imports; entries past them are API-only.
+    PackageImports* local_imports = nullptr;
+    if (i < unit_and_imports_->package_imports.size()) {
+      local_imports = &unit_and_imports_->package_imports[i];
+      if (!local_imports->package_id.is_valid()) {
+        // Skip the current package.
+        continue;
+      }
+      import_decl_id = local_imports->import_decl_id;
+
+      package_id = local_imports->package_id;
+      has_load_error |= local_imports->has_load_error;
+    }
+
+    // Identify the API package imports if present.
+    PackageImports* api_imports = nullptr;
+    if (api_imports_entry.second != -1) {  // -1: not imported by the API.
+      api_imports = &unit_and_imports_->api_for_impl
+                         ->package_imports[api_imports_entry.second];
+
+      if (local_imports) {
+        CARBON_CHECK(package_id == api_imports_entry.first);
+      } else {  // API-only package; there is no local `import` to reuse.
+        auto import_ir_inst_id = context_.import_ir_insts().Add(
+            {.ir_id = SemIR::ImportIRId::ApiForImpl,
+             .inst_id = api_imports->import_decl_id});
+        import_decl_id =
+            context_.AddInst(context_.MakeImportedLocAndInst<SemIR::ImportDecl>(
+                import_ir_inst_id, {.package_id = SemIR::NameId::ForIdentifier(
+                                        api_imports_entry.first)}));
+        package_id = api_imports_entry.first;
+      }
+      has_load_error |= api_imports->has_load_error;
+    }
+
+    // Do the actual import.
+    ImportLibrariesFromOtherPackage(
+        context_, namespace_type_id, import_decl_id, package_id,
+        CollectTransitiveImports(import_decl_id, local_imports, api_imports),
+        has_load_error);
+  }
+}
+
+// Handles each parse node in traversal order. Returns false, stopping early,
+// when a handler returns false; an error must already have been diagnosed.
+// NOLINTNEXTLINE(readability-function-size)
+auto CheckUnit::ProcessNodeIds() -> bool {
+  NodeIdTraversal traversal(context_, vlog_stream_);
+
+  Parse::NodeId node_id = Parse::NodeId::Invalid;
+
+  // On crash, report which token we were handling.
+  PrettyStackTraceFunction node_dumper([&](llvm::raw_ostream& output) {
+    auto loc = unit_and_imports_->unit->node_converter->ConvertLoc(
+        node_id, [](DiagnosticLoc, const DiagnosticBase<>&) {});
+    loc.FormatLocation(output);
+    output << ": checking " << context_.parse_tree().node_kind(node_id) << "\n";
+    // Crash output has a tab indent; try to indent slightly past that.
+    loc.FormatSnippet(output, /*indent=*/10);
+  });
+
+  while (auto maybe_node_id = traversal.Next()) {
+    node_id = *maybe_node_id;
+    auto parse_kind = context_.parse_tree().node_kind(node_id);
+
+    bool result;  // Assigned by the matching case below.
+    switch (parse_kind) {  // Cases are generated from node_kind.def.
+#define CARBON_PARSE_NODE_KIND(Name)                              \
+  case Parse::NodeKind::Name: {                                   \
+    result = HandleParseNode(context_, Parse::Name##Id(node_id)); \
+    break;                                                        \
+  }
+#include "toolchain/parse/node_kind.def"
+    }
+
+    if (!result) {
+      CARBON_CHECK(
+          unit_and_imports_->err_tracker.seen_error(),
+          "HandleParseNode for `{0}` returned false without diagnosing.",
+          parse_kind);
+      return false;
+    }
+    traversal.Handle(parse_kind);
+  }
+  return true;
+}
+
+// Diagnoses each entry of `context_.definitions_required()` that has no
+// definition. For a specific function, first tries to resolve the specific's
+// definition, and only diagnoses if that fails.
+auto CheckUnit::CheckRequiredDefinitions() -> void {
+  CARBON_DIAGNOSTIC(MissingDefinitionInImpl, Error,
+                    "no definition found for declaration in impl file");
+  // Note that more required definitions can be added during this loop, so the
+  // size is re-read on every iteration.
+  for (size_t i = 0; i != context_.definitions_required().size(); ++i) {
+    SemIR::InstId decl_inst_id = context_.definitions_required()[i];
+    // Fetch the instruction once; it is both switched on and printed by the
+    // `default` case.
+    SemIR::Inst decl_inst = context_.insts().Get(decl_inst_id);
+    CARBON_KIND_SWITCH(decl_inst) {
+      case CARBON_KIND(SemIR::ClassDecl class_decl): {
+        if (!context_.classes().Get(class_decl.class_id).is_defined()) {
+          emitter_.Emit(decl_inst_id, MissingDefinitionInImpl);
+        }
+        break;
+      }
+      case CARBON_KIND(SemIR::FunctionDecl function_decl): {
+        if (context_.functions().Get(function_decl.function_id).definition_id ==
+            SemIR::InstId::Invalid) {
+          emitter_.Emit(decl_inst_id, MissingDefinitionInImpl);
+        }
+        break;
+      }
+      case CARBON_KIND(SemIR::ImplDecl impl_decl): {
+        if (!context_.impls().Get(impl_decl.impl_id).is_defined()) {
+          emitter_.Emit(decl_inst_id, MissingDefinitionInImpl);
+        }
+        break;
+      }
+      case SemIR::InterfaceDecl::Kind: {
+        // TODO: Handle `interface` as well, once we can test it without
+        // triggering
+        // https://github.com/carbon-language/carbon-lang/issues/4071.
+        CARBON_FATAL("TODO: Support interfaces in CheckRequiredDefinitions");
+      }
+      case CARBON_KIND(SemIR::SpecificFunction specific_function): {
+        // Only diagnose when the specific's definition can't be resolved.
+        if (!ResolveSpecificDefinition(context_,
+                                       specific_function.specific_id)) {
+          CARBON_DIAGNOSTIC(MissingGenericFunctionDefinition, Error,
+                            "use of undefined generic function");
+          CARBON_DIAGNOSTIC(MissingGenericFunctionDefinitionHere, Note,
+                            "generic function declared here");
+          // The note points at the declaration of the specific's generic.
+          auto generic_decl_id =
+              context_.generics()
+                  .Get(context_.specifics()
+                           .Get(specific_function.specific_id)
+                           .generic_id)
+                  .decl_id;
+          emitter_.Build(decl_inst_id, MissingGenericFunctionDefinition)
+              .Note(generic_decl_id, MissingGenericFunctionDefinitionHere)
+              .Emit();
+        }
+        break;
+      }
+      default: {
+        CARBON_FATAL("Unexpected inst in definitions_required: {0}", decl_inst);
+      }
+    }
+  }
+}
+
+}  // namespace Carbon::Check

+ 153 - 0
toolchain/check/check_unit.h

@@ -0,0 +1,153 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_
+#define CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_
+
+#include "common/map.h"
+#include "llvm/ADT/SmallVector.h"
+#include "toolchain/check/check.h"
+#include "toolchain/check/context.h"
+#include "toolchain/parse/tree_node_diagnostic_converter.h"
+#include "toolchain/sem_ir/ids.h"
+
+namespace Carbon::Check {
+
+struct UnitAndImports;
+
+// A file's imports corresponding to a single package, for
+// `UnitAndImports::package_imports`.
+struct PackageImports {
+  // A given import within the file, with its destination.
+  struct Import {
+    Parse::Tree::PackagingNames names;  // Packaging names for this import.
+    UnitAndImports* unit_info;          // The unit being imported.
+  };
+
+  // Use the constructor so that the SmallVector is only constructed
+  // as-needed.
+  explicit PackageImports(IdentifierId package_id, Parse::ImportDeclId node_id)
+      : package_id(package_id), node_id(node_id) {}
+
+  // The identifier of the imported package; invalid for the current package.
+  IdentifierId package_id;
+  // The first `import` declaration in the file, which declared the package's
+  // identifier (even if the import failed). Used for associating diagnostics
+  // not specific to a single import.
+  Parse::ImportDeclId node_id;
+  // The associated `import` instruction. Only valid once a file is checked.
+  SemIR::InstId import_decl_id = SemIR::InstId::Invalid;
+  // Whether there's an import that failed to load.
+  bool has_load_error = false;
+  // The list of valid imports.
+  llvm::SmallVector<Import> imports;
+};
+
+// Contains information accumulated while checking a `Unit` (primarily import
+// information), in addition to the `Unit` itself.
+struct UnitAndImports {
+  explicit UnitAndImports(SemIR::CheckIRId check_ir_id, Unit& unit)
+      : check_ir_id(check_ir_id),
+        unit(&unit),
+        err_tracker(*unit.consumer),
+        emitter(*unit.node_converter, err_tracker) {}
+
+  auto parse_tree() -> const Parse::Tree& { return unit->sem_ir->parse_tree(); }
+  auto source() -> const SourceBuffer& {
+    return parse_tree().tokens().source();
+  }
+
+  SemIR::CheckIRId check_ir_id;  // The unit's index among the checked units.
+  Unit* unit;                    // Non-owning.
+
+  // Emitter information. `emitter` is constructed over `err_tracker`.
+  ErrorTrackingDiagnosticConsumer err_tracker;
+  DiagnosticEmitter<Parse::NodeLoc> emitter;
+
+  // List of the outgoing imports. If a package includes unavailable library
+  // imports, it has an entry with has_load_error set. Invalid imports (for
+  // example, `import Main;`) aren't added because they won't add identifiers to
+  // name lookup.
+  llvm::SmallVector<PackageImports> package_imports;
+
+  // A map of the package names to the outgoing imports above.
+  Map<IdentifierId, int32_t> package_imports_map;
+
+  // The remaining number of imports which must be checked before this unit can
+  // be processed.
+  int32_t imports_remaining = 0;
+
+  // A list of incoming imports. This will be empty for `impl` files, because
+  // imports only touch `api` files.
+  llvm::SmallVector<UnitAndImports*> incoming_imports;
+
+  // The corresponding `api` unit if this is an `impl` file. The entry should
+  // also be in the corresponding `PackageImports`.
+  UnitAndImports* api_for_impl = nullptr;
+
+  // Whether the unit has been checked.
+  bool is_checked = false;
+};
+
+// Handles checking of a single unit. Requires that all dependencies have been
+// checked.
+//
+// This mainly splits out the single-unit logic from the higher level cross-unit
+// logic in check.cpp.
+class CheckUnit {
+ public:
+  explicit CheckUnit(UnitAndImports* unit_and_imports, int total_ir_count,
+                     llvm::raw_ostream* vlog_stream);
+
+  // Produces and checks the IR for the provided unit.
+  auto Run() -> void;
+
+ private:
+  // Defines the package scope, then adds and processes the unit's imports.
+  auto InitPackageScopeAndImports() -> void;
+
+  // Collects direct imports, for CollectTransitiveImports.
+  auto CollectDirectImports(llvm::SmallVector<SemIR::ImportIR>& results,
+                            llvm::MutableArrayRef<int> ir_to_result_index,
+                            SemIR::InstId import_decl_id,
+                            const PackageImports& imports, bool is_local)
+      -> void;
+
+  // Collects transitive imports, handling deduplication. These will be unified
+  // between local_imports and api_imports.
+  auto CollectTransitiveImports(SemIR::InstId import_decl_id,
+                                const PackageImports* local_imports,
+                                const PackageImports* api_imports)
+      -> llvm::SmallVector<SemIR::ImportIR>;
+
+  // Imports the current package.
+  auto ImportCurrentPackage(SemIR::InstId package_inst_id,
+                            SemIR::TypeId namespace_type_id) -> void;
+
+  // Imports all other packages (excluding the current package).
+  auto ImportOtherPackages(SemIR::TypeId namespace_type_id) -> void;
+
+  // Checks that each required definition is available. If the definition can be
+  // generated by resolving a specific, does so, otherwise emits a diagnostic
+  // for each declaration in context.definitions_required() that doesn't have a
+  // definition.
+  auto CheckRequiredDefinitions() -> void;
+
+  // Loops over all nodes in the tree. On some errors, this may return early,
+  // for example if an unrecoverable state is encountered.
+  // NOLINTNEXTLINE(readability-function-size)
+  auto ProcessNodeIds() -> bool;
+
+  UnitAndImports* unit_and_imports_;  // Non-owning.
+  // The number of IRs being checked in total.
+  int total_ir_count_;
+  llvm::raw_ostream* vlog_stream_;  // Passed to Context and NodeIdTraversal.
+
+  Context::DiagnosticEmitter emitter_;  // Precedes context_, which uses it.
+  Context context_;
+};
+
+}  // namespace Carbon::Check
+
+#endif  // CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_

+ 7 - 1
toolchain/check/context.cpp

@@ -41,7 +41,8 @@ namespace Carbon::Check {
 Context::Context(DiagnosticEmitter* emitter,
                  llvm::function_ref<const Parse::TreeAndSubtrees&()>
                      get_parse_tree_and_subtrees,
-                 SemIR::File* sem_ir, llvm::raw_ostream* vlog_stream)
+                 SemIR::File* sem_ir, int imported_ir_count, int total_ir_count,
+                 llvm::raw_ostream* vlog_stream)
     : emitter_(emitter),
       get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees),
       sem_ir_(sem_ir),
@@ -54,6 +55,11 @@ Context::Context(DiagnosticEmitter* emitter,
       decl_name_stack_(this),
       scope_stack_(sem_ir_->identifiers()),
       global_init_(this) {
+  // Prepare fields which relate to the number of IRs available for import.
+  import_irs().Reserve(imported_ir_count);
+  import_ir_constant_values_.reserve(imported_ir_count);
+  check_ir_map_.resize(total_ir_count, SemIR::ImportIRId::Invalid);
+
   // Map the builtin `<error>` and `type` type constants to their corresponding
   // special `TypeId` values.
   type_ids_for_type_constants_.Insert(

+ 2 - 8
toolchain/check/context.h

@@ -72,7 +72,8 @@ class Context {
   explicit Context(DiagnosticEmitter* emitter,
                    llvm::function_ref<const Parse::TreeAndSubtrees&()>
                        get_parse_tree_and_subtrees,
-                   SemIR::File* sem_ir, llvm::raw_ostream* vlog_stream);
+                   SemIR::File* sem_ir, int imported_ir_count,
+                   int total_ir_count, llvm::raw_ostream* vlog_stream);
 
   // Marks an implementation TODO. Always returns false.
   auto TODO(SemIRLoc loc, std::string label) -> bool;
@@ -461,13 +462,6 @@ class Context {
 
   auto Finalize() -> void;
 
-  // Sets the total number of IRs which exist. This is used to prepare a map
-  // from IR to imported IR.
-  auto SetTotalIRCount(int num_irs) -> void {
-    CARBON_CHECK(check_ir_map_.empty(), "SetTotalIRCount is only called once");
-    check_ir_map_.resize(num_irs, SemIR::ImportIRId::Invalid);
-  }
-
   // Returns the imported IR ID for an IR, or invalid if not imported.
   auto GetImportIRId(const SemIR::File& sem_ir) -> SemIR::ImportIRId& {
     return check_ir_map_[sem_ir.check_ir_id().index];