Explorar o código

Store Clang `Decl`s in a `CanonicalValueStore` (#5638)

Saves space since in most cases, we only need a `None` 32 bits index
instead of a null 64 bits pointer.

Based on discussions in [Carbon C++ Interop
weekly](https://docs.google.com/document/d/1YlxEOJ0r-o19o19TCJbFl4Ln1U88yn_Vj23y1Hr5vTk/edit?tab=t.0#heading=h.23l56bt5xwlu)
and Discord
([1](https://discord.com/channels/655572317891461132/768530752592805919/1375121180306047106),
[2](https://discord.com/channels/655572317891461132/768530752592805919/1380575881050718469)).

Note: Any `clang::DeclContext` is also a `clang::Decl`.

Part of #4666.
Boaz Brickner hai 10 meses
pai
achega
e4c8150f2c

+ 10 - 6
toolchain/check/import_cpp.cpp

@@ -252,8 +252,8 @@ auto ImportCppFiles(Context& context, llvm::StringRef importing_file_path,
 
   SemIR::NameScope& name_scope = context.name_scopes().Get(name_scope_id);
   name_scope.set_is_closed_import(true);
-  name_scope.set_cpp_decl_context(
-      generated_ast->getASTContext().getTranslationUnitDecl());
+  name_scope.set_clang_decl_context_id(context.sem_ir().clang_decls().Add(
+      generated_ast->getASTContext().getTranslationUnitDecl()));
 
   if (ast_has_error) {
     name_scope.set_has_error();
@@ -291,7 +291,9 @@ static auto ClangLookup(Context& context, SemIR::NameScopeId scope_id,
   lookup.suppressDiagnostics();
 
   bool found = sema.LookupQualifiedName(
-      lookup, context.name_scopes().Get(scope_id).cpp_decl_context());
+      lookup,
+      clang::dyn_cast<clang::DeclContext>(context.sem_ir().clang_decls().Get(
+          context.name_scopes().Get(scope_id).clang_decl_context_id())));
 
   if (!found) {
     return std::nullopt;
@@ -468,7 +470,7 @@ static auto ImportFunctionDecl(Context& context, SemIR::LocId loc_id,
        .return_slot_pattern_id = return_slot_pattern_id,
        .virtual_modifier = SemIR::FunctionFields::VirtualModifier::None,
        .self_param_id = SemIR::InstId::None,
-       .cpp_decl = clang_decl}};
+       .clang_decl_id = context.sem_ir().clang_decls().Add(clang_decl)}};
 
   function_decl.function_id = context.functions().Add(function_info);
 
@@ -492,7 +494,8 @@ static auto ImportNamespaceDecl(Context& context,
       name_id, parent_scope_id, /*import_id=*/SemIR::InstId::None);
   context.name_scopes()
       .Get(result.name_scope_id)
-      .set_cpp_decl_context(clang_decl);
+      .set_clang_decl_context_id(
+          context.sem_ir().clang_decls().Add(clang_decl));
   return result.inst_id;
 }
 
@@ -555,7 +558,8 @@ static auto BuildClassDefinition(Context& context,
 
   context.name_scopes()
       .Get(class_info.scope_id)
-      .set_cpp_decl_context(clang_decl);
+      .set_clang_decl_context_id(
+          context.sem_ir().clang_decls().Add(clang_decl));
 
   return {class_id, class_decl_id};
 }

+ 4 - 2
toolchain/lower/file_context.cpp

@@ -670,10 +670,12 @@ auto FileContext::BuildFunctionDecl(SemIR::FunctionId function_id,
   }
 
   // If this is a C++ function, tell Clang that we referenced it.
-  if (auto* cpp_decl = sem_ir().functions().Get(function_id).cpp_decl) {
+  if (auto clang_decl_id = sem_ir().functions().Get(function_id).clang_decl_id;
+      clang_decl_id.has_value()) {
     CARBON_CHECK(!specific_id.has_value(),
                  "Specific functions cannot have C++ definitions");
-    HandleReferencedCppFunction(cpp_decl);
+    HandleReferencedCppFunction(clang::dyn_cast<clang::FunctionDecl>(
+        sem_ir().clang_decls().Get(clang_decl_id)));
     // TODO: Check that the signature and mangling generated by Clang and the
     // one we generated are the same.
   }

+ 3 - 2
toolchain/lower/mangler.cpp

@@ -150,8 +150,9 @@ auto Mangler::Mangle(SemIR::FunctionId function_id,
     CARBON_CHECK(!specific_id.has_value(), "entry point should not be generic");
     return "main";
   }
-  if (function.cpp_decl) {
-    return MangleCppClang(function.cpp_decl);
+  if (function.clang_decl_id.has_value()) {
+    return MangleCppClang(llvm::dyn_cast<clang::NamedDecl>(
+        sem_ir().clang_decls().Get(function.clang_decl_id)));
   }
   RawStringOstream os;
   os << "_C";

+ 9 - 0
toolchain/sem_ir/file.h

@@ -203,6 +203,12 @@ class File : public Printable<File> {
   // pointer in the constructor and remove this function. This is part of
   // https://github.com/carbon-language/carbon-lang/issues/4666
   auto set_cpp_ast(clang::ASTUnit* cpp_ast) -> void { cpp_ast_ = cpp_ast; }
+  auto clang_decls() -> CanonicalValueStore<ClangDeclId>& {
+    return clang_decls_;
+  }
+  auto clang_decls() const -> const CanonicalValueStore<ClangDeclId>& {
+    return clang_decls_;
+  }
   auto names() const -> NameStoreWrapper {
     return NameStoreWrapper(&identifiers());
   }
@@ -329,6 +335,9 @@ class File : public Printable<File> {
   // `Cpp` imports.
   clang::ASTUnit* cpp_ast_ = nullptr;
 
+  // Clang AST declarations pointing to the AST.
+  CanonicalValueStore<ClangDeclId> clang_decls_;
+
   // All instructions. The first entries will always be the singleton
   // instructions.
   InstStore insts_ = InstStore(this);

+ 5 - 7
toolchain/sem_ir/function.h

@@ -75,13 +75,11 @@ struct FunctionFields {
   // be empty for declarations that don't have a visible definition.
   llvm::SmallVector<InstBlockId> body_block_ids = {};
 
-  // If the function is imported from C++, points to the Clang declaration in
-  // the AST. Used for mangling and inline function definition code generation.
-  // The AST is owned by `CompileSubcommand` so we expect it to be live from
-  // `Function` creation to mangling.
-  // TODO: #4666 Ensure we can easily serialize/deserialize this. Consider decl
-  // ID to point into the AST.
-  clang::FunctionDecl* cpp_decl = nullptr;
+  // If the function is imported from C++, the Clang function declaration. Used
+  // for mangling and inline function definition code generation. The AST is
+  // owned by `CompileSubcommand` so we expect it to be live from `Function`
+  // creation to mangling.
+  ClangDeclId clang_decl_id = ClangDeclId::None;
 };
 
 // A function. See EntityWithParamsBase regarding the inheritance here.

+ 12 - 0
toolchain/sem_ir/ids.h

@@ -18,6 +18,7 @@
 namespace clang {
 
 // Forward declare indexed types, for integration with ValueStore.
+class Decl;
 class SourceLocation;
 
 }  // namespace clang
@@ -787,6 +788,17 @@ struct TypeId : public IdBase<TypeId> {
   auto Print(llvm::raw_ostream& out) const -> void;
 };
 
+// The ID of a Clang `Decl` pointer, pointing to the Clang AST.
+struct ClangDeclId : public IdBase<ClangDeclId> {
+  static constexpr llvm::StringLiteral Label = "clang_decl_id";
+
+  // TODO: Ensure we can easily serialize/deserialize this. Consider
+  // `clang::LazyDeclPtr`.
+  using ValueType = clang::Decl*;
+
+  using IdBase::IdBase;
+};
+
 // The ID of a Clang Source Location.
 struct ClangSourceLocId : public IdBase<ClangSourceLocId> {
   static constexpr llvm::StringLiteral Label = "clang_source_loc";

+ 9 - 12
toolchain/sem_ir/name_scope.h

@@ -216,15 +216,16 @@ class NameScope : public Printable<NameScope> {
     is_closed_import_ = is_closed_import;
   }
 
-  auto is_cpp_scope() const -> bool { return cpp_decl_context(); }
+  auto is_cpp_scope() const -> bool {
+    return clang_decl_context_id().has_value();
+  }
 
-  auto cpp_decl_context() const -> const clang::DeclContext* {
-    return cpp_decl_context_;
+  auto clang_decl_context_id() const -> ClangDeclId {
+    return clang_decl_context_id_;
   }
-  auto cpp_decl_context() -> clang::DeclContext* { return cpp_decl_context_; }
 
-  auto set_cpp_decl_context(clang::DeclContext* cpp_decl_context) -> void {
-    cpp_decl_context_ = cpp_decl_context;
+  auto set_clang_decl_context_id(ClangDeclId clang_decl_context_id) -> void {
+    clang_decl_context_id_ = clang_decl_context_id;
   }
 
   // Returns true if this name scope describes an imported package.
@@ -295,12 +296,8 @@ class NameScope : public Printable<NameScope> {
   bool is_closed_import_ = false;
 
   // Set if this is the `Cpp` scope or a scope inside `Cpp`. Points to the
-  // matching Clang declaration context to look for names. This is mutable since
-  // `clang::Sema::LookupQualifiedName()` requires a mutable `DeclContext`.
-  // TODO: Ensure we can easily serialize/deserialize this. Consider decl ID to
-  // point into the AST. This is related to:
-  // https://github.com/carbon-language/carbon-lang/issues/4666.
-  clang::DeclContext* cpp_decl_context_ = nullptr;
+  // matching Clang declaration context to look for names.
+  ClangDeclId clang_decl_context_id_ = ClangDeclId::None;
 
   // True if this is the scope of an interface definition, where associated
   // entities will be bound to the interface's `Self` symbolic type.