Explorar o código

Delay finishing the C++ translation unit until we reach the real EOF. (#6489)

Instead of parsing a complete C++ translation unit and then interacting
with the translation unit further after the fact, delay finishing the
translation unit until we finish the Carbon check phase. This fixes some
issues where we would produce duplicated or incorrect diagnostics at the
end of the C++ translation unit, particularly for unused declarations.
Now we're in control of how we parse the translation unit, also disable
parsing of C++20 modules if the syntax appears within `import Cpp
inline` code.

Keep the same clang parser alive throughout check, and use it instead of
building a new one when parsing macros. This resolves issues where the
translation unit scope was destroyed too early, resulting in unqualified
lookup within macros being unable to find global scope entities.

---------

Co-authored-by: Jon Ross-Perkins <jperkins@google.com>
Richard Smith hai 4 meses
pai
achega
a8eca2ece6

+ 1 - 1
toolchain/check/BUILD

@@ -26,7 +26,6 @@ cc_library(
         "cpp/context.cpp",
         "cpp/custom_type_mapping.cpp",
         "cpp/generate_ast.cpp",
-        "cpp/generate_ast.h",
         "cpp/impl_lookup.cpp",
         "cpp/import.cpp",
         "cpp/location.cpp",
@@ -83,6 +82,7 @@ cc_library(
         "cpp/call.h",
         "cpp/context.h",
         "cpp/custom_type_mapping.h",
+        "cpp/generate_ast.h",
         "cpp/impl_lookup.h",
         "cpp/import.h",
         "cpp/location.h",

+ 3 - 6
toolchain/check/check_unit.cpp

@@ -17,6 +17,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/base/kind_switch.h"
+#include "toolchain/check/cpp/generate_ast.h"
 #include "toolchain/check/cpp/import.h"
 #include "toolchain/check/diagnostic_helpers.h"
 #include "toolchain/check/generic.h"
@@ -580,12 +581,8 @@ auto CheckUnit::FinishRun() -> void {
   CheckPoisonedConcreteImplLookupQueries();
   CheckImpls();
 
-  if (auto* cpp_context = context_.cpp_context()) {
-    // Ask Clang to perform any cleanups required, including instantiating used
-    // templates.
-    cpp_context->sema().ActOnEndOfTranslationUnit();
-    context_.emitter().Flush();
-  }
+  // Finalizes the C++ portion of the compilation.
+  FinishAst(context_);
 
   // Pop information for the file-level scope.
   context_.sem_ir().set_top_inst_block_id(context_.inst_block_stack().Pop());

+ 1 - 1
toolchain/check/context.h

@@ -90,7 +90,7 @@ class Context {
 
   // TODO: Remove this and pass the C++ context to the constructor.
   auto set_cpp_context(std::unique_ptr<CppContext> cpp_context) {
-    CARBON_CHECK(cpp_context, "C++ context set more than once");
+    CARBON_CHECK(!cpp_context_ || !cpp_context, "Already have a C++ context");
     cpp_context_ = std::move(cpp_context);
   }
 

+ 11 - 0
toolchain/check/cpp/context.h

@@ -10,6 +10,8 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendAction.h"
+#include "clang/Parse/Parser.h"
+#include "common/check.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace Carbon::Check {
@@ -34,6 +36,12 @@ class CppContext {
     return action_->getCompilerInstance().getSema();
   }
 
+  auto parser() -> clang::Parser& { return *parser_; }
+  auto set_parser(std::unique_ptr<clang::Parser> parser) {
+    CARBON_CHECK(!parser_);
+    parser_ = std::move(parser);
+  }
+
   auto clang_mangle_context() -> clang::MangleContext&;
 
   auto carbon_file_locations() -> llvm::SmallVector<clang::SourceLocation>& {
@@ -50,6 +58,9 @@ class CppContext {
 
   // The Clang mangle context for the target in the ASTContext.
   std::unique_ptr<clang::MangleContext> clang_mangle_context_;
+
+  // The Clang parser.
+  std::unique_ptr<clang::Parser> parser_;
 };
 
 }  // namespace Carbon::Check

+ 53 - 0
toolchain/check/cpp/generate_ast.cpp

@@ -14,7 +14,9 @@
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Frontend/TextDiagnostic.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Parse/Parser.h"
 #include "clang/Sema/ExternalSemaSource.h"
+#include "clang/Sema/Sema.h"
 #include "common/check.h"
 #include "common/raw_string_ostream.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -365,6 +367,44 @@ class GenerateASTAction : public clang::ASTFrontendAction {
     return true;
   }
 
+  // Parse the imports and inline C++ fragments. This is notionally very similar
+  // to `clang::ParseAST`, which `ASTFrontendAction::ExecuteAction` calls, but
+  // this version doesn't parse C++20 modules and stops just before reaching the
+  // end of the translation unit.
+  auto ExecuteAction() -> void override {
+    clang::CompilerInstance& clang_instance = getCompilerInstance();
+    clang_instance.createSema(getTranslationUnitKind(),
+                              /*CompletionConsumer=*/nullptr);
+
+    context_->cpp_context()->set_parser(std::make_unique<clang::Parser>(
+        clang_instance.getPreprocessor(), clang_instance.getSema(),
+        /*SkipFunctionBodies=*/false));
+    auto& parser = context_->cpp_context()->parser();
+
+    clang_instance.getPreprocessor().EnterMainSourceFile();
+    if (auto* source = clang_instance.getASTContext().getExternalSource()) {
+      source->StartTranslationUnit(&clang_instance.getASTConsumer());
+    }
+
+    parser.Initialize();
+    clang_instance.getSema().ActOnStartOfTranslationUnit();
+
+    // Don't allow C++20 module declarations in inline Cpp code fragments.
+    auto module_import_state = clang::Sema::ModuleImportState::NotACXX20Module;
+
+    // Parse top-level declarations until we see EOF. Do not parse EOF, as that
+    // will cause the parser to end the translation unit prematurely.
+    while (parser.getCurToken().isNot(clang::tok::eof)) {
+      clang::Parser::DeclGroupPtrTy decl_group;
+      bool eof = parser.ParseTopLevelDecl(decl_group, module_import_state);
+      CARBON_CHECK(!eof);
+      if (decl_group && !clang_instance.getASTConsumer().HandleTopLevelDecl(
+                            decl_group.get())) {
+        break;
+      }
+    }
+  }
+
  private:
   Context* context_;
 };
@@ -447,4 +487,17 @@ auto GenerateAst(Context& context,
   return !trap.hasErrorOccurred();
 }
 
+auto FinishAst(Context& context) -> void {
+  if (!context.cpp_context()) {
+    return;
+  }
+
+  context.cpp_context()->sema().ActOnEndOfTranslationUnit();
+
+  // We don't call FrontendAction::EndSourceFile, because that destroys the AST.
+  context.set_cpp_context(nullptr);
+
+  context.emitter().Flush();
+}
+
 }  // namespace Carbon::Check

+ 4 - 0
toolchain/check/cpp/generate_ast.h

@@ -21,6 +21,10 @@ auto GenerateAst(Context& context,
                  std::shared_ptr<clang::CompilerInvocation> base_invocation)
     -> bool;
 
+// Finishes AST generation for the given checking context. Performs end of file
+// steps such as template instantiation and warning on unused declarations.
+auto FinishAst(Context& context) -> void;
+
 }  // namespace Carbon::Check
 
 #endif  // CARBON_TOOLCHAIN_CHECK_CPP_GENERATE_AST_H_

+ 4 - 0
toolchain/check/cpp/import.cpp

@@ -1624,6 +1624,10 @@ static auto ImportVarDecl(Context& context, SemIR::LocId loc_id,
   // Finalize the `VarStorage` instruction.
   ReplaceInstBeforeConstantUse(context, var_storage_inst_id, var_storage);
 
+  // Inform Clang that the variable has been referenced.
+  context.clang_sema().MarkVariableReferenced(GetCppLocation(context, loc_id),
+                                              var_decl);
+
   return var_storage_inst_id;
 }
 

+ 1 - 5
toolchain/check/cpp/macros.cpp

@@ -25,7 +25,7 @@ auto TryEvaluateMacroToConstant(Context& context, SemIR::LocId loc_id,
 
   clang::Sema& sema = context.clang_sema();
   clang::Preprocessor& preprocessor = sema.getPreprocessor();
-  clang::Parser parser(preprocessor, sema, false);
+  auto& parser = context.cpp_context()->parser();
 
   llvm::SmallVector<clang::Token> tokens(macro_info->tokens().begin(),
                                          macro_info->tokens().end());
@@ -45,11 +45,7 @@ auto TryEvaluateMacroToConstant(Context& context, SemIR::LocId loc_id,
                                 /*IsReinject=*/false);
   parser.ConsumeAnyToken(true);
 
-  // TODO: Identifiers are still only available if prefixed with "::" (e.g.
-  // "#define M_Var ::myVar").
-  parser.EnterScope(clang::Scope::DeclScope);
   clang::ExprResult result = parser.ParseConstantExpression();
-  parser.ExitScope();
 
   clang::Expr* result_expr = result.get();
 

+ 3 - 44
toolchain/check/testdata/interop/cpp/macros.carbon

@@ -613,32 +613,13 @@ constexpr auto operator""_kb(unsigned long long k) -> unsigned long long {
 }
 #define M_1KB 1_kb
 
-// --- fail_todo_import_user_defined_literal.carbon
+// --- import_user_defined_literal.carbon
 
 library "[[@TEST_NAME]]";
 
-// CHECK:STDERR: fail_todo_import_user_defined_literal.carbon:[[@LINE+4]]:10: in file included here [InCppInclude]
-// CHECK:STDERR: ./user_defined_literal.h:4:16: error: no matching literal operator for call to 'operator""_kb' with argument of type 'unsigned long long' or 'const char *', and no matching literal operator template [CppInteropParseError]
-// CHECK:STDERR:     4 | #define M_1KB 1_kb
-// CHECK:STDERR:       |                ^
 import Cpp library "user_defined_literal.h";
 
 fn F() {
- // CHECK:STDERR: fail_todo_import_user_defined_literal.carbon:[[@LINE+15]]:2: note: in `Cpp` name lookup for `M_1KB` [InCppNameLookup]
- // CHECK:STDERR:  Cpp.M_1KB;
- // CHECK:STDERR:  ^~~~~~~~~
- // CHECK:STDERR:
- // CHECK:STDERR: fail_todo_import_user_defined_literal.carbon:[[@LINE+11]]:2: error: failed to parse macro Cpp.M_1KB to a valid constant expression [InCppMacroEvaluation]
- // CHECK:STDERR:  Cpp.M_1KB;
- // CHECK:STDERR:  ^~~~~~~~~
- // CHECK:STDERR: fail_todo_import_user_defined_literal.carbon:[[@LINE+8]]:2: note: in `Cpp` name lookup for `M_1KB` [InCppNameLookup]
- // CHECK:STDERR:  Cpp.M_1KB;
- // CHECK:STDERR:  ^~~~~~~~~
- // CHECK:STDERR:
- // CHECK:STDERR: fail_todo_import_user_defined_literal.carbon:[[@LINE+4]]:2: error: member name `M_1KB` not found in `Cpp` [MemberNameNotFoundInInstScope]
- // CHECK:STDERR:  Cpp.M_1KB;
- // CHECK:STDERR:  ^~~~~~~~~
- // CHECK:STDERR:
  Cpp.M_1KB;
 }
 
@@ -657,27 +638,16 @@ fn F() {
   //@dump-sem-ir-end
 }
 
-// --- fail_todo_enums_no_scope.carbon
+// --- enums_no_scope.carbon
 
 library "[[@TEST_NAME]]";
 
 import Cpp inline '''
   enum A { a = 1, b = 2 };
-  // CHECK:STDERR: fail_todo_enums_no_scope.carbon:[[@LINE+7]]:24: error: use of undeclared identifier 'a'; did you mean '::a'? [CppInteropParseError]
-  // CHECK:STDERR:    13 |   #define M_A_NO_SCOPE a
-  // CHECK:STDERR:       |                        ^
-  // CHECK:STDERR:       |                        ::a
-  // CHECK:STDERR: fail_todo_enums_no_scope.carbon:[[@LINE-5]]:12: note: '::a' declared here [CppInteropParseNote]
-  // CHECK:STDERR:     5 |   enum A { a = 1, b = 2 };
-  // CHECK:STDERR:       |            ^
   #define M_A_NO_SCOPE a
 ''';
 
 fn F() {
-  // CHECK:STDERR: fail_todo_enums_no_scope.carbon:[[@LINE+4]]:3: note: in `Cpp` name lookup for `M_A_NO_SCOPE` [InCppNameLookup]
-  // CHECK:STDERR:   Cpp.M_A_NO_SCOPE;
-  // CHECK:STDERR:   ^~~~~~~~~~~~~~~~
-  // CHECK:STDERR:
   Cpp.M_A_NO_SCOPE;
 }
 
@@ -696,27 +666,16 @@ fn F() {
   //@dump-sem-ir-end
 }
 
-// --- fail_import_constexpr_no_scope.carbon
+// --- import_constexpr_no_scope.carbon
 
 library "[[@TEST_NAME]]";
 
 import Cpp inline '''
    constexpr int a = 1;
-   // CHECK:STDERR: fail_import_constexpr_no_scope.carbon:[[@LINE+7]]:33: error: use of undeclared identifier 'a'; did you mean '::a'? [CppInteropParseError]
-   // CHECK:STDERR:    13 |    #define M_CONSTEXPR_NO_SCOPE a
-   // CHECK:STDERR:       |                                 ^
-   // CHECK:STDERR:       |                                 ::a
-   // CHECK:STDERR: fail_import_constexpr_no_scope.carbon:[[@LINE-5]]:18: note: '::a' declared here [CppInteropParseNote]
-   // CHECK:STDERR:     5 |    constexpr int a = 1;
-   // CHECK:STDERR:       |                  ^
    #define M_CONSTEXPR_NO_SCOPE a
 ''';
 
 fn F() {
-  // CHECK:STDERR: fail_import_constexpr_no_scope.carbon:[[@LINE+4]]:16: note: in `Cpp` name lookup for `M_CONSTEXPR_NO_SCOPE` [InCppNameLookup]
-  // CHECK:STDERR:   let a: i32 = Cpp.M_CONSTEXPR_NO_SCOPE;
-  // CHECK:STDERR:                ^~~~~~~~~~~~~~~~~~~~~~~~
-  // CHECK:STDERR:
   let a: i32 = Cpp.M_CONSTEXPR_NO_SCOPE;
 }
 

+ 89 - 0
toolchain/check/testdata/interop/cpp/modules.carbon

@@ -0,0 +1,89 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// INCLUDE-FILE: toolchain/testing/testdata/min_prelude/none.carbon
+// EXTRA-ARGS: --clang-arg=--std=c++20
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/check/testdata/interop/cpp/modules.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/check/testdata/interop/cpp/modules.carbon
+
+// --- fail_export_module_in_inline_cpp.carbon
+
+library "[[@TEST_NAME]]";
+
+// TODO: This diagnostic isn't very good.
+import Cpp inline '''
+// CHECK:STDERR: fail_export_module_in_inline_cpp.carbon:[[@LINE+7]]:8: error: module declaration must occur at the start of the translation unit [CppInteropParseError]
+// CHECK:STDERR:    13 | export module Foo;
+// CHECK:STDERR:       |        ^
+// CHECK:STDERR: <carbon Cpp imports>:1:1: note: add 'module;' to the start of the file to introduce a global module fragment [CppInteropParseNote]
+// CHECK:STDERR:     1 | # 6 "fail_export_module_in_inline_cpp.carbon"
+// CHECK:STDERR:       | ^
+// CHECK:STDERR:
+export module Foo;
+''';
+
+// --- fail_module_in_inline_cpp.carbon
+
+library "[[@TEST_NAME]]";
+
+// TODO: This diagnostic isn't very good.
+import Cpp inline '''
+// CHECK:STDERR: fail_module_in_inline_cpp.carbon:[[@LINE+11]]:1: error: module declaration must occur at the start of the translation unit [CppInteropParseError]
+// CHECK:STDERR:    17 | module Foo;
+// CHECK:STDERR:       | ^
+// CHECK:STDERR: <carbon Cpp imports>:1:1: note: add 'module;' to the start of the file to introduce a global module fragment [CppInteropParseNote]
+// CHECK:STDERR:     1 | # 6 "fail_module_in_inline_cpp.carbon"
+// CHECK:STDERR:       | ^
+// CHECK:STDERR:
+// CHECK:STDERR: fail_module_in_inline_cpp.carbon:[[@LINE+4]]:8: error: module 'Foo' not found [CppInteropParseError]
+// CHECK:STDERR:    17 | module Foo;
+// CHECK:STDERR:       | ~~~~~~~^~~
+// CHECK:STDERR:
+module Foo;
+''';
+
+// --- fail_global_module_in_inline_cpp.carbon
+
+library "[[@TEST_NAME]]";
+
+// TODO: This diagnostic isn't very good.
+import Cpp inline '''
+// CHECK:STDERR: fail_global_module_in_inline_cpp.carbon:[[@LINE+4]]:1: error: 'module;' introducing a global module fragment can appear only at the start of the translation unit [CppInteropParseError]
+// CHECK:STDERR:    10 | module;
+// CHECK:STDERR:       | ^~~~~~~
+// CHECK:STDERR:
+module;
+
+int n;
+
+// CHECK:STDERR: fail_global_module_in_inline_cpp.carbon:[[@LINE+11]]:1: error: module declaration must occur at the start of the translation unit [CppInteropParseError]
+// CHECK:STDERR:    25 | module Foo;
+// CHECK:STDERR:       | ^
+// CHECK:STDERR: <carbon Cpp imports>:1:1: note: add 'module;' to the start of the file to introduce a global module fragment [CppInteropParseNote]
+// CHECK:STDERR:     1 | # 6 "fail_global_module_in_inline_cpp.carbon"
+// CHECK:STDERR:       | ^
+// CHECK:STDERR:
+// CHECK:STDERR: fail_global_module_in_inline_cpp.carbon:[[@LINE+4]]:8: error: module 'Foo' not found [CppInteropParseError]
+// CHECK:STDERR:    25 | module Foo;
+// CHECK:STDERR:       | ~~~~~~~^~~
+// CHECK:STDERR:
+module Foo;
+''';
+
+// --- fail_import_in_inline_cpp.carbon
+
+library "[[@TEST_NAME]]";
+
+// TODO: Also test that a valid module import works.
+import Cpp inline '''
+// CHECK:STDERR: fail_import_in_inline_cpp.carbon:[[@LINE+4]]:8: error: 'file_that_does_not_exist.h' file not found [CppInteropParseError]
+// CHECK:STDERR:    10 | import "file_that_does_not_exist.h";
+// CHECK:STDERR:       |        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// CHECK:STDERR:
+import "file_that_does_not_exist.h";
+''';

+ 43 - 0
toolchain/check/testdata/interop/cpp/unused_internal.carbon

@@ -0,0 +1,43 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// INCLUDE-FILE: toolchain/testing/testdata/min_prelude/int.carbon
+// EXTRA-ARGS: --clang-arg=-Wunused
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/check/testdata/interop/cpp/unused_internal.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/check/testdata/interop/cpp/unused_internal.carbon
+
+// --- unused.carbon
+
+library "[[@TEST_NAME]]";
+
+import Cpp inline '''c++
+// CHECK:STDERR: unused.carbon:[[@LINE+4]]:12: warning: unused variable 'n' [CppInteropParseWarning]
+// CHECK:STDERR:     9 | static int n = 0;
+// CHECK:STDERR:       |            ^
+// CHECK:STDERR:
+static int n = 0;
+// CHECK:STDERR: unused.carbon:[[@LINE+4]]:13: warning: unused function 'f' [CppInteropParseWarning]
+// CHECK:STDERR:    14 | static void f() {}
+// CHECK:STDERR:       |             ^
+// CHECK:STDERR:
+static void f() {}
+''';
+
+// --- not_unused.carbon
+
+library "[[@TEST_NAME]]";
+
+import Cpp inline '''c++
+static int n = 0;
+static void f() {}
+''';
+
+fn F() -> i32 {
+  Cpp.f();
+  return Cpp.n;
+}