generate_ast.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/check/cpp/generate_ast.h"
  5. #include <memory>
  6. #include <string>
  7. #include "clang/AST/ASTContext.h"
  8. #include "clang/Basic/FileManager.h"
  9. #include "clang/CodeGen/ModuleBuilder.h"
  10. #include "clang/Frontend/CompilerInstance.h"
  11. #include "clang/Frontend/CompilerInvocation.h"
  12. #include "clang/Frontend/FrontendAction.h"
  13. #include "clang/Frontend/TextDiagnostic.h"
  14. #include "clang/Lex/PreprocessorOptions.h"
  15. #include "clang/Parse/Parser.h"
  16. #include "clang/Sema/ExternalSemaSource.h"
  17. #include "clang/Sema/Sema.h"
  18. #include "common/check.h"
  19. #include "common/raw_string_ostream.h"
  20. #include "llvm/ADT/IntrusiveRefCntPtr.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/Support/raw_ostream.h"
  23. #include "toolchain/check/context.h"
  24. #include "toolchain/diagnostics/diagnostic.h"
  25. #include "toolchain/diagnostics/emitter.h"
  26. #include "toolchain/diagnostics/format_providers.h"
  27. #include "toolchain/parse/node_ids.h"
  28. #include "toolchain/sem_ir/cpp_file.h"
  29. namespace Carbon::Check {
  30. // Add a line marker directive pointing at the location of the `import Cpp`
  31. // declaration in the Carbon source file. This will cause Clang's diagnostics
  32. // machinery to track and report the location in Carbon code where the import
  33. // was written.
  34. static auto GenerateLineMarker(Context& context, llvm::raw_ostream& out,
  35. int line) {
  36. out << "# " << line << " \""
  37. << FormatEscaped(context.tokens().source().filename()) << "\"\n";
  38. }
  39. // Generates C++ file contents to #include all requested imports.
  40. static auto GenerateCppIncludesHeaderCode(
  41. Context& context, llvm::ArrayRef<Parse::Tree::PackagingNames> imports)
  42. -> std::string {
  43. std::string code;
  44. llvm::raw_string_ostream code_stream(code);
  45. for (const Parse::Tree::PackagingNames& import : imports) {
  46. if (import.inline_body_id.has_value()) {
  47. // Expand `import Cpp inline "code";` directly into the specified code.
  48. auto code_token = context.parse_tree().node_token(import.inline_body_id);
  49. // Compute the line number on which the C++ code starts. Usually the code
  50. // is specified as a block string literal and starts on the line after the
  51. // start of the string token.
  52. // TODO: Determine if this is a block string literal without calling
  53. // `GetTokenText`, which re-lexes the string.
  54. int line = context.tokens().GetLineNumber(code_token);
  55. if (context.tokens().GetTokenText(code_token).contains('\n')) {
  56. ++line;
  57. }
  58. GenerateLineMarker(context, code_stream, line);
  59. code_stream << context.string_literal_values().Get(
  60. context.tokens().GetStringLiteralValue(code_token))
  61. << "\n";
  62. // TODO: Inject a clang pragma here to produce an error if there are
  63. // unclosed scopes at the end of this inline C++ fragment.
  64. } else if (import.library_id.has_value()) {
  65. // Translate `import Cpp library "foo.h";` into `#include "foo.h"`.
  66. GenerateLineMarker(context, code_stream,
  67. context.tokens().GetLineNumber(
  68. context.parse_tree().node_token(import.node_id)));
  69. auto name = context.string_literal_values().Get(import.library_id);
  70. if (name.starts_with('<') && name.ends_with('>')) {
  71. code_stream << "#include <"
  72. << FormatEscaped(name.drop_front().drop_back()) << ">\n";
  73. } else {
  74. code_stream << "#include \"" << FormatEscaped(name) << "\"\n";
  75. }
  76. }
  77. }
  78. // Inject a declaration of placement operator new, because the code we
  79. // generate in thunks depends on it for placement new expressions. Clang has
  80. // special-case logic for lowering a new-expression using this, so a
  81. // definition is not required.
  82. // TODO: This is a hack. We should be able to directly generate Clang AST to
  83. // construct objects in-place without this.
  84. // TODO: Once we can rely on libc++ being available, consider including
  85. // `<__new/placement_new_delete.h>` instead.
  86. code_stream << R"(# 1 "<carbon-internal>"
  87. #undef constexpr
  88. #if __cplusplus > 202302L
  89. constexpr
  90. #endif
  91. #undef void
  92. #undef operator
  93. #undef new
  94. void* operator new(__SIZE_TYPE__, void*)
  95. #if __cplusplus < 201103L
  96. #undef throw
  97. throw()
  98. #else
  99. #undef noexcept
  100. noexcept
  101. #endif
  102. ;
  103. )";
  104. return code;
  105. }
  106. // Adds the given source location and an `ImportIRInst` referring to it in
  107. // `ImportIRId::Cpp`.
  108. static auto AddImportIRInst(SemIR::File& file,
  109. clang::SourceLocation clang_source_loc)
  110. -> SemIR::ImportIRInstId {
  111. SemIR::ClangSourceLocId clang_source_loc_id =
  112. file.clang_source_locs().Add(clang_source_loc);
  113. return file.import_ir_insts().Add(SemIR::ImportIRInst(clang_source_loc_id));
  114. }
  115. namespace {
  116. // Used to convert Clang diagnostics to Carbon diagnostics.
  117. //
  118. // Handling of Clang notes is a little subtle: as far as Clang is concerned,
  119. // notes are separate diagnostics, not connected to the error or warning that
  120. // precedes them. But in Carbon's diagnostics system, notes are part of the
  121. // enclosing diagnostic. To handle this, we buffer Clang diagnostics until we
  122. // reach a point where we know we're not in the middle of a diagnostic, and then
  123. // emit a diagnostic along with all of its notes. This is triggered when adding
  124. // or removing a Carbon context note, which could otherwise get attached to the
  125. // wrong C++ diagnostics, and at the end of the Carbon program.
  126. class CarbonClangDiagnosticConsumer : public clang::DiagnosticConsumer {
  127. public:
  128. // Creates an instance with the location that triggers calling Clang. The
  129. // `context` is not stored here, and the diagnostics consumer is expected to
  130. // outlive it.
  131. explicit CarbonClangDiagnosticConsumer(
  132. Context& context, std::shared_ptr<clang::CompilerInvocation> invocation)
  133. : sem_ir_(&context.sem_ir()),
  134. emitter_(&context.emitter()),
  135. invocation_(std::move(invocation)) {
  136. emitter_->AddFlushFn([this] { EmitDiagnostics(); });
  137. }
  138. ~CarbonClangDiagnosticConsumer() override {
  139. // Do not inspect `emitter_` here; it's typically destroyed before the
  140. // consumer is.
  141. // TODO: If Clang produces diagnostics after check finishes, they'll get
  142. // added to the list of pending diagnostics and never emitted.
  143. CARBON_CHECK(diagnostic_infos_.empty(),
  144. "Missing flush before destroying diagnostic consumer");
  145. }
  146. // Generates a Carbon warning for each Clang warning and a Carbon error for
  147. // each Clang error or fatal.
  148. auto HandleDiagnostic(clang::DiagnosticsEngine::Level diag_level,
  149. const clang::Diagnostic& info) -> void override {
  150. DiagnosticConsumer::HandleDiagnostic(diag_level, info);
  151. SemIR::ImportIRInstId clang_import_ir_inst_id =
  152. AddImportIRInst(*sem_ir_, info.getLocation());
  153. llvm::SmallString<256> message;
  154. info.FormatDiagnostic(message);
  155. // Render a code snippet including any highlighted ranges and fixit hints.
  156. // TODO: Also include the #include stack and macro expansion stack in the
  157. // diagnostic output in some way.
  158. RawStringOstream snippet_stream;
  159. if (!info.hasSourceManager()) {
  160. // If we don't have a source manager, this is an error from early in the
  161. // frontend. Don't produce a snippet.
  162. CARBON_CHECK(info.getLocation().isInvalid());
  163. } else {
  164. CodeContextRenderer(snippet_stream, invocation_->getLangOpts(),
  165. invocation_->getDiagnosticOpts())
  166. .emitDiagnostic(
  167. clang::FullSourceLoc(info.getLocation(), info.getSourceManager()),
  168. diag_level, message, info.getRanges(), info.getFixItHints());
  169. }
  170. diagnostic_infos_.push_back({.level = diag_level,
  171. .import_ir_inst_id = clang_import_ir_inst_id,
  172. .message = message.str().str(),
  173. .snippet = snippet_stream.TakeStr()});
  174. }
  175. // Returns the diagnostic to use for a given Clang diagnostic level.
  176. static auto GetDiagnostic(clang::DiagnosticsEngine::Level level)
  177. -> const Diagnostics::DiagnosticBase<std::string>& {
  178. switch (level) {
  179. case clang::DiagnosticsEngine::Ignored: {
  180. CARBON_FATAL("Emitting an ignored diagnostic");
  181. break;
  182. }
  183. case clang::DiagnosticsEngine::Note: {
  184. CARBON_DIAGNOSTIC(CppInteropParseNote, Note, "{0}", std::string);
  185. return CppInteropParseNote;
  186. }
  187. case clang::DiagnosticsEngine::Remark:
  188. case clang::DiagnosticsEngine::Warning: {
  189. // TODO: Add a distinct Remark level to Carbon diagnostics, and stop
  190. // mapping remarks to warnings.
  191. CARBON_DIAGNOSTIC(CppInteropParseWarning, Warning, "{0}", std::string);
  192. return CppInteropParseWarning;
  193. }
  194. case clang::DiagnosticsEngine::Error:
  195. case clang::DiagnosticsEngine::Fatal: {
  196. CARBON_DIAGNOSTIC(CppInteropParseError, Error, "{0}", std::string);
  197. return CppInteropParseError;
  198. }
  199. }
  200. }
  201. // Outputs Carbon diagnostics based on the collected Clang diagnostics. Must
  202. // be called after the AST is set in the context.
  203. auto EmitDiagnostics() -> void {
  204. CARBON_CHECK(
  205. sem_ir_->cpp_file(),
  206. "Attempted to emit C++ diagnostics before the C++ file is set");
  207. for (size_t i = 0; i != diagnostic_infos_.size(); ++i) {
  208. const ClangDiagnosticInfo& info = diagnostic_infos_[i];
  209. auto builder = emitter_->Build(SemIR::LocId(info.import_ir_inst_id),
  210. GetDiagnostic(info.level), info.message);
  211. builder.OverrideSnippet(info.snippet);
  212. for (; i + 1 < diagnostic_infos_.size() &&
  213. diagnostic_infos_[i + 1].level == clang::DiagnosticsEngine::Note;
  214. ++i) {
  215. const ClangDiagnosticInfo& note_info = diagnostic_infos_[i + 1];
  216. builder
  217. .Note(SemIR::LocId(note_info.import_ir_inst_id),
  218. GetDiagnostic(note_info.level), note_info.message)
  219. .OverrideSnippet(note_info.snippet);
  220. }
  221. // TODO: This will apply all current Carbon annotation functions. We
  222. // should instead track how Clang's context notes and Carbon's annotation
  223. // functions are interleaved, and interleave the notes in the same order.
  224. builder.Emit();
  225. }
  226. diagnostic_infos_.clear();
  227. }
  228. private:
  229. // A diagnostics renderer based on clang's TextDiagnostic that captures just
  230. // the code context (the snippet).
  231. class CodeContextRenderer : public clang::TextDiagnostic {
  232. protected:
  233. using TextDiagnostic::TextDiagnostic;
  234. void emitDiagnosticMessage(
  235. clang::FullSourceLoc /*loc*/, clang::PresumedLoc /*ploc*/,
  236. clang::DiagnosticsEngine::Level /*level*/, llvm::StringRef /*message*/,
  237. llvm::ArrayRef<clang::CharSourceRange> /*ranges*/,
  238. clang::DiagOrStoredDiag /*info*/) override {}
  239. void emitDiagnosticLoc(
  240. clang::FullSourceLoc /*loc*/, clang::PresumedLoc /*ploc*/,
  241. clang::DiagnosticsEngine::Level /*level*/,
  242. llvm::ArrayRef<clang::CharSourceRange> /*ranges*/) override {}
  243. // emitCodeContext is inherited from clang::TextDiagnostic.
  244. void emitIncludeLocation(clang::FullSourceLoc /*loc*/,
  245. clang::PresumedLoc /*ploc*/) override {}
  246. void emitImportLocation(clang::FullSourceLoc /*loc*/,
  247. clang::PresumedLoc /*ploc*/,
  248. llvm::StringRef /*module_name*/) override {}
  249. void emitBuildingModuleLocation(clang::FullSourceLoc /*loc*/,
  250. clang::PresumedLoc /*ploc*/,
  251. llvm::StringRef /*module_name*/) override {}
  252. // beginDiagnostic and endDiagnostic are inherited from
  253. // clang::TextDiagnostic in case it wants to do any setup / teardown work.
  254. };
  255. // Information on a Clang diagnostic that can be converted to a Carbon
  256. // diagnostic.
  257. struct ClangDiagnosticInfo {
  258. // The Clang diagnostic level.
  259. clang::DiagnosticsEngine::Level level;
  260. // The ID of the ImportIR instruction referring to the Clang source
  261. // location.
  262. SemIR::ImportIRInstId import_ir_inst_id;
  263. // The Clang diagnostic textual message.
  264. std::string message;
  265. // The code snippet produced by clang.
  266. std::string snippet;
  267. };
  268. // The Carbon file that this C++ compilation is attached to.
  269. SemIR::File* sem_ir_;
  270. // The diagnostic emitter that we're emitting diagnostics into.
  271. DiagnosticEmitterBase* emitter_;
  272. // The compiler invocation that is producing the diagnostics.
  273. std::shared_ptr<clang::CompilerInvocation> invocation_;
  274. // Collects the information for all Clang diagnostics to be converted to
  275. // Carbon diagnostics after the context has been initialized with the Clang
  276. // AST.
  277. llvm::SmallVector<ClangDiagnosticInfo> diagnostic_infos_;
  278. };
  279. // A wrapper around a clang::CompilerInvocation that allows us to make a shallow
  280. // copy of most of the invocation and only make a deep copy of the parts that we
  281. // want to change.
  282. //
  283. // clang::CowCompilerInvocation almost allows this, but doesn't derive from
  284. // CompilerInvocation or support shallow copies from a CompilerInvocation, so is
  285. // not useful to us as we can't build an ASTUnit from it.
  286. class ShallowCopyCompilerInvocation : public clang::CompilerInvocation {
  287. public:
  288. explicit ShallowCopyCompilerInvocation(
  289. const clang::CompilerInvocation& invocation) {
  290. shallow_copy_assign(invocation);
  291. // Make a deep copy of options that we modify.
  292. FrontendOpts = std::make_shared<clang::FrontendOptions>(*FrontendOpts);
  293. PPOpts = std::make_shared<clang::PreprocessorOptions>(*PPOpts);
  294. }
  295. };
  296. // An action and a set of registered Clang callbacks used to generate an AST
  297. // from a set of Cpp imports.
  298. class GenerateASTAction : public clang::ASTFrontendAction {
  299. public:
  300. explicit GenerateASTAction(Context& context) : context_(&context) {}
  301. protected:
  302. auto CreateASTConsumer(clang::CompilerInstance& clang_instance,
  303. llvm::StringRef /*file*/)
  304. -> std::unique_ptr<clang::ASTConsumer> override {
  305. auto& cpp_file = *context_->sem_ir().cpp_file();
  306. if (!cpp_file.llvm_context()) {
  307. return std::make_unique<clang::ASTConsumer>();
  308. }
  309. auto code_generator =
  310. std::unique_ptr<clang::CodeGenerator>(clang::CreateLLVMCodeGen(
  311. cpp_file.diagnostics(), context_->sem_ir().filename(),
  312. clang_instance.getVirtualFileSystemPtr(),
  313. clang_instance.getHeaderSearchOpts(),
  314. clang_instance.getPreprocessorOpts(),
  315. clang_instance.getCodeGenOpts(), *cpp_file.llvm_context()));
  316. cpp_file.SetCodeGenerator(code_generator.get());
  317. return code_generator;
  318. }
  319. auto BeginSourceFileAction(clang::CompilerInstance& /*clang_instance*/)
  320. -> bool override {
  321. // TODO: Consider creating an `ExternalSemaSource` here and attaching it to
  322. // the compilation.
  323. // TODO: `clang.getPreprocessor().enableIncrementalProcessing();` to avoid
  324. // the TU scope getting torn down before we're done parsing macros.
  325. return true;
  326. }
  327. // Parse the imports and inline C++ fragments. This is notionally very similar
  328. // to `clang::ParseAST`, which `ASTFrontendAction::ExecuteAction` calls, but
  329. // this version doesn't parse C++20 modules and stops just before reaching the
  330. // end of the translation unit.
  331. auto ExecuteAction() -> void override {
  332. clang::CompilerInstance& clang_instance = getCompilerInstance();
  333. clang_instance.createSema(getTranslationUnitKind(),
  334. /*CompletionConsumer=*/nullptr);
  335. context_->cpp_context()->set_parser(std::make_unique<clang::Parser>(
  336. clang_instance.getPreprocessor(), clang_instance.getSema(),
  337. /*SkipFunctionBodies=*/false));
  338. auto& parser = context_->cpp_context()->parser();
  339. clang_instance.getPreprocessor().EnterMainSourceFile();
  340. if (auto* source = clang_instance.getASTContext().getExternalSource()) {
  341. source->StartTranslationUnit(&clang_instance.getASTConsumer());
  342. }
  343. parser.Initialize();
  344. clang_instance.getSema().ActOnStartOfTranslationUnit();
  345. // Don't allow C++20 module declarations in inline Cpp code fragments.
  346. auto module_import_state = clang::Sema::ModuleImportState::NotACXX20Module;
  347. // Parse top-level declarations until we see EOF. Do not parse EOF, as that
  348. // will cause the parser to end the translation unit prematurely.
  349. while (parser.getCurToken().isNot(clang::tok::eof)) {
  350. clang::Parser::DeclGroupPtrTy decl_group;
  351. bool eof = parser.ParseTopLevelDecl(decl_group, module_import_state);
  352. CARBON_CHECK(!eof);
  353. if (decl_group && !clang_instance.getASTConsumer().HandleTopLevelDecl(
  354. decl_group.get())) {
  355. break;
  356. }
  357. }
  358. }
  359. private:
  360. Context* context_;
  361. };
  362. } // namespace
  363. auto GenerateAst(Context& context,
  364. llvm::ArrayRef<Parse::Tree::PackagingNames> imports,
  365. llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
  366. llvm::LLVMContext* llvm_context,
  367. std::shared_ptr<clang::CompilerInvocation> base_invocation)
  368. -> bool {
  369. CARBON_CHECK(!context.cpp_context());
  370. CARBON_CHECK(!context.sem_ir().cpp_file());
  371. auto invocation =
  372. std::make_shared<ShallowCopyCompilerInvocation>(*base_invocation);
  373. // Ask Clang to not leak memory.
  374. invocation->getFrontendOpts().DisableFree = false;
  375. // Build a diagnostics engine.
  376. llvm::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diags(
  377. clang::CompilerInstance::createDiagnostics(
  378. *fs, invocation->getDiagnosticOpts(),
  379. new CarbonClangDiagnosticConsumer(context, invocation),
  380. /*ShouldOwnClient=*/true));
  381. // Extract the input from the frontend invocation and make sure it makes
  382. // sense.
  383. const auto& inputs = invocation->getFrontendOpts().Inputs;
  384. CARBON_CHECK(inputs.size() == 1 &&
  385. inputs[0].getKind().getLanguage() == clang::Language::CXX &&
  386. inputs[0].getKind().getFormat() == clang::InputKind::Source);
  387. llvm::StringRef file_name = inputs[0].getFile();
  388. // Remap the imports file name to the corresponding `#include`s.
  389. // TODO: Modify the frontend options to specify this memory buffer as input
  390. // instead of remapping the file.
  391. std::string includes = GenerateCppIncludesHeaderCode(context, imports);
  392. auto includes_buffer =
  393. llvm::MemoryBuffer::getMemBufferCopy(includes, file_name);
  394. invocation->getPreprocessorOpts().addRemappedFile(file_name,
  395. includes_buffer.release());
  396. clang::DiagnosticErrorTrap trap(*diags);
  397. auto clang_instance_ptr =
  398. std::make_unique<clang::CompilerInstance>(invocation);
  399. auto& clang_instance = *clang_instance_ptr;
  400. context.sem_ir().set_cpp_file(std::make_unique<SemIR::CppFile>(
  401. std::move(clang_instance_ptr), llvm_context));
  402. clang_instance.setDiagnostics(diags);
  403. clang_instance.setVirtualFileSystem(fs);
  404. clang_instance.createFileManager();
  405. clang_instance.createSourceManager();
  406. if (!clang_instance.createTarget()) {
  407. return false;
  408. }
  409. context.set_cpp_context(std::make_unique<CppContext>(
  410. std::make_unique<GenerateASTAction>(context)));
  411. if (!context.cpp_context()->action().BeginSourceFile(clang_instance,
  412. inputs[0])) {
  413. return false;
  414. }
  415. if (llvm::Error error = context.cpp_context()->action().Execute()) {
  416. // `Execute` currently never fails, but its contract allows it to.
  417. context.TODO(SemIR::LocId::None, "failed to execute clang action: " +
  418. llvm::toString(std::move(error)));
  419. return false;
  420. }
  421. // Flush any diagnostics. We know we're not part-way through emitting a
  422. // diagnostic now.
  423. context.emitter().Flush();
  424. return !trap.hasErrorOccurred();
  425. }
  426. auto FinishAst(Context& context) -> void {
  427. if (!context.cpp_context()) {
  428. return;
  429. }
  430. context.cpp_context()->sema().ActOnEndOfTranslationUnit();
  431. // We don't call FrontendAction::EndSourceFile, because that destroys the AST.
  432. context.set_cpp_context(nullptr);
  433. context.emitter().Flush();
  434. }
  435. } // namespace Carbon::Check