driver.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/driver/driver.h"
  5. #include "common/command_line.h"
  6. #include "common/vlog.h"
  7. #include "llvm/ADT/ArrayRef.h"
  8. #include "llvm/ADT/StringExtras.h"
  9. #include "llvm/ADT/StringRef.h"
  10. #include "llvm/IR/LLVMContext.h"
  11. #include "llvm/Support/Path.h"
  12. #include "llvm/TargetParser/Host.h"
  13. #include "toolchain/codegen/codegen.h"
  14. #include "toolchain/diagnostics/diagnostic_emitter.h"
  15. #include "toolchain/diagnostics/sorting_diagnostic_consumer.h"
  16. #include "toolchain/lexer/tokenized_buffer.h"
  17. #include "toolchain/lowering/lower_to_llvm.h"
  18. #include "toolchain/parser/parse_tree.h"
  19. #include "toolchain/semantics/semantics_ir.h"
  20. #include "toolchain/semantics/semantics_ir_formatter.h"
  21. #include "toolchain/source/source_buffer.h"
  22. namespace Carbon {
  23. struct Driver::CompileOptions {
  24. static constexpr CommandLine::CommandInfo Info = {
  25. .name = "compile",
  26. .help = R"""(
  27. Compile Carbon source code.
  28. This subcommand runs the Carbon compiler over input source code, checking it for
  29. errors and producing the requested output.
  30. Error messages are written to the standard error stream.
  31. Different phases of the compiler can be selected to run, and intermediate state
  32. can be written to standard output as these phases progress.
  33. )""",
  34. };
  35. enum class Phase : int8_t {
  36. Lex,
  37. Parse,
  38. Check,
  39. Lower,
  40. CodeGen,
  41. };
  42. friend auto operator<<(llvm::raw_ostream& out, Phase phase)
  43. -> llvm::raw_ostream& {
  44. switch (phase) {
  45. case Phase::Lex:
  46. out << "lex";
  47. break;
  48. case Phase::Parse:
  49. out << "parse";
  50. break;
  51. case Phase::Check:
  52. out << "check";
  53. break;
  54. case Phase::Lower:
  55. out << "lower";
  56. break;
  57. case Phase::CodeGen:
  58. out << "codegen";
  59. break;
  60. }
  61. return out;
  62. }
  63. void Build(CommandLine::CommandBuilder& b) {
  64. b.AddStringPositionalArg(
  65. {
  66. .name = "FILE",
  67. .help = R"""(
  68. The input Carbon source file to compile.
  69. )""",
  70. },
  71. [&](auto& arg_b) {
  72. arg_b.Required(true);
  73. arg_b.Set(&input_file_name);
  74. });
  75. b.AddOneOfOption(
  76. {
  77. .name = "phase",
  78. .help = R"""(
  79. Selects the compilation phase to run. These phases are always run in sequence,
  80. so every phase before the one selected will also be run. The default is to
  81. compile to machine code.
  82. )""",
  83. },
  84. [&](auto& arg_b) {
  85. arg_b.SetOneOf(
  86. {
  87. arg_b.OneOfValue("lex", Phase::Lex),
  88. arg_b.OneOfValue("parse", Phase::Parse),
  89. arg_b.OneOfValue("check", Phase::Check),
  90. arg_b.OneOfValue("lower", Phase::Lower),
  91. arg_b.OneOfValue("codegen", Phase::CodeGen).Default(true),
  92. },
  93. &phase);
  94. });
  95. // TODO: Rearrange the code setting this option and two related ones to
  96. // allow them to reference each other instead of hard-coding their names.
  97. b.AddStringOption(
  98. {
  99. .name = "output",
  100. .value_name = "FILE",
  101. .help = R"""(
  102. The output filename for codegen.
  103. When this is a file name, either textual assembly or a binary object will be
  104. written to it based on the flag `--asm-output`. The default is to write a binary
  105. object file.
  106. Passing `--output=-` will write the output to stdout. In that
  107. case, the flag `--asm-output` is ignored and the output defaults to textual
  108. assembly. Binary object output can be forced by enabling `--force-obj-output`.
  109. )""",
  110. },
  111. [&](auto& arg_b) { arg_b.Set(&output_file_name); });
  112. b.AddStringOption(
  113. {
  114. .name = "target",
  115. .help = R"""(
  116. Select a target platform. Uses the LLVM target syntax. Also known as a "triple"
  117. for historical reasons.
  118. This corresponds to the `target` flag to Clang and accepts the same strings
  119. documented there:
  120. https://clang.llvm.org/docs/CrossCompilation.html#target-triple
  121. )""",
  122. },
  123. [&](auto& arg_b) {
  124. arg_b.Default(host);
  125. arg_b.Set(&target);
  126. });
  127. b.AddFlag(
  128. {
  129. .name = "asm-output",
  130. .help = R"""(
  131. Write textual assembly rather than a binary object file to the code generation
  132. output.
  133. This flag only applies when writing to a file. When writing to stdout, the
  134. default is textual assembly and this flag is ignored.
  135. )""",
  136. },
  137. [&](auto& arg_b) { arg_b.Set(&asm_output); });
  138. b.AddFlag(
  139. {
  140. .name = "force-obj-output",
  141. .help = R"""(
  142. Force binary object output, even with `--output=-`.
  143. When `--output=-` is set, the default is textual assembly; this forces printing
  144. of a binary object file instead. Ignored for other `--output` values.
  145. )""",
  146. },
  147. [&](auto& arg_b) { arg_b.Set(&force_obj_output); });
  148. b.AddFlag(
  149. {
  150. .name = "stream-errors",
  151. .help = R"""(
  152. Stream error messages to stderr as they are generated rather than sorting them
  153. and displaying them in source order.
  154. )""",
  155. },
  156. [&](auto& arg_b) { arg_b.Set(&stream_errors); });
  157. b.AddFlag(
  158. {
  159. .name = "dump-tokens",
  160. .help = R"""(
  161. Dump the tokens to stdout when lexed.
  162. )""",
  163. },
  164. [&](auto& arg_b) { arg_b.Set(&dump_tokens); });
  165. b.AddFlag(
  166. {
  167. .name = "dump-parse-tree",
  168. .help = R"""(
  169. Dump the parse tree to stdout when parsed.
  170. )""",
  171. },
  172. [&](auto& arg_b) { arg_b.Set(&dump_parse_tree); });
  173. b.AddFlag(
  174. {
  175. .name = "preorder-parse-tree",
  176. .help = R"""(
  177. When dumping the parse tree, reorder it so that it is in preorder rather than
  178. postorder.
  179. )""",
  180. },
  181. [&](auto& arg_b) { arg_b.Set(&preorder_parse_tree); });
  182. b.AddFlag(
  183. {
  184. .name = "dump-raw-semantics-ir",
  185. .help = R"""(
  186. Dump the raw JSON structure of semantics IR to stdout when built.
  187. )""",
  188. },
  189. [&](auto& arg_b) { arg_b.Set(&dump_raw_semantics_ir); });
  190. b.AddFlag(
  191. {
  192. .name = "dump-semantics-ir",
  193. .help = R"""(
  194. Dump the semantics IR to stdout when built.
  195. )""",
  196. },
  197. [&](auto& arg_b) { arg_b.Set(&dump_semantics_ir); });
  198. b.AddFlag(
  199. {
  200. .name = "builtin-semantics-ir",
  201. .help = R"""(
  202. Include the semantics IR for builtins when dumping it.
  203. )""",
  204. },
  205. [&](auto& arg_b) { arg_b.Set(&builtin_semantics_ir); });
  206. b.AddFlag(
  207. {
  208. .name = "dump-llvm-ir",
  209. .help = R"""(
  210. Dump the LLVM IR to stdout after lowering.
  211. )""",
  212. },
  213. [&](auto& arg_b) { arg_b.Set(&dump_llvm_ir); });
  214. b.AddFlag(
  215. {
  216. .name = "dump-asm",
  217. .help = R"""(
  218. Dump the generated assembly to stdout after codegen.
  219. )""",
  220. },
  221. [&](auto& arg_b) { arg_b.Set(&dump_asm); });
  222. }
  223. Phase phase;
  224. std::string host = llvm::sys::getDefaultTargetTriple();
  225. llvm::StringRef target;
  226. llvm::StringRef output_file_name;
  227. llvm::StringRef input_file_name;
  228. bool asm_output = false;
  229. bool force_obj_output = false;
  230. bool dump_tokens = false;
  231. bool dump_parse_tree = false;
  232. bool dump_raw_semantics_ir = false;
  233. bool dump_semantics_ir = false;
  234. bool dump_llvm_ir = false;
  235. bool dump_asm = false;
  236. bool stream_errors = false;
  237. bool preorder_parse_tree = false;
  238. bool builtin_semantics_ir = false;
  239. };
  240. struct Driver::Options {
  241. static constexpr CommandLine::CommandInfo Info = {
  242. .name = "carbon",
  243. // TODO: Setup more detailed version information and use that here.
  244. .version = R"""(
  245. Carbon Language toolchain -- version 0.0.0
  246. )""",
  247. .help = R"""(
  248. This is the unified Carbon Language toolchain driver. It's subcommands provide
  249. all of the core behavior of the toolchain, including compilation, linking, and
  250. developer tools. Each of these has its own subcommand, and you can pass a
  251. specific subcommand to the `help` subcommand to get details about is usage.
  252. )""",
  253. .help_epilogue = R"""(
  254. For questions, issues, or bug reports, please use our GitHub project:
  255. https://github.com/carbon-language/carbon-lang
  256. )""",
  257. };
  258. enum class Subcommand : int8_t {
  259. Compile,
  260. };
  261. void Build(CommandLine::CommandBuilder& b) {
  262. b.AddFlag(
  263. {
  264. .name = "verbose",
  265. .short_name = "v",
  266. .help = "Enable verbose logging to the stderr stream.",
  267. },
  268. [&](CommandLine::FlagBuilder& arg_b) { arg_b.Set(&verbose); });
  269. b.AddSubcommand(CompileOptions::Info,
  270. [&](CommandLine::CommandBuilder& sub_b) {
  271. compile_options.Build(sub_b);
  272. sub_b.Do([&] { subcommand = Subcommand::Compile; });
  273. });
  274. b.RequiresSubcommand();
  275. }
  276. bool verbose;
  277. Subcommand subcommand;
  278. CompileOptions compile_options;
  279. };
  280. auto Driver::ParseArgs(llvm::ArrayRef<llvm::StringRef> args, Options& options)
  281. -> CommandLine::ParseResult {
  282. return CommandLine::Parse(
  283. args, output_stream_, error_stream_, Options::Info,
  284. [&](CommandLine::CommandBuilder& b) { options.Build(b); });
  285. }
  286. auto Driver::RunCommand(llvm::ArrayRef<llvm::StringRef> args) -> bool {
  287. Options options;
  288. CommandLine::ParseResult result = ParseArgs(args, options);
  289. if (result == CommandLine::ParseResult::Error) {
  290. return false;
  291. } else if (result == CommandLine::ParseResult::MetaSuccess) {
  292. return true;
  293. }
  294. if (options.verbose) {
  295. // Note this implies streamed output in order to interleave.
  296. vlog_stream_ = &error_stream_;
  297. }
  298. switch (options.subcommand) {
  299. case Options::Subcommand::Compile:
  300. return Compile(options.compile_options);
  301. }
  302. llvm_unreachable("All subcommands handled!");
  303. }
  304. auto Driver::ValidateCompileOptions(const CompileOptions& options) const
  305. -> bool {
  306. using Phase = CompileOptions::Phase;
  307. switch (options.phase) {
  308. case Phase::Lex:
  309. if (options.dump_parse_tree) {
  310. error_stream_ << "ERROR: Requested dumping the parse tree but compile "
  311. "phase is limited to '"
  312. << options.phase << "'\n";
  313. return false;
  314. }
  315. [[clang::fallthrough]];
  316. case Phase::Parse:
  317. if (options.dump_semantics_ir) {
  318. error_stream_ << "ERROR: Requested dumping the semantics IR but "
  319. "compile phase is limited to '"
  320. << options.phase << "'\n";
  321. return false;
  322. }
  323. [[clang::fallthrough]];
  324. case Phase::Check:
  325. if (options.dump_llvm_ir) {
  326. error_stream_ << "ERROR: Requested dumping the LLVM IR but compile "
  327. "phase is limited to '"
  328. << options.phase << "'\n";
  329. return false;
  330. }
  331. [[clang::fallthrough]];
  332. case Phase::Lower:
  333. case Phase::CodeGen:
  334. // Everything can be dumped in these phases.
  335. break;
  336. }
  337. return true;
  338. }
  339. auto Driver::Compile(const CompileOptions& options) -> bool {
  340. using Phase = CompileOptions::Phase;
  341. if (!ValidateCompileOptions(options)) {
  342. return false;
  343. }
  344. StreamDiagnosticConsumer stream_consumer(error_stream_);
  345. DiagnosticConsumer* consumer = &stream_consumer;
  346. // Note, the diagnostics consumer must be flushed before each `return` in this
  347. // function, as diagnostics can refer to state that lives on our stack.
  348. std::unique_ptr<SortingDiagnosticConsumer> sorting_consumer;
  349. if (vlog_stream_ == nullptr && !options.stream_errors) {
  350. sorting_consumer = std::make_unique<SortingDiagnosticConsumer>(*consumer);
  351. consumer = sorting_consumer.get();
  352. }
  353. CARBON_VLOG() << "*** SourceBuffer::CreateFromFile on '"
  354. << options.input_file_name << "' ***\n";
  355. auto source = SourceBuffer::CreateFromFile(fs_, options.input_file_name);
  356. CARBON_VLOG() << "*** SourceBuffer::CreateFromFile done ***\n";
  357. if (!source.ok()) {
  358. error_stream_ << "ERROR: Unable to open input source file: "
  359. << source.error();
  360. consumer->Flush();
  361. return false;
  362. }
  363. CARBON_VLOG() << "*** file:\n```\n" << source->text() << "\n```\n";
  364. CARBON_VLOG() << "*** TokenizedBuffer::Lex ***\n";
  365. auto tokenized_source = TokenizedBuffer::Lex(*source, *consumer);
  366. bool has_errors = tokenized_source.has_errors();
  367. CARBON_VLOG() << "*** TokenizedBuffer::Lex done ***\n";
  368. if (options.dump_tokens) {
  369. CARBON_VLOG() << "Finishing output.";
  370. consumer->Flush();
  371. output_stream_ << tokenized_source;
  372. }
  373. CARBON_VLOG() << "tokenized_buffer: " << tokenized_source;
  374. if (options.phase == Phase::Lex) {
  375. consumer->Flush();
  376. return !has_errors;
  377. }
  378. CARBON_VLOG() << "*** ParseTree::Parse ***\n";
  379. auto parse_tree = ParseTree::Parse(tokenized_source, *consumer, vlog_stream_);
  380. has_errors |= parse_tree.has_errors();
  381. CARBON_VLOG() << "*** ParseTree::Parse done ***\n";
  382. if (options.dump_parse_tree) {
  383. consumer->Flush();
  384. parse_tree.Print(output_stream_, options.preorder_parse_tree);
  385. }
  386. CARBON_VLOG() << "parse_tree: " << parse_tree;
  387. if (options.phase == Phase::Parse) {
  388. consumer->Flush();
  389. return !has_errors;
  390. }
  391. const SemIR::File builtin_ir = SemIR::File::MakeBuiltinIR();
  392. CARBON_VLOG() << "*** SemanticsIR::MakeFromParseTree ***\n";
  393. const SemIR::File semantics_ir = SemIR::File::MakeFromParseTree(
  394. builtin_ir, tokenized_source, parse_tree, *consumer, vlog_stream_);
  395. // We've finished all steps that can produce diagnostics. Emit the
  396. // diagnostics now, so that the developer sees them sooner and doesn't need
  397. // to wait for code generation.
  398. consumer->Flush();
  399. has_errors |= semantics_ir.has_errors();
  400. CARBON_VLOG() << "*** SemIR::File::MakeFromParseTree done ***\n";
  401. if (options.dump_raw_semantics_ir) {
  402. semantics_ir.Print(output_stream_, options.builtin_semantics_ir);
  403. if (options.dump_semantics_ir) {
  404. output_stream_ << "\n";
  405. }
  406. }
  407. if (options.dump_semantics_ir) {
  408. consumer->Flush();
  409. SemIR::FormatFile(tokenized_source, parse_tree, semantics_ir,
  410. output_stream_);
  411. }
  412. CARBON_VLOG() << "semantics_ir: " << semantics_ir;
  413. if (options.phase == Phase::Check) {
  414. return !has_errors;
  415. }
  416. // Unlike previous steps, errors block further progress.
  417. if (has_errors) {
  418. CARBON_VLOG() << "*** Stopping before lowering due to syntax errors ***";
  419. return false;
  420. }
  421. CARBON_VLOG() << "*** LowerToLLVM ***\n";
  422. llvm::LLVMContext llvm_context;
  423. const std::unique_ptr<llvm::Module> module = LowerToLLVM(
  424. llvm_context, options.input_file_name, semantics_ir, vlog_stream_);
  425. CARBON_VLOG() << "*** LowerToLLVM done ***\n";
  426. if (options.dump_llvm_ir) {
  427. module->print(output_stream_, /*AAW=*/nullptr,
  428. /*ShouldPreserveUseListOrder=*/true);
  429. }
  430. if (vlog_stream_) {
  431. CARBON_VLOG() << "module: ";
  432. module->print(*vlog_stream_, /*AAW=*/nullptr,
  433. /*ShouldPreserveUseListOrder=*/false,
  434. /*IsForDebug=*/true);
  435. }
  436. if (options.phase == Phase::Lower) {
  437. return true;
  438. }
  439. CARBON_VLOG() << "*** CodeGen ***\n";
  440. std::optional<CodeGen> codegen =
  441. CodeGen::Create(*module, options.target, error_stream_);
  442. if (!codegen) {
  443. return false;
  444. }
  445. if (vlog_stream_) {
  446. CARBON_VLOG() << "assembly:\n";
  447. codegen->EmitAssembly(*vlog_stream_);
  448. }
  449. if (options.output_file_name == "-") {
  450. // TODO: the output file name, forcing object output, and requesting textual
  451. // assembly output are all somewhat linked flags. We should add some
  452. // validation that they are used correctly.
  453. if (options.force_obj_output) {
  454. if (!codegen->EmitObject(output_stream_)) {
  455. return false;
  456. }
  457. } else {
  458. if (!codegen->EmitAssembly(output_stream_)) {
  459. return false;
  460. }
  461. }
  462. } else {
  463. llvm::SmallString<256> output_file_name = options.output_file_name;
  464. if (output_file_name.empty()) {
  465. output_file_name = options.input_file_name;
  466. llvm::sys::path::replace_extension(output_file_name,
  467. options.asm_output ? ".s" : ".o");
  468. }
  469. CARBON_VLOG() << "Writing output to: " << output_file_name << "\n";
  470. std::error_code ec;
  471. llvm::raw_fd_ostream output_file(output_file_name, ec,
  472. llvm::sys::fs::OF_None);
  473. if (ec) {
  474. error_stream_ << "ERROR: Could not open output file '" << output_file_name
  475. << "': " << ec.message() << "\n";
  476. return false;
  477. }
  478. if (options.asm_output) {
  479. if (!codegen->EmitAssembly(output_file)) {
  480. return false;
  481. }
  482. } else {
  483. if (!codegen->EmitObject(output_file)) {
  484. return false;
  485. }
  486. }
  487. }
  488. CARBON_VLOG() << "*** CodeGen done ***\n";
  489. return true;
  490. }
  491. } // namespace Carbon