Pārlūkot izejas kodu

Add skeletal format subcommand. (#4383)

This extracts out the SourceBuffer handling of `-` in order to trivially
share it.

Note that this still has a number of TODOs; it only sets up the
essential subcommand infrastructure, with some tests demonstrating that
it at least does something.
Jon Ross-Perkins 1 gadu atpakaļ
vecāks
revīzija
434173b016

+ 3 - 0
toolchain/driver/BUILD

@@ -89,6 +89,8 @@ cc_library(
         "compile_subcommand.h",
         "driver.cpp",
         "driver_env.h",
+        "format_subcommand.cpp",
+        "format_subcommand.h",
         "link_subcommand.cpp",
         "link_subcommand.h",
     ],
@@ -112,6 +114,7 @@ cc_library(
         "//toolchain/codegen",
         "//toolchain/diagnostics:diagnostic_emitter",
         "//toolchain/diagnostics:sorting_diagnostic_consumer",
+        "//toolchain/format",
         "//toolchain/install:install_paths",
         "//toolchain/lex",
         "//toolchain/lower",

+ 3 - 7
toolchain/driver/compile_subcommand.cpp

@@ -329,13 +329,9 @@ class CompilationUnit {
 
   // Loads source and lexes it. Does not return a status.
   auto RunLex() -> void {
-    LogCall("SourceBuffer::MakeFromFile", [&] {
-      if (input_filename_ == "-") {
-        source_ = SourceBuffer::MakeFromStdin(*consumer_);
-      } else {
-        source_ = SourceBuffer::MakeFromFile(driver_env_->fs, input_filename_,
-                                             *consumer_);
-      }
+    LogCall("SourceBuffer::MakeFromFileOrStdin", [&] {
+      source_ = SourceBuffer::MakeFromFileOrStdin(driver_env_->fs,
+                                                  input_filename_, *consumer_);
     });
     if (mem_usage_) {
       mem_usage_->Add("source_", source_->text().size(),

+ 7 - 0
toolchain/driver/driver.cpp

@@ -12,6 +12,7 @@
 #include "common/version.h"
 #include "toolchain/driver/clang_subcommand.h"
 #include "toolchain/driver/compile_subcommand.h"
+#include "toolchain/driver/format_subcommand.h"
 #include "toolchain/driver/link_subcommand.h"
 
 namespace Carbon {
@@ -26,6 +27,7 @@ struct Options {
 
   ClangSubcommand clang;
   CompileSubcommand compile;
+  FormatSubcommand format;
   LinkSubcommand link;
 
   // On success, this is set to the subcommand to run.
@@ -72,6 +74,11 @@ auto Options::Build(CommandLine::CommandBuilder& b) -> void {
                     sub_b.Do([&] { subcommand = &compile; });
                   });
 
+  b.AddSubcommand(FormatOptions::Info, [&](CommandLine::CommandBuilder& sub_b) {
+    format.BuildOptions(sub_b);
+    sub_b.Do([&] { subcommand = &format; });
+  });
+
   b.AddSubcommand(LinkOptions::Info, [&](CommandLine::CommandBuilder& sub_b) {
     link.BuildOptions(sub_b);
     sub_b.Do([&] { subcommand = &link; });

+ 99 - 0
toolchain/driver/format_subcommand.cpp

@@ -0,0 +1,99 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/driver/format_subcommand.h"
+
+#include <string>
+
+#include "toolchain/base/value_store.h"
+#include "toolchain/diagnostics/diagnostic_consumer.h"
+#include "toolchain/format/format.h"
+#include "toolchain/lex/lex.h"
+#include "toolchain/source/source_buffer.h"
+
+namespace Carbon {
+
+constexpr CommandLine::CommandInfo FormatOptions::Info = {
+    .name = "format",  // Name of the subcommand.
+    .help = R"""(
+Format Carbon source code.
+)""",
+};  // Passed to `AddSubcommand` when the driver builds its options.
+
+// Registers the format subcommand's arguments on `b` and binds the parsed
+// values to this object's members: the positional `FILE` inputs go to
+// `input_filenames`, and `--output` goes to `output_filename`.
+auto FormatOptions::Build(CommandLine::CommandBuilder& b) -> void {
+  // Positional inputs: marked required, with each value appended to the list.
+  b.AddStringPositionalArg(
+      {
+          .name = "FILE",
+          .help = R"""(
+The input Carbon source file(s) to format.
+)""",
+      },
+      [this](auto& files_b) {
+        files_b.Required(true);
+        files_b.Append(&input_filenames);
+      });
+
+  // Optional output destination, stored as given on the command line.
+  b.AddStringOption(
+      {
+          .name = "output",
+          .value_name = "FILE",
+          .help = R"""(
+The output filename for formatted output.
+
+By default, the input file is formatted. Passing `--output=-` will write the
+output to stdout.
+
+Not valid when multiple files are passed for formatting.
+)""",
+      },
+      [this](auto& output_b) {
+        output_b.Set(&output_filename);
+      });
+}
+
+// Runs the format subcommand over every input file.
+//
+// Each file is loaded (or read from stdin for `-`), lexed, and formatted. The
+// formatted text is written to `driver_env.output_stream`; when formatting
+// fails, the original source text is written instead and the file is recorded
+// as failed. Files that cannot be loaded are recorded as failed and skipped.
+//
+// Returns a result whose `success` is false if any file failed, or if
+// `--output` was combined with more than one input file.
+auto FormatSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
+  DriverResult result = {.success = true};
+
+  // `--output` names a single destination, so reject it with several inputs.
+  const bool has_output = !options_.output_filename.empty();
+  if (has_output && options_.input_filenames.size() > 1) {
+    driver_env.error_stream << "error: cannot format multiple input files when "
+                               "--output is set\n";
+    result.success = false;
+    return result;
+  }
+
+  // Flags the most recently pushed per-file entry, and the overall run, as
+  // failed.
+  auto fail_current_file = [&result] {
+    result.per_file_success.back().second = false;
+    result.success = false;
+  };
+
+  StreamDiagnosticConsumer consumer(driver_env.error_stream);
+  for (auto& filename : options_.input_filenames) {
+    // Start from success; the entry is flipped if anything below fails.
+    result.per_file_success.push_back({filename.str(), true});
+
+    // TODO: Consider refactoring this for sharing with compile.
+    // TODO: Decide what to do with `-` when there are multiple arguments.
+    auto source =
+        SourceBuffer::MakeFromFileOrStdin(driver_env.fs, filename, consumer);
+    if (!source) {
+      fail_current_file();
+      continue;
+    }
+
+    SharedValueStores value_stores;
+    auto tokens = Lex::Lex(value_stores, *source, consumer);
+
+    // Format into a scratch string so a failed attempt never reaches the
+    // output stream.
+    std::string formatted;
+    llvm::raw_string_ostream formatted_stream(formatted);
+    if (!Format::Format(tokens, formatted_stream)) {
+      // Fall back to echoing the unmodified source text.
+      fail_current_file();
+      driver_env.output_stream << source->text();
+      continue;
+    }
+
+    // TODO: Figure out a multi-file output setup that supports good
+    // multi-file testing.
+    // TODO: Use --output values (and default to overwrite).
+    driver_env.output_stream << formatted;
+  }
+
+  return result;
+}
+
+}  // namespace Carbon

+ 39 - 0
toolchain/driver/format_subcommand.h

@@ -0,0 +1,39 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_DRIVER_FORMAT_SUBCOMMAND_H_
+#define CARBON_TOOLCHAIN_DRIVER_FORMAT_SUBCOMMAND_H_
+
+#include "common/command_line.h"
+#include "toolchain/driver/driver_env.h"
+#include "toolchain/driver/driver_subcommand.h"
+
+namespace Carbon {
+
+// Options for the format subcommand; members are bound to arguments by `Build`.
+//
+// See the implementation of `Build` for documentation on members.
+struct FormatOptions {
+  static const CommandLine::CommandInfo Info;  // Subcommand name and help.
+
+  auto Build(CommandLine::CommandBuilder& b) -> void;
+
+  llvm::StringRef output_filename;  // Value of `--output`.
+  llvm::SmallVector<llvm::StringRef> input_filenames;  // `FILE` arguments.
+};
+
+// Implements the format subcommand of the driver.
+class FormatSubcommand : public DriverSubcommand {
+ public:
+  // Registers the subcommand's arguments on `b`, binding them to this
+  // instance's options.
+  auto BuildOptions(CommandLine::CommandBuilder& b) -> void {
+    options_.Build(b);
+  }
+
+  // Formats the configured input files using the streams and filesystem in
+  // `driver_env`, returning overall and per-file success.
+  auto Run(DriverEnv& driver_env) -> DriverResult override;
+
+ private:
+  // Option values bound by `BuildOptions` and read by `Run`.
+  FormatOptions options_;
+};
+
+}  // namespace Carbon
+
+#endif  // CARBON_TOOLCHAIN_DRIVER_FORMAT_SUBCOMMAND_H_

+ 23 - 0
toolchain/format/BUILD

@@ -0,0 +1,23 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "testdata",
+    data = glob(["testdata/**/*.carbon"]),
+)
+
+cc_library(
+    name = "format",
+    srcs = ["format.cpp"],
+    hdrs = ["format.h"],
+    deps = [
+        "//common:ostream",
+        "//toolchain/lex:token_index",
+        "//toolchain/lex:tokenized_buffer",
+    ],
+)

+ 36 - 0
toolchain/format/format.cpp

@@ -0,0 +1,36 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "toolchain/format/format.h"
+
+#include "toolchain/lex/token_index.h"
+#include "toolchain/lex/tokenized_buffer.h"
+
+namespace Carbon::Format {
+
+// Writes the text of every token to `out`, separated by single spaces and
+// followed by a newline at the end of the file. Returns false, writing
+// nothing, when the token buffer has errors.
+//
+// TODO: Add support for formatting line ranges (will need flags too).
+auto Format(const Lex::TokenizedBuffer& tokens, llvm::raw_ostream& out)
+    -> bool {
+  // TODO: Error recovery.
+  if (tokens.has_errors()) {
+    return false;
+  }
+
+  llvm::ListSeparator separator(" ");
+  for (auto token : tokens.tokens()) {
+    const auto kind = tokens.GetKind(token);
+
+    // The start-of-file marker produces no output.
+    if (kind == Lex::TokenKind::FileStart) {
+      continue;
+    }
+
+    // The end-of-file marker terminates the output with a newline.
+    if (kind == Lex::TokenKind::FileEnd) {
+      out << "\n";
+      continue;
+    }
+
+    // TODO: More dependent formatting.
+    out << separator << tokens.GetTokenText(token);
+  }
+  return true;
+}
+
+}  // namespace Carbon::Format

+ 20 - 0
toolchain/format/format.h

@@ -0,0 +1,20 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_FORMAT_FORMAT_H_
+#define CARBON_TOOLCHAIN_FORMAT_FORMAT_H_
+
+#include "common/ostream.h"
+#include "toolchain/lex/tokenized_buffer.h"
+
+namespace Carbon::Format {
+
+// Formats file content (based on tokens) to the out stream. Returns false if
+// there was an error during formatting, and the formatted stream shouldn't be
+// used (in that case, the caller might want to use the original content).
+auto Format(const Lex::TokenizedBuffer& tokens, llvm::raw_ostream& out) -> bool;
+
+}  // namespace Carbon::Format
+
+#endif  // CARBON_TOOLCHAIN_FORMAT_FORMAT_H_

+ 13 - 0
toolchain/format/testdata/basics/empty.carbon

@@ -0,0 +1,13 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/empty.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/empty.carbon
+
+// --- test.carbon
+
+// CHECK:STDOUT:

+ 20 - 0
toolchain/format/testdata/basics/fail_invalid_comment.carbon

@@ -0,0 +1,20 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// TODO: This can't autoupdate because the STDERR is retained in test input.
+// NOAUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/fail_invalid_comment.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/fail_invalid_comment.carbon
+
+// CHECK:STDERR: fail_test.carbon:2:3: error: whitespace is required after '//'
+// CHECK:STDERR: //f
+// CHECK:STDERR:   ^
+// CHECK:STDOUT:
+// CHECK:STDOUT: //f
+
+// --- fail_test.carbon
+
+//f

+ 11 - 0
toolchain/format/testdata/basics/fail_nonexistent.carbon

@@ -0,0 +1,11 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ARGS: format fail_target_file.carbon
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/fail_nonexistent.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/fail_nonexistent.carbon
+// CHECK:STDERR: fail_target_file.carbon: error: error opening file for read: No such file or directory

+ 15 - 0
toolchain/format/testdata/basics/simple.carbon

@@ -0,0 +1,15 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/simple.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/simple.carbon
+
+// --- test.carbon
+
+fn F(x: i32) -> i32 { return x; }
+
+// CHECK:STDOUT: fn F ( x : i32 ) -> i32 { return x ; }

+ 12 - 0
toolchain/source/source_buffer.h

@@ -45,6 +45,18 @@ class SourceBuffer {
                            DiagnosticConsumer& consumer)
       -> std::optional<SourceBuffer>;
 
+  // Builds a source buffer from stdin when `filename` is exactly "-", and
+  // from the named file in `fs` otherwise. Diagnostics go to `consumer`.
+  static auto MakeFromFileOrStdin(llvm::vfs::FileSystem& fs,
+                                  llvm::StringRef filename,
+                                  DiagnosticConsumer& consumer)
+      -> std::optional<SourceBuffer> {
+    const bool use_stdin = filename == "-";
+    if (!use_stdin) {
+      return MakeFromFile(fs, filename, consumer);
+    }
+    return MakeFromStdin(consumer);
+  }
+
   // Use one of the factory functions above to create a source buffer.
   SourceBuffer() = delete;
 

+ 1 - 0
toolchain/testing/BUILD

@@ -20,6 +20,7 @@ file_test(
         "//toolchain/codegen:testdata",
         "//toolchain/diagnostics:testdata",
         "//toolchain/driver:testdata",
+        "//toolchain/format:testdata",
         "//toolchain/lex:testdata",
         "//toolchain/lower:testdata",
         "//toolchain/parse:testdata",

+ 4 - 0
toolchain/testing/file_test.cpp

@@ -59,6 +59,10 @@ class ToolchainFileTest : public FileTestBase {
   }
 
   auto GetDefaultArgs() -> llvm::SmallVector<std::string> override {
+    if (component_ == "format") {
+      return {"format", "%s"};
+    }
+
     llvm::SmallVector<std::string> args = {"compile",
                                            "--phase=" + component_.str()};