فهرست منبع

Support for reading source code from stdin and other unusual places. (#3416)

- Treat an input of `-` as meaning stdin.

- Fix building of an llvm::MemoryBuffer from a non-regular file.

- Do not enforce filename restrictions on non-regular files.

- Do not invent an output file name based on the name of a non-regular
file.

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
Richard Smith 2 سال پیش
والد
کامیت
9154c6410e

+ 3 - 1
toolchain/autoupdate_testdata.py

@@ -41,7 +41,9 @@ def main() -> None:
                 f"Args do not seem to be test files; for example, {sys.argv[1]}"
             )
         argv.append("--file_tests=" + ",".join(file_tests))
-    subprocess.run(argv, check=True)
+    # Provide an empty stdin so that the driver tests that read from stdin
+    # don't block waiting for input. This matches the behavior of `bazel test`.
+    subprocess.run(argv, check=True, stdin=subprocess.DEVNULL)
 
 
 if __name__ == "__main__":

+ 30 - 25
toolchain/check/check.cpp

@@ -19,7 +19,8 @@ namespace Carbon::Check {
 struct UnitInfo {
   explicit UnitInfo(Unit& unit)
       : unit(&unit),
-        translator(unit.tokens, unit.tokens->filename(), unit.parse_tree),
+        translator(unit.tokens, unit.tokens->source().filename(),
+                   unit.parse_tree),
         err_tracker(*unit.consumer),
         emitter(translator, err_tracker) {}
 
@@ -47,9 +48,9 @@ struct UnitInfo {
 // imports should suppress errors where it makes sense.
 static auto CheckParseTree(const SemIR::File& builtin_ir, UnitInfo& unit_info,
                            llvm::raw_ostream* vlog_stream) -> void {
-  unit_info.unit->sem_ir->emplace(*unit_info.unit->value_stores,
-                                  unit_info.unit->tokens->filename().str(),
-                                  &builtin_ir);
+  unit_info.unit->sem_ir->emplace(
+      *unit_info.unit->value_stores,
+      unit_info.unit->tokens->source().filename().str(), &builtin_ir);
 
   // For ease-of-access.
   SemIR::File& sem_ir = **unit_info.unit->sem_ir;
@@ -279,7 +280,8 @@ static auto BuildApiMapAndDiagnosePackaging(
     if (!is_impl) {
       auto [entry, success] = api_map.insert({import_key, &unit_info});
       if (!success) {
-        llvm::StringRef prev_filename = entry->second->unit->tokens->filename();
+        llvm::StringRef prev_filename =
+            entry->second->unit->tokens->source().filename();
         if (packaging) {
           CARBON_DIAGNOSTIC(DuplicateLibraryApi, Error,
                             "Library's API previously provided by `{0}`.",
@@ -298,28 +300,31 @@ static auto BuildApiMapAndDiagnosePackaging(
     }
 
     // Validate file extensions. Note imports rely the packaging directive, not
-    // the extension.
-    auto filename = unit_info.unit->tokens->filename();
-    static constexpr llvm::StringLiteral ApiExt = ".carbon";
-    static constexpr llvm::StringLiteral ImplExt = ".impl.carbon";
-    bool is_api_with_impl_ext = !is_impl && filename.ends_with(ImplExt);
-    auto want_ext = is_impl ? ImplExt : ApiExt;
-    if (is_api_with_impl_ext || !filename.ends_with(want_ext)) {
-      CARBON_DIAGNOSTIC(IncorrectExtension, Error,
-                        "File extension of `{0}` required for `{1}`.",
-                        llvm::StringLiteral, Lex::TokenKind);
-      auto diag = unit_info.emitter.Build(
-          packaging ? packaging->names.node : Parse::Node::Invalid,
-          IncorrectExtension, want_ext,
-          is_impl ? Lex::TokenKind::Impl : Lex::TokenKind::Api);
-      if (is_api_with_impl_ext) {
-        CARBON_DIAGNOSTIC(IncorrectExtensionImplNote, Note,
-                          "File extension of `{0}` only allowed for `{1}`.",
+    // the extension. If the input is not a regular file, for example because it
+    // is stdin, no filename checking is performed.
+    if (unit_info.unit->tokens->source().is_regular_file()) {
+      auto filename = unit_info.unit->tokens->source().filename();
+      static constexpr llvm::StringLiteral ApiExt = ".carbon";
+      static constexpr llvm::StringLiteral ImplExt = ".impl.carbon";
+      bool is_api_with_impl_ext = !is_impl && filename.ends_with(ImplExt);
+      auto want_ext = is_impl ? ImplExt : ApiExt;
+      if (is_api_with_impl_ext || !filename.ends_with(want_ext)) {
+        CARBON_DIAGNOSTIC(IncorrectExtension, Error,
+                          "File extension of `{0}` required for `{1}`.",
                           llvm::StringLiteral, Lex::TokenKind);
-        diag.Note(Parse::Node::Invalid, IncorrectExtensionImplNote, ImplExt,
-                  Lex::TokenKind::Impl);
+        auto diag = unit_info.emitter.Build(
+            packaging ? packaging->names.node : Parse::Node::Invalid,
+            IncorrectExtension, want_ext,
+            is_impl ? Lex::TokenKind::Impl : Lex::TokenKind::Api);
+        if (is_api_with_impl_ext) {
+          CARBON_DIAGNOSTIC(IncorrectExtensionImplNote, Note,
+                            "File extension of `{0}` only allowed for `{1}`.",
+                            llvm::StringLiteral, Lex::TokenKind);
+          diag.Note(Parse::Node::Invalid, IncorrectExtensionImplNote, ImplExt,
+                    Lex::TokenKind::Impl);
+        }
+        diag.Emit();
       }
-      diag.Emit();
     }
   }
   return api_map;

+ 19 - 2
toolchain/driver/driver.cpp

@@ -411,8 +411,12 @@ class Driver::CompilationUnit {
   // Loads source and lexes it. Returns true on success.
   auto RunLex() -> bool {
     LogCall("SourceBuffer::CreateFromFile", [&] {
-      source_ = SourceBuffer::CreateFromFile(driver_->fs_, input_file_name_,
-                                             *consumer_);
+      if (input_file_name_ == "-") {
+        source_ = SourceBuffer::CreateFromStdin(*consumer_);
+      } else {
+        source_ = SourceBuffer::CreateFromFile(driver_->fs_, input_file_name_,
+                                               *consumer_);
+      }
     });
     if (!source_) {
       return false;
@@ -535,9 +539,22 @@ class Driver::CompilationUnit {
     } else {
       llvm::SmallString<256> output_file_name = options_.output_file_name;
       if (output_file_name.empty()) {
+        if (!source_->is_regular_file()) {
+          // Don't invent file names like `-.o` or `/dev/stdin.o`.
+          driver_->error_stream_
+              << "ERROR: Output file name must be specified for input '"
+              << input_file_name_ << "' that is not a regular file.\n";
+          return false;
+        }
         output_file_name = input_file_name_;
         llvm::sys::path::replace_extension(output_file_name,
                                            options_.asm_output ? ".s" : ".o");
+      } else {
+        // TODO: Handle the case where multiple input files were specified
+        // along with an output file name. That should either be an error or
+        // should produce a single LLVM IR module containing all inputs.
+        // Currently each unit overwrites the output from the previous one in
+        // this case.
       }
       CARBON_VLOG() << "Writing output to: " << output_file_name << "\n";
 

+ 8 - 0
toolchain/driver/testdata/fail_input_is_directory.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ARGS: compile .
+//
+// AUTOUPDATE
+// CHECK:STDERR: .: ERROR: Error opening file for read: Invalid argument

+ 8 - 0
toolchain/driver/testdata/fail_missing_stdin_output.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ARGS: compile -
+//
+// AUTOUPDATE
+// CHECK:STDERR: ERROR: Output file name must be specified for input '-' that is not a regular file.

+ 10 - 0
toolchain/driver/testdata/stdin.carbon

@@ -0,0 +1,10 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ARGS: compile - --phase=check --dump-sem-ir
+//
+// AUTOUPDATE
+
+// CHECK:STDOUT: file "<stdin>" {
+// CHECK:STDOUT: }

+ 1 - 1
toolchain/lex/tokenized_buffer.h

@@ -233,7 +233,7 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
     return expected_parse_tree_size_;
   }
 
-  auto filename() const -> llvm::StringRef { return source_->filename(); }
+  auto source() const -> const SourceBuffer& { return *source_; }
 
  private:
   friend class Lexer;

+ 2 - 2
toolchain/parse/tree.cpp

@@ -131,7 +131,7 @@ auto Tree::PrintNode(llvm::raw_ostream& output, Node n, int depth,
 }
 
 auto Tree::Print(llvm::raw_ostream& output) const -> void {
-  output << "- filename: " << tokens_->filename() << "\n"
+  output << "- filename: " << tokens_->source().filename() << "\n"
          << "  parse_tree: [\n";
 
   // Walk the tree just to calculate depths for each node.
@@ -166,7 +166,7 @@ auto Tree::Print(llvm::raw_ostream& output, bool preorder) const -> void {
     return;
   }
 
-  output << "- filename: " << tokens_->filename() << "\n"
+  output << "- filename: " << tokens_->source().filename() << "\n"
          << "  parse_tree: [\n";
 
   // The parse tree is stored in postorder. The preorder can be constructed

+ 34 - 12
toolchain/source/source_buffer.cpp

@@ -18,6 +18,12 @@ struct FilenameTranslator : DiagnosticLocationTranslator<llvm::StringRef> {
 };
 }  // namespace
 
+auto SourceBuffer::CreateFromStdin(DiagnosticConsumer& consumer)
+    -> std::optional<SourceBuffer> {
+  return CreateFromMemoryBuffer(llvm::MemoryBuffer::getSTDIN(), "<stdin>",
+                                /*is_regular_file=*/false, consumer);
+}
+
 auto SourceBuffer::CreateFromFile(llvm::vfs::FileSystem& fs,
                                   llvm::StringRef filename,
                                   DiagnosticConsumer& consumer)
@@ -41,25 +47,41 @@ auto SourceBuffer::CreateFromFile(llvm::vfs::FileSystem& fs,
     emitter.Emit(filename, ErrorStattingFile, file.getError().message());
     return std::nullopt;
   }
-  int64_t size = status->getSize();
-  if (size >= std::numeric_limits<int32_t>::max()) {
-    CARBON_DIAGNOSTIC(FileTooLarge, Error,
-                      "File is over the 2GiB input limit; size is {0} bytes.",
-                      int64_t);
-    emitter.Emit(filename, FileTooLarge, size);
-    return std::nullopt;
-  }
 
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer =
-      (*file)->getBuffer(filename, size, /*RequiresNullTerminator=*/false);
+  // `stat` on a file without a known size gives a size of 0, which causes
+  // `llvm::vfs::File::getBuffer` to produce an empty buffer. Use a size of -1
+  // in this case so we get the complete file contents.
+  bool is_regular_file = status->isRegularFile();
+  int64_t size = is_regular_file ? status->getSize() : -1;
+
+  return CreateFromMemoryBuffer(
+      (*file)->getBuffer(filename, size, /*RequiresNullTerminator=*/false),
+      filename, is_regular_file, consumer);
+}
+
+auto SourceBuffer::CreateFromMemoryBuffer(
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer,
+    llvm::StringRef filename, bool is_regular_file,
+    DiagnosticConsumer& consumer) -> std::optional<SourceBuffer> {
+  FilenameTranslator translator;
+  DiagnosticEmitter<llvm::StringRef> emitter(translator, consumer);
+
   if (buffer.getError()) {
     CARBON_DIAGNOSTIC(ErrorReadingFile, Error, "Error reading file: {0}",
                       std::string);
-    emitter.Emit(filename, ErrorReadingFile, file.getError().message());
+    emitter.Emit(filename, ErrorReadingFile, buffer.getError().message());
+    return std::nullopt;
+  }
+
+  if (buffer.get()->getBufferSize() >= std::numeric_limits<int32_t>::max()) {
+    CARBON_DIAGNOSTIC(FileTooLarge, Error,
+                      "File is over the 2GiB input limit; size is {0} bytes.",
+                      int64_t);
+    emitter.Emit(filename, FileTooLarge, buffer.get()->getBufferSize());
     return std::nullopt;
   }
 
-  return SourceBuffer(filename.str(), std::move(buffer.get()));
+  return SourceBuffer(filename.str(), std::move(buffer.get()), is_regular_file);
 }
 
 }  // namespace Carbon

+ 25 - 2
toolchain/source/source_buffer.h

@@ -34,6 +34,11 @@ namespace Carbon {
 // some implementation complexity in the future if needed.
 class SourceBuffer {
  public:
+  // Opens and reads the contents of stdin. Returns a SourceBuffer on success.
+  // Prints an error and returns nullopt on failure.
+  static auto CreateFromStdin(DiagnosticConsumer& consumer)
+      -> std::optional<SourceBuffer>;
+
   // Opens the requested file. Returns a SourceBuffer on success. Prints an
   // error and returns nullopt on failure.
   static auto CreateFromFile(llvm::vfs::FileSystem& fs,
@@ -50,13 +55,31 @@ class SourceBuffer {
     return text_->getBuffer();
   }
 
+  [[nodiscard]] auto is_regular_file() const -> bool {
+    return is_regular_file_;
+  }
+
  private:
+  // Creates a `SourceBuffer` from the given `llvm::MemoryBuffer`. Prints an
+  // error and returns nullopt on failure.
+  static auto CreateFromMemoryBuffer(
+      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer,
+      llvm::StringRef filename, bool is_regular_file,
+      DiagnosticConsumer& consumer) -> std::optional<SourceBuffer>;
+
   explicit SourceBuffer(std::string filename,
-                        std::unique_ptr<llvm::MemoryBuffer> text)
-      : filename_(std::move(filename)), text_(std::move(text)) {}
+                        std::unique_ptr<llvm::MemoryBuffer> text,
+                        bool is_regular_file)
+      : filename_(std::move(filename)),
+        text_(std::move(text)),
+        is_regular_file_(is_regular_file) {}
 
   std::string filename_;
   std::unique_ptr<llvm::MemoryBuffer> text_;
+
+  // Whether this buffer is a regular file, rather than stdin or a named pipe or
+  // similar.
+  bool is_regular_file_;
 };
 
 }  // namespace Carbon