1 year ago · 1338f9e0ad
--- a/.codespell_ignore
+++ b/.codespell_ignore
@@ -12,6 +12,7 @@ crossreference
 
				 falsy
			
 
				 forin
			
 
				 groupt
			
 
				+indext
			
 
				 inout
			
 
				 parameteras
			
 
				 pullrequest
			
--- a/toolchain/base/index_base.h
+++ b/toolchain/base/index_base.h
@@ -7,8 +7,11 @@
 
				 
			
 
				 #include <compare>
			
 
				 #include <concepts>
			
 
				+#include <iterator>
			
 
				+#include <type_traits>
			
 
				 
			
 
				 #include "common/ostream.h"
			
 
				+#include "llvm/ADT/iterator.h"
			
 
				 
			
 
				 namespace Carbon {
			
 
				 
			
@@ -74,6 +77,52 @@ auto operator<=>(IndexType lhs, IndexType rhs) -> std::strong_ordering {
 
				   return lhs.index <=> rhs.index;
			
 
				 }
			
 
				 
			
 
				+// A random-access iterator over IndexBase-derived index values; dereferencing yields the index itself.
			
 
				+template <typename IndexT>
			
 
				+class IndexIterator
			
 
				+    : public llvm::iterator_facade_base<IndexIterator<IndexT>,
			
 
				+                                        std::random_access_iterator_tag,
			
 
				+                                        const IndexT, int>,
			
 
				+      public Printable<IndexIterator<IndexT>> {
			
 
				+ public:
			
 
				+  IndexIterator() = delete;
			
 
				+
			
 
				+  explicit IndexIterator(IndexT index) : index_(index) {}
			
 
				+
			
 
				+  auto operator==(const IndexIterator& rhs) const -> bool {
			
 
				+    return index_ == rhs.index_;
			
 
				+  }
			
 
				+  auto operator<=>(const IndexIterator& rhs) const -> std::strong_ordering {
			
 
				+    return index_ <=> rhs.index_;
			
 
				+  }
			
 
				+
			
 
				+  auto operator*() const -> const IndexT& { return index_; }
			
 
				+
			
 
				+  using llvm::iterator_facade_base<IndexIterator,
			
 
				+                                   std::random_access_iterator_tag,
			
 
				+                                   const IndexT, int>::operator-;  // Keeps the base class's operator- overloads visible next to the one below.
			
 
				+  auto operator-(const IndexIterator& rhs) const -> int {
			
 
				+    return index_.index - rhs.index_.index;  // Distance in raw index units.
			
 
				+  }
			
 
				+
			
 
				+  auto operator+=(int n) -> IndexIterator& {
			
 
				+    index_.index += n;
			
 
				+    return *this;
			
 
				+  }
			
 
				+  auto operator-=(int n) -> IndexIterator& {
			
 
				+    index_.index -= n;
			
 
				+    return *this;
			
 
				+  }
			
 
				+
			
 
				+  // Prints the raw index value.
			
 
				+  auto Print(llvm::raw_ostream& output) const -> void {
			
 
				+    output << index_.index;
			
 
				+  }
			
 
				+
			
 
				+ private:
			
 
				+  IndexT index_;
			
 
				+};
			
 
				+
			
 
				 }  // namespace Carbon
			
 
				 
			
 
				 #endif  // CARBON_TOOLCHAIN_BASE_INDEX_BASE_H_
			
--- a/toolchain/format/format.cpp
+++ b/toolchain/format/format.cpp
@@ -16,8 +16,20 @@ auto Format(const Lex::TokenizedBuffer& tokens, llvm::raw_ostream& out)
 
				     // TODO: Error recovery.
			
 
				     return false;
			
 
				   }
			
 
				+
			
 
				+  auto comments = tokens.comments();
			
 
				+  auto comment_it = comments.begin();
			
 
				+
			
 
				   llvm::ListSeparator sep(" ");
			
 
				+
			
 
				   for (auto token : tokens.tokens()) {
			
 
				+    while (comment_it != comments.end() &&
			
 
				+           tokens.IsAfterComment(token, *comment_it)) {
			
 
				+      // TODO: Fix newlines and indent.
			
 
				+      out << "\n" << tokens.GetCommentText(*comment_it) << "\n";
			
 
				+      ++comment_it;
			
 
				+    }
			
 
				+
			
 
				     switch (tokens.GetKind(token)) {
			
 
				       case Lex::TokenKind::FileStart:
			
 
				         break;
			
--- a/toolchain/format/testdata/basics/comments.carbon
+++ b/toolchain/format/testdata/basics/comments.carbon
@@ -0,0 +1,57 @@
 
				+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
			
 
				+// Exceptions. See /LICENSE for license information.
			
 
				+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
			
 
				+//
			
 
				+// AUTOUPDATE
			
 
				+// TIP: To test this file alone, run:
			
 
				+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/comments.carbon
			
 
				+// TIP: To dump output, run:
			
 
				+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/comments.carbon
			
 
				+
			
 
				+// --- test.carbon
			
 
				+
			
 
				+// A comment
			
 
				+fn F() {}
			
 
				+
			
 
				+// Another comment
			
 
				+
			
 
				+  // Block
			
 
				+  // comment
			
 
				+
			
 
				+
			
 
				+class C {
			
 
				+    // Internal comment
			
 
				+}
			
 
				+
			
 
				+
			
 
				+  // Another
			
 
				+  // Block
			
 
				+  //
			
 
				+  // Comment
			
 
				+
			
 
				+// --- AUTOUPDATE-SPLIT
			
 
				+
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT: // A comment
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT: fn F ( ) { }
			
 
				+// CHECK:STDOUT: // Another comment
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT: // Block
			
 
				+// CHECK:STDOUT:   // comment
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:  class C {
			
 
				+// CHECK:STDOUT: // Internal comment
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:  }
			
 
				+// CHECK:STDOUT: // Another
			
 
				+// CHECK:STDOUT:   // Block
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT: //
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT: // Comment
			
 
				+// CHECK:STDOUT:
			
 
				+// CHECK:STDOUT:
			
--- a/toolchain/format/testdata/basics/fail_invalid_comment.carbon
+++ b/toolchain/format/testdata/basics/fail_invalid_comment.carbon
@@ -8,7 +8,6 @@
 
				 // TIP: To dump output, run:
			
 
				 // TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/fail_invalid_comment.carbon
			
 
				 
			
 
				-
			
 
				 // --- fail_test.carbon
			
 
				 
			
 
				 //f
			
--- a/toolchain/lex/lex.cpp
+++ b/toolchain/lex/lex.cpp
@@ -860,6 +860,7 @@ auto Lexer::LexCommentOrSlash(llvm::StringRef source_text, ssize_t& position)
 
				 
			
 
				 auto Lexer::LexComment(llvm::StringRef source_text, ssize_t& position) -> void {
			
 
				   CARBON_DCHECK(source_text.substr(position).starts_with("//"));
			
 
				+  int32_t comment_start = position;
			
 
				 
			
 
				   // Any comment must be the only non-whitespace on the line.
			
 
				   const auto* line_info = current_line_info();
			
@@ -874,6 +875,9 @@ auto Lexer::LexComment(llvm::StringRef source_text, ssize_t& position) -> void {
 
				     // whitespace, which already is designed to skip over any erroneous text at
			
 
				     // the end of the line.
			
 
				     LexVerticalWhitespace(source_text, position);
			
 
				+    buffer_.comments_.push_back(
			
 
				+        {.start = comment_start,
			
 
				+         .length = static_cast<int32_t>(position) - comment_start});
			
 
				     return;
			
 
				   }
			
 
				 
			
@@ -977,6 +981,10 @@ auto Lexer::LexComment(llvm::StringRef source_text, ssize_t& position) -> void {
 
				     }
			
 
				   }
			
 
				 
			
 
				+  buffer_.comments_.push_back(
			
 
				+      {.start = comment_start,
			
 
				+       .length = static_cast<int32_t>(position) - comment_start});
			
 
				+
			
 
				   // Now compute the indent of this next line before we finish.
			
 
				   ssize_t line_start = position;
			
 
				   SkipHorizontalWhitespace(source_text, position);
			
--- a/toolchain/lex/tokenized_buffer.cpp
+++ b/toolchain/lex/tokenized_buffer.cpp
@@ -345,15 +345,24 @@ auto TokenizedBuffer::AddLine(LineInfo info) -> LineIndex {
 
				   return LineIndex(static_cast<int>(line_infos_.size()) - 1);
			
 
				 }
			
 
				 
			
 
				+auto TokenizedBuffer::IsAfterComment(TokenIndex token,
			
 
				+                                     CommentIndex comment_index) const -> bool {
			
 
				+  const auto& comment_data = comments_[comment_index.index];
			
 
				+  return GetTokenInfo(token).byte_offset() > comment_data.start;  // True when the token's byte offset is past the comment's start offset.
			
 
				+}
			
 
				+
			
 
				+auto TokenizedBuffer::GetCommentText(CommentIndex comment_index) const
			
 
				+    -> llvm::StringRef {
			
 
				+  const auto& comment_data = comments_[comment_index.index];
			
 
				+  return source_->text().substr(comment_data.start, comment_data.length);  // The `length` bytes of source text beginning at `start`.
			
 
				+}
			
 
				+
			
 
				 auto TokenizedBuffer::CollectMemUsage(MemUsage& mem_usage,
			
 
				                                       llvm::StringRef label) const -> void {
			
 
				   mem_usage.Add(MemUsage::ConcatLabel(label, "allocator_"), allocator_);
			
 
				   mem_usage.Add(MemUsage::ConcatLabel(label, "token_infos_"), token_infos_);
			
 
				   mem_usage.Add(MemUsage::ConcatLabel(label, "line_infos_"), line_infos_);
			
 
				-}
			
 
				-
			
 
				-auto TokenIterator::Print(llvm::raw_ostream& output) const -> void {
			
 
				-  output << token_.index;
			
 
				+  mem_usage.Add(MemUsage::ConcatLabel(label, "comments_"), comments_);  // Also report the stored comment ranges.
			
 
				 }
			
 
				 
			
 
				 auto TokenizedBuffer::SourceBufferDiagnosticConverter::ConvertLoc(
			
--- a/toolchain/lex/tokenized_buffer.h
+++ b/toolchain/lex/tokenized_buffer.h
@@ -5,15 +5,12 @@
 
				 #ifndef CARBON_TOOLCHAIN_LEX_TOKENIZED_BUFFER_H_
			
 
				 #define CARBON_TOOLCHAIN_LEX_TOKENIZED_BUFFER_H_
			
 
				 
			
 
				-#include <compare>
			
 
				 #include <cstdint>
			
 
				-#include <iterator>
			
 
				 
			
 
				 #include "common/ostream.h"
			
 
				 #include "llvm/ADT/APInt.h"
			
 
				 #include "llvm/ADT/SmallVector.h"
			
 
				 #include "llvm/ADT/StringRef.h"
			
 
				-#include "llvm/ADT/iterator.h"
			
 
				 #include "llvm/ADT/iterator_range.h"
			
 
				 #include "llvm/Support/Allocator.h"
			
 
				 #include "llvm/Support/raw_ostream.h"
			
@@ -45,50 +42,21 @@ struct LineIndex : public IndexBase {
 
				   using IndexBase::IndexBase;
			
 
				 };
			
 
				 
			
 
				-constexpr LineIndex LineIndex::Invalid(LineIndex::InvalidIndex);
			
 
				+constexpr LineIndex LineIndex::Invalid(InvalidIndex);
			
 
				 
			
 
				-// Random-access iterator over tokens within the buffer.
			
 
				-class TokenIterator
			
 
				-    : public llvm::iterator_facade_base<TokenIterator,
			
 
				-                                        std::random_access_iterator_tag,
			
 
				-                                        const TokenIndex, int>,
			
 
				-      public Printable<TokenIterator> {
			
 
				- public:
			
 
				-  TokenIterator() = delete;
			
 
				-
			
 
				-  explicit TokenIterator(TokenIndex token) : token_(token) {}
			
 
				-
			
 
				-  auto operator==(const TokenIterator& rhs) const -> bool {
			
 
				-    return token_ == rhs.token_;
			
 
				-  }
			
 
				-  auto operator<=>(const TokenIterator& rhs) const -> std::strong_ordering {
			
 
				-    return token_ <=> rhs.token_;
			
 
				-  }
			
 
				-
			
 
				-  auto operator*() const -> const TokenIndex& { return token_; }
			
 
				-
			
 
				-  using iterator_facade_base::operator-;
			
 
				-  auto operator-(const TokenIterator& rhs) const -> int {
			
 
				-    return token_.index - rhs.token_.index;
			
 
				-  }
			
 
				-
			
 
				-  auto operator+=(int n) -> TokenIterator& {
			
 
				-    token_.index += n;
			
 
				-    return *this;
			
 
				-  }
			
 
				-  auto operator-=(int n) -> TokenIterator& {
			
 
				-    token_.index -= n;
			
 
				-    return *this;
			
 
				-  }
			
 
				+// Indices for comments within the buffer; each one selects an entry in the buffer's comment list.
			
 
				+struct CommentIndex : public IndexBase {
			
 
				+  static const CommentIndex Invalid;  // Defined after the struct from `InvalidIndex`.
			
 
				+  using IndexBase::IndexBase;  // Inherit IndexBase's constructors.
			
 
				+};
			
 
				 
			
 
				-  // Prints the raw token index.
			
 
				-  auto Print(llvm::raw_ostream& output) const -> void;
			
 
				+constexpr CommentIndex CommentIndex::Invalid(InvalidIndex);
			
 
				 
			
 
				- private:
			
 
				-  friend class TokenizedBuffer;
			
 
				+// Random-access iterator over comments within the buffer.
			
 
				+using CommentIterator = IndexIterator<CommentIndex>;
			
 
				 
			
 
				-  TokenIndex token_;
			
 
				-};
			
 
				+// Random-access iterator over tokens within the buffer.
			
 
				+using TokenIterator = IndexIterator<TokenIndex>;
			
 
				 
			
 
				 // A diagnostic location converter that maps token locations into source
			
 
				 // buffer locations.
			
@@ -115,6 +83,21 @@ class TokenDiagnosticConverter : public DiagnosticConverter<TokenIndex> {
 
				 // `HasError` returning true.
			
 
				 class TokenizedBuffer : public Printable<TokenizedBuffer> {
			
 
				  public:
			
 
				+  // A comment, which can be a block of lines.
			
 
				+  //
			
 
				+  // This is the API version of `CommentData`, which stores only a byte range.
			
 
				+  struct CommentInfo {
			
 
				+    // The comment's full text, including `//` symbols. This may have several
			
 
				+    // lines for block comments.
			
 
				+    llvm::StringRef text;
			
 
				+
			
 
				+    // The comment's indent. NOTE(review): units (columns vs. bytes) are not shown here; confirm.
			
 
				+    int32_t indent;
			
 
				+
			
 
				+    // The first line of the comment.
			
 
				+    LineIndex start_line;
			
 
				+  };
			
 
				+
			
 
				   auto GetKind(TokenIndex token) const -> TokenKind;
			
 
				   auto GetLine(TokenIndex token) const -> LineIndex;
			
 
				 
			
@@ -179,6 +162,13 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
				   // Returns the previous line handle.
			
 
				   auto GetPrevLine(LineIndex line) const -> LineIndex;
			
 
				 
			
 
				+  // Returns true if the token comes after the comment.
			
 
				+  auto IsAfterComment(TokenIndex token, CommentIndex comment_index) const
			
 
				+      -> bool;
			
 
				+
			
 
				+  // Returns the comment's full text range.
			
 
				+  auto GetCommentText(CommentIndex comment_index) const -> llvm::StringRef;
			
 
				+
			
 
				   // Prints a description of the tokenized stream to the provided `raw_ostream`.
			
 
				   //
			
 
				   // It prints one line of information for each token in the buffer, including
			
@@ -219,6 +209,11 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
				 
			
 
				   auto size() const -> int { return token_infos_.size(); }
			
 
				 
			
 
				+  auto comments() const -> llvm::iterator_range<CommentIterator> {  // Range over every comment index in [0, comments_.size()).
			
 
				+    return llvm::make_range(CommentIterator(CommentIndex(0)),
			
 
				+                            CommentIterator(CommentIndex(comments_.size())));
			
 
				+  }
			
 
				+
			
 
				   // This is an upper bound on the number of output parse nodes in the absence
			
 
				   // of errors.
			
 
				   auto expected_max_parse_tree_size() const -> int {
			
@@ -418,6 +413,20 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
				   static_assert(sizeof(TokenInfo) == 8,
			
 
				                 "Expected `TokenInfo` to pack to an 8-byte structure.");
			
 
				 
			
 
				+  // A comment, which can be a block of lines. These are tracked separately from
			
 
				+  // tokens because they don't affect parsing; if they were part of tokens, we'd
			
 
				+  // need more general special-casing within token logic.
			
 
				+  //
			
 
				+  // Note that `CommentInfo` is the API-facing struct used to expose a comment.
			
 
				+  struct CommentData {
			
 
				+    // Zero-based byte offset of the start of the comment within the source
			
 
				+    // buffer provided.
			
 
				+    int32_t start;
			
 
				+
			
 
				+    // The comment's length in bytes, measured from `start`.
			
 
				+    int32_t length;
			
 
				+  };
			
 
				+
			
 
				   struct LineInfo {
			
 
				     explicit LineInfo(int32_t start) : start(start), indent(0) {}
			
 
				 
			
@@ -458,6 +467,9 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
				 
			
 
				   llvm::SmallVector<LineInfo> line_infos_;
			
 
				 
			
 
				+  // Comments in the file.
			
 
				+  llvm::SmallVector<CommentData> comments_;
			
 
				+
			
 
				   // An upper bound on the number of parse tree nodes that we expect to be
			
 
				   // created for the tokens in this buffer.
			
 
				   int expected_max_parse_tree_size_ = 0;