Explore the code

Consolidate token-related range handling to one struct (#5399)

This consolidates Lex::TokenizedBuffer::DumpSemIRRange and
Parse::TreeAndSubtrees::TokenRange into a single InclusiveTokenRange,
also making the OverlapsWithDumpSemIRRange function take the new struct.

I considered switching to `llvm::iterator_range<Lex::TokenIterator>`,
but we often want to check whether the range has exactly one token. Using
`TokenIterator` would add a bunch of offsetting arithmetic to make that
work, which I view as low-value overhead.

For example:

```
  Lex::InclusiveTokenRange token_range = GetSubtreeTokenRange(node_id);
  auto begin_loc = tree_->tokens().TokenToDiagnosticLoc(token_range.begin);
  if (token_range.begin == token_range.end) {
    return begin_loc;
  }
  auto end_loc = tree_->tokens().TokenToDiagnosticLoc(token_range.end);
```

would become:

```
  llvm::iterator_range<Lex::TokenIterator> token_range = GetSubtreeTokenRange(node_id);
  auto begin_loc = tree_->tokens().TokenToDiagnosticLoc(*token_range.begin());
  if (token_range.begin() + 1 == token_range.end()) {
    return begin_loc;
  }
  auto end_loc = tree_->tokens().TokenToDiagnosticLoc(*(token_range.end() - 1));
```

So I'm keeping the bespoke struct.
Jon Ross-Perkins — 1 year ago
parent
commit
1f268b5d8b

+ 1 - 1
toolchain/lex/lex.cpp

@@ -915,7 +915,7 @@ auto Lexer::EndDumpSemIRRange(const char* diag_loc) -> void {
     return;
   }
 
-  buffer_.dump_sem_ir_ranges_.back().end = TokenIndex(buffer_.size());
+  buffer_.dump_sem_ir_ranges_.back().end = TokenIndex(buffer_.size() - 1);
 }
 
 auto Lexer::EndDumpSemIRRangeIfIncomplete(const char* diag_loc) -> void {

+ 11 - 11
toolchain/lex/testdata/dump_sem_ir_range.carbon

@@ -20,7 +20,7 @@ b
 c
 // CHECK:STDOUT:   - { index: 3, kind: "Identifier", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "c", identifier: 2, has_leading_space: true }
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 1, end: 4}
+// CHECK:STDOUT:   - {begin: 1, end: 3}
 //@dump-sem-ir-end
 
 // --- multi_section.carbon
@@ -42,16 +42,16 @@ d
 e
 // CHECK:STDOUT:   - { index: 5, kind: "Identifier", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "e", identifier: 4, has_leading_space: true }
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 2, end: 3}
-// CHECK:STDOUT:   - {begin: 4, end: 5}
+// CHECK:STDOUT:   - {begin: 2, end: 2}
+// CHECK:STDOUT:   - {begin: 4, end: 4}
 
 // --- compact.carbon
 // CHECK:STDOUT: - filename: compact.carbon
 // CHECK:STDOUT:   tokens:
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 1, end: 1}
-// CHECK:STDOUT:   - {begin: 1, end: 1}
-// CHECK:STDOUT:   - {begin: 1, end: 1}
+// CHECK:STDOUT:   - {begin: 1, end: 0}
+// CHECK:STDOUT:   - {begin: 1, end: 0}
+// CHECK:STDOUT:   - {begin: 1, end: 0}
 //@dump-sem-ir-begin
 //@dump-sem-ir-end
 //@dump-sem-ir-begin
@@ -73,7 +73,7 @@ e
 // CHECK:STDOUT: - filename: fail_start_only.carbon
 // CHECK:STDOUT:   tokens:
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 1, end: 1}
+// CHECK:STDOUT:   - {begin: 1, end: 0}
 
 //@dump-sem-ir-begin
 // CHECK:STDERR: fail_start_only.carbon:[[@LINE+4]]:1: error: missing `//@dump-sem-ir-end` to match `//@dump-sem-ir-begin` [DumpSemIRRangeMissingEnd]
@@ -118,8 +118,8 @@ d
 e
 // CHECK:STDOUT:   - { index: 5, kind: "Identifier", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "e", identifier: 4, has_leading_space: true }
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 2, end: 3}
-// CHECK:STDOUT:   - {begin: 3, end: 4}
+// CHECK:STDOUT:   - {begin: 2, end: 2}
+// CHECK:STDOUT:   - {begin: 3, end: 3}
 
 // --- fail_count_mismatch.carbon
 // CHECK:STDOUT: - filename: fail_count_mismatch.carbon
@@ -137,8 +137,8 @@ c
 d
 // CHECK:STDOUT:   - { index: 4, kind: "Identifier", line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: "d", identifier: 3, has_leading_space: true }
 // CHECK:STDOUT:   dump_sem_ir_ranges:
-// CHECK:STDOUT:   - {begin: 2, end: 3}
-// CHECK:STDOUT:   - {begin: 4, end: 5}
+// CHECK:STDOUT:   - {begin: 2, end: 2}
+// CHECK:STDOUT:   - {begin: 4, end: 4}
 
 // CHECK:STDERR: fail_count_mismatch.carbon:[[@LINE+3]]:17: error: missing `//@dump-sem-ir-end` to match `//@dump-sem-ir-begin` [DumpSemIRRangeMissingEnd]
 // CHECK:STDERR: // CHECK:STDERR:

+ 6 - 9
toolchain/lex/tokenized_buffer.cpp

@@ -440,18 +440,15 @@ auto TokenizedBuffer::TokenToDiagnosticLoc(TokenIndex token) const
   return converted;
 }
 
-auto TokenizedBuffer::OverlapsWithDumpSemIRRange(TokenIndex begin,
-                                                 TokenIndex inclusive_end) const
-    -> bool {
-  if (dump_sem_ir_ranges_.empty()) {
-    return true;
-  }
+auto TokenizedBuffer::OverlapsWithDumpSemIRRange(
+    Lex::InclusiveTokenRange range) const -> bool {
+  CARBON_CHECK(!dump_sem_ir_ranges_.empty());
 
   // Ranges are ordered, so we can decide overlap as soon as we find a range
   // that ends after `begin`.
-  for (auto range : dump_sem_ir_ranges_) {
-    if (range.end > begin) {
-      return range.begin <= inclusive_end;
+  for (auto dump_range : dump_sem_ir_ranges_) {
+    if (dump_range.end >= range.begin) {
+      return dump_range.begin <= range.end;
     }
   }
   return false;

+ 17 - 17
toolchain/lex/tokenized_buffer.h

@@ -60,6 +60,12 @@ using CommentIterator = IndexIterator<CommentIndex>;
 // Random-access iterator over tokens within the buffer.
 using TokenIterator = IndexIterator<TokenIndex>;
 
+// A token range which is inclusive of the begin and end.
+struct InclusiveTokenRange {
+  TokenIndex begin;
+  TokenIndex end;
+};
+
 // A buffer of tokenized Carbon source code.
 //
 // This is constructed by lexing the source code text into a series of tokens.
@@ -180,9 +186,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
   auto TokenToDiagnosticLoc(TokenIndex token) const
       -> Diagnostics::ConvertedLoc;
 
-  // Returns true if the given range overlaps with a `DumpSemIRRange`.
-  auto OverlapsWithDumpSemIRRange(TokenIndex begin,
-                                  TokenIndex inclusive_end) const -> bool;
+  // Returns true if the given range overlaps with an entry in
+  // `dump_sem_ir_ranges_`. Must not be called when there are no ranges; query
+  // `has_dump_sem_ir_ranges` first.
+  auto OverlapsWithDumpSemIRRange(Lex::InclusiveTokenRange range) const -> bool;
 
   // Returns true if the buffer has errors that were detected at lexing time.
   auto has_errors() const -> bool { return has_errors_; }
@@ -250,18 +257,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
     const TokenizedBuffer* tokens_;
   };
 
-  // A range of tokens marked by `//@dump-semir-[begin|end]`. The end token is
-  // non-inclusive: [begin, end).
-  //
-  // The particular syntax was chosen because it can be lexed efficiently. It
-  // only occurs in invalid comment strings, so shouldn't slow down lexing of
-  // correct code. It's also comment-like because its presence won't affect
-  // parse/check.
-  struct DumpSemIRRange {
-    TokenIndex begin;
-    TokenIndex end;
-  };
-
   // Converts a pointer into the source to a diagnostic location.
   auto SourcePointerToDiagnosticLoc(const char* loc) const
       -> Diagnostics::ConvertedLoc;
@@ -503,8 +498,13 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
   // Comments in the file.
   llvm::SmallVector<CommentData> comments_;
 
-  // Ranges of SemIR to dump.
-  llvm::SmallVector<DumpSemIRRange> dump_sem_ir_ranges_;
+  // A range of tokens marked by `//@dump-semir-[begin|end]`.
+  //
+  // The particular syntax was chosen because it can be lexed efficiently. It
+  // only occurs in invalid comment strings, so shouldn't slow down lexing of
+  // correct code. It's also comment-like because its presence won't affect
+  // parse/check.
+  llvm::SmallVector<InclusiveTokenRange> dump_sem_ir_ranges_;
 
   // An upper bound on the number of parse tree nodes that we expect to be
   // created for the tokens in this buffer.

+ 5 - 4
toolchain/parse/tree_and_subtrees.cpp

@@ -232,9 +232,10 @@ auto TreeAndSubtrees::CollectMemUsage(MemUsage& mem_usage,
                     subtree_sizes_);
 }
 
-auto TreeAndSubtrees::GetSubtreeTokenRange(NodeId node_id) const -> TokenRange {
-  TokenRange range = {.begin = tree_->node_token(node_id),
-                      .end = Lex::TokenIndex::None};
+auto TreeAndSubtrees::GetSubtreeTokenRange(NodeId node_id) const
+    -> Lex::InclusiveTokenRange {
+  Lex::InclusiveTokenRange range = {.begin = tree_->node_token(node_id),
+                                    .end = Lex::TokenIndex::None};
   range.end = range.begin;
   for (NodeId desc : postorder(node_id)) {
     Lex::TokenIndex desc_token = tree_->node_token(desc);
@@ -264,7 +265,7 @@ auto TreeAndSubtrees::NodeToDiagnosticLoc(NodeId node_id, bool token_only) const
 
   // Construct a location that encompasses all tokens that descend from this
   // node (including the root).
-  TokenRange token_range = GetSubtreeTokenRange(node_id);
+  Lex::InclusiveTokenRange token_range = GetSubtreeTokenRange(node_id);
   auto begin_loc = tree_->tokens().TokenToDiagnosticLoc(token_range.begin);
   if (token_range.begin == token_range.end) {
     return begin_loc;

+ 1 - 7
toolchain/parse/tree_and_subtrees.h

@@ -17,12 +17,6 @@ namespace Carbon::Parse {
 // This requires a complete tree.
 class TreeAndSubtrees {
  public:
-  // A range of tokens, returned by GetSubtreeTokenRange.
-  struct TokenRange {
-    Lex::TokenIndex begin;
-    Lex::TokenIndex end;
-  };
-
   class SiblingIterator;
 
   explicit TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
@@ -115,7 +109,7 @@ class TreeAndSubtrees {
       -> void;
 
   // Returns the range of tokens in the node's subtree.
-  auto GetSubtreeTokenRange(NodeId node_id) const -> TokenRange;
+  auto GetSubtreeTokenRange(NodeId node_id) const -> Lex::InclusiveTokenRange;
 
   // Converts the node to a diagnostic location, covering either the full
   // subtree or only the token.

+ 7 - 7
toolchain/sem_ir/formatter.cpp

@@ -196,8 +196,7 @@ auto Formatter::ShouldFormatEntity(InstId decl_id, bool is_definition_start)
   // the subtree (for example, it can miss modifiers), but finding the earliest
   // token requires walking *all* children, whereas this approach is
   // constant-time.
-  auto begin = sem_ir_->parse_tree().node_token(
-      *tree_and_subtrees.postorder(loc_id.node_id()).begin());
+  auto begin_node_id = *tree_and_subtrees.postorder(loc_id.node_id()).begin();
 
   // Non-defining declarations will be associated with a `Decl` node.
   // Definitions will have a `DefinitionStart` for which we can use the parent
@@ -207,10 +206,11 @@ auto Formatter::ShouldFormatEntity(InstId decl_id, bool is_definition_start)
   if (is_definition_start) {
     end_node_id = node_parents_[end_node_id.index];
   }
-  auto inclusive_end = sem_ir_->parse_tree().node_token(end_node_id);
 
-  return sem_ir_->parse_tree().tokens().OverlapsWithDumpSemIRRange(
-      begin, inclusive_end);
+  Lex::InclusiveTokenRange range = {
+      .begin = sem_ir_->parse_tree().node_token(begin_node_id),
+      .end = sem_ir_->parse_tree().node_token(end_node_id)};
+  return sem_ir_->parse_tree().tokens().OverlapsWithDumpSemIRRange(range);
 }
 
 auto Formatter::ShouldFormatEntity(const EntityWithParamsBase& entity) -> bool {
@@ -230,8 +230,8 @@ auto Formatter::ShouldFormatInst(InstId inst_id) -> bool {
   }
 
   auto token = sem_ir_->parse_tree().node_token(loc_id.node_id());
-  return sem_ir_->parse_tree().tokens().OverlapsWithDumpSemIRRange(token,
-                                                                   token);
+  return sem_ir_->parse_tree().tokens().OverlapsWithDumpSemIRRange(
+      Lex::InclusiveTokenRange{.begin = token, .end = token});
 }
 
 auto Formatter::OpenBrace() -> void {