Procházet zdrojové kódy

Add formatter support for dump-sem-ir ranges (#5379)

This prints instructions that are inside the range, and entities that
overlap with the range. Note this can lead to incomplete printing of
entity contents.
Jon Ross-Perkins před 1 rokem
rodič
revize
8eae40646a

+ 189 - 0
toolchain/check/testdata/basics/no_prelude/dump_sem_ir_range.carbon

@@ -0,0 +1,189 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// AUTOUPDATE
+// TIP: To test this file alone, run:
+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/check/testdata/basics/no_prelude/dump_sem_ir_range.carbon
+// TIP: To dump output, run:
+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/check/testdata/basics/no_prelude/dump_sem_ir_range.carbon
+
+// --- function.carbon
+
+library "[[@TEST_NAME]]";
+
+fn A() {
+  var a: ();
+}
+
+fn B() -> () {
+  var b: ();
+  //@dump-sem-ir-begin
+  b = A();
+  //@dump-sem-ir-end
+  return b;
+}
+
+//@dump-sem-ir-begin
+fn C() -> () {
+  var c: ();
+  c = B();
+  return c;
+}
+//@dump-sem-ir-end
+
+// --- class.carbon
+
+library "[[@TEST_NAME]]";
+
+class A {
+  fn F();
+
+  //@dump-sem-ir-begin
+  fn G();
+  //@dump-sem-ir-end
+}
+
+class B {
+  fn H();
+}
+
+//@dump-sem-ir-begin
+class C {
+  fn I();
+  //@dump-sem-ir-end
+
+  fn J();
+}
+
+// --- call_params.carbon
+
+library "[[@TEST_NAME]]";
+
+fn F(a: (), b: (), c: ());
+
+fn A();
+fn B();
+fn C();
+
+fn G() {
+  F(
+    //@dump-sem-ir-begin
+    A(),
+    //@dump-sem-ir-end
+    B(),
+    //@dump-sem-ir-begin
+    C()
+    //@dump-sem-ir-end
+  );
+}
+
+// CHECK:STDOUT: --- function.carbon
+// CHECK:STDOUT:
+// CHECK:STDOUT: constants {
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: file {
+// CHECK:STDOUT:   %C.decl: %C.type = fn_decl @C [concrete = constants.%C] {
+// CHECK:STDOUT:     %return.patt: %pattern_type = return_slot_pattern
+// CHECK:STDOUT:     %return.param_patt: %pattern_type = out_param_pattern %return.patt, call_param0
+// CHECK:STDOUT:   } {
+// CHECK:STDOUT:     %.loc17_12.1: %empty_tuple.type = tuple_literal ()
+// CHECK:STDOUT:     %.loc17_12.2: type = converted %.loc17_12.1, constants.%empty_tuple.type [concrete = constants.%empty_tuple.type]
+// CHECK:STDOUT:     %return.param: ref %empty_tuple.type = out_param call_param0
+// CHECK:STDOUT:     %return: ref %empty_tuple.type = return_slot %return.param
+// CHECK:STDOUT:   }
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: fn @B() -> %empty_tuple.type {
+// CHECK:STDOUT: !entry:
+// CHECK:STDOUT:   %b.ref.loc11: ref %empty_tuple.type = name_ref b, %b
+// CHECK:STDOUT:   %A.ref: %A.type = name_ref A, file.%A.decl [concrete = constants.%A]
+// CHECK:STDOUT:   %A.call: init %empty_tuple.type = call %A.ref()
+// CHECK:STDOUT:   assign %b.ref.loc11, %A.call
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: fn @C() -> %empty_tuple.type {
+// CHECK:STDOUT: !entry:
+// CHECK:STDOUT:   name_binding_decl {
+// CHECK:STDOUT:     %c.patt: %pattern_type = binding_pattern c
+// CHECK:STDOUT:     %.loc18_3: %pattern_type = var_pattern %c.patt
+// CHECK:STDOUT:   }
+// CHECK:STDOUT:   %c.var: ref %empty_tuple.type = var c
+// CHECK:STDOUT:   %.loc18_11.1: type = splice_block %.loc18_11.3 [concrete = constants.%empty_tuple.type] {
+// CHECK:STDOUT:     %.loc18_11.2: %empty_tuple.type = tuple_literal ()
+// CHECK:STDOUT:     %.loc18_11.3: type = converted %.loc18_11.2, constants.%empty_tuple.type [concrete = constants.%empty_tuple.type]
+// CHECK:STDOUT:   }
+// CHECK:STDOUT:   %c: ref %empty_tuple.type = bind_name c, %c.var
+// CHECK:STDOUT:   %c.ref.loc19: ref %empty_tuple.type = name_ref c, %c
+// CHECK:STDOUT:   %B.ref: %B.type = name_ref B, file.%B.decl [concrete = constants.%B]
+// CHECK:STDOUT:   %B.call: init %empty_tuple.type = call %B.ref()
+// CHECK:STDOUT:   assign %c.ref.loc19, %B.call
+// CHECK:STDOUT:   %c.ref.loc20: ref %empty_tuple.type = name_ref c, %c
+// CHECK:STDOUT:   %tuple: %empty_tuple.type = tuple_value () [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   %.loc20: %empty_tuple.type = converted %c.ref.loc20, %tuple [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   return %.loc20
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: --- class.carbon
+// CHECK:STDOUT:
+// CHECK:STDOUT: constants {
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: file {
+// CHECK:STDOUT:   %C.decl: type = class_decl @C [concrete = constants.%C] {} {}
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: class @A {
+// CHECK:STDOUT:   %G.decl: %G.type = fn_decl @G [concrete = constants.%G] {} {}
+// CHECK:STDOUT:   %empty_struct_type: type = struct_type {} [concrete = constants.%empty_struct_type]
+// CHECK:STDOUT:   %complete_type: <witness> = complete_type_witness %empty_struct_type [concrete = constants.%complete_type]
+// CHECK:STDOUT:   complete_type_witness = %complete_type
+// CHECK:STDOUT:
+// CHECK:STDOUT: !members:
+// CHECK:STDOUT:   .Self = constants.%A
+// CHECK:STDOUT:   .F = %F.decl
+// CHECK:STDOUT:   .G = %G.decl
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: class @C {
+// CHECK:STDOUT:   %I.decl: %I.type = fn_decl @I [concrete = constants.%I] {} {}
+// CHECK:STDOUT:   %empty_struct_type: type = struct_type {} [concrete = constants.%empty_struct_type]
+// CHECK:STDOUT:   %complete_type: <witness> = complete_type_witness %empty_struct_type [concrete = constants.%complete_type]
+// CHECK:STDOUT:   complete_type_witness = %complete_type
+// CHECK:STDOUT:
+// CHECK:STDOUT: !members:
+// CHECK:STDOUT:   .Self = constants.%C
+// CHECK:STDOUT:   .I = %I.decl
+// CHECK:STDOUT:   .J = %J.decl
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: fn @G();
+// CHECK:STDOUT:
+// CHECK:STDOUT: fn @I();
+// CHECK:STDOUT:
+// CHECK:STDOUT: --- call_params.carbon
+// CHECK:STDOUT:
+// CHECK:STDOUT: constants {
+// CHECK:STDOUT: }
+// CHECK:STDOUT:
+// CHECK:STDOUT: file {}
+// CHECK:STDOUT:
+// CHECK:STDOUT: fn @G() {
+// CHECK:STDOUT: !entry:
+// CHECK:STDOUT:   %A.ref: %A.type = name_ref A, file.%A.decl [concrete = constants.%A]
+// CHECK:STDOUT:   %A.call: init %empty_tuple.type = call %A.ref()
+// CHECK:STDOUT:   %C.ref: %C.type = name_ref C, file.%C.decl [concrete = constants.%C]
+// CHECK:STDOUT:   %C.call: init %empty_tuple.type = call %C.ref()
+// CHECK:STDOUT:   %.loc13_7.1: ref %empty_tuple.type = temporary_storage
+// CHECK:STDOUT:   %.loc13_7.2: ref %empty_tuple.type = temporary %.loc13_7.1, %A.call
+// CHECK:STDOUT:   %tuple.loc13: %empty_tuple.type = tuple_value () [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   %.loc13_7.3: %empty_tuple.type = converted %A.call, %tuple.loc13 [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   %.loc17_7.1: ref %empty_tuple.type = temporary_storage
+// CHECK:STDOUT:   %.loc17_7.2: ref %empty_tuple.type = temporary %.loc17_7.1, %C.call
+// CHECK:STDOUT:   %tuple.loc17: %empty_tuple.type = tuple_value () [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   %.loc17_7.3: %empty_tuple.type = converted %C.call, %tuple.loc17 [concrete = constants.%empty_tuple]
+// CHECK:STDOUT:   %F.call: init %empty_tuple.type = call %F.ref(%.loc13_7.3, %.loc15_7.3, %.loc17_7.3)
+// CHECK:STDOUT:   return
+// CHECK:STDOUT: }
+// CHECK:STDOUT:

+ 2 - 1
toolchain/driver/compile_subcommand.cpp

@@ -568,7 +568,8 @@ auto CompilationUnit::PostCheck() -> void {
       return IncludeInDumps(import_ir->filename());
     };
 
-    SemIR::Formatter formatter(&*sem_ir_, should_format_entity);
+    SemIR::Formatter formatter(&*sem_ir_, should_format_entity,
+                               *tree_and_subtrees_getter_);
     formatter.Format();
     if (vlog_stream_) {
       CARBON_VLOG("*** SemIR::File ***\n");

+ 17 - 0
toolchain/lex/tokenized_buffer.cpp

@@ -440,4 +440,21 @@ auto TokenizedBuffer::TokenToDiagnosticLoc(TokenIndex token) const
   return converted;
 }
 
+auto TokenizedBuffer::OverlapsWithDumpSemIRRange(TokenIndex begin,
+                                                 TokenIndex inclusive_end) const
+    -> bool {
+  // Without any dump ranges, every query is reported as overlapping.
+  if (dump_sem_ir_ranges_.empty()) {
+    return true;
+  }
+
+  // Ranges are ordered, so the first range that ends after `begin` decides the
+  // answer; ranges that end at or before `begin` are skipped.
+  for (const auto& candidate : dump_sem_ir_ranges_) {
+    if (candidate.end <= begin) {
+      continue;
+    }
+    return candidate.begin <= inclusive_end;
+  }
+  return false;
+}
+
 }  // namespace Carbon::Lex

+ 19 - 14
toolchain/lex/tokenized_buffer.h

@@ -85,18 +85,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
     LineIndex start_line;
   };
 
-  // A range of tokens marked by `//@dump-semir-[begin|end]`. The end token is
-  // non-inclusive: [begin, end).
-  //
-  // The particular syntax was chosen because it can be lexed efficiently. It
-  // only occurs in invalid comment strings, so shouldn't slow down lexing of
-  // correct code. It's also comment-like because its presence won't affect
-  // parse/check.
-  struct DumpSemIRRange {
-    TokenIndex begin;
-    TokenIndex end;
-  };
-
   auto GetKind(TokenIndex token) const -> TokenKind;
   auto GetLine(TokenIndex token) const -> LineIndex;
 
@@ -192,6 +180,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
   auto TokenToDiagnosticLoc(TokenIndex token) const
       -> Diagnostics::ConvertedLoc;
 
+  // Returns true if no dump ranges exist, or if the given range overlaps one.
+  auto OverlapsWithDumpSemIRRange(TokenIndex begin,
+                                  TokenIndex inclusive_end) const -> bool;
+
   // Returns true if the buffer has errors that were detected at lexing time.
   auto has_errors() const -> bool { return has_errors_; }
 
@@ -209,8 +201,9 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
   auto comments_size() const -> size_t { return comments_.size(); }
 
-  auto dump_sem_ir_ranges() -> llvm::ArrayRef<DumpSemIRRange> {
-    return dump_sem_ir_ranges_;
+  // Returns true if any `DumpSemIRRange`s were provided.
+  auto has_dump_sem_ir_ranges() const -> bool {
+    return !dump_sem_ir_ranges_.empty();
   }
 
   // This is an upper bound on the number of output parse nodes in the absence
@@ -257,6 +250,18 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
     const TokenizedBuffer* tokens_;
   };
 
+  // A range of tokens marked by `//@dump-sem-ir-[begin|end]`. The end token is
+  // non-inclusive: [begin, end).
+  //
+  // The particular syntax was chosen because it can be lexed efficiently. It
+  // only occurs in invalid comment strings, so shouldn't slow down lexing of
+  // correct code. It's also comment-like because its presence won't affect
+  // parse/check.
+  struct DumpSemIRRange {
+    TokenIndex begin;
+    TokenIndex end;
+  };
+
   // Converts a pointer into the source to a diagnostic location.
   auto SourcePointerToDiagnosticLoc(const char* loc) const
       -> Diagnostics::ConvertedLoc;

+ 64 - 11
toolchain/sem_ir/formatter.cpp

@@ -15,6 +15,7 @@
 #include "toolchain/base/shared_value_stores.h"
 #include "toolchain/lex/tokenized_buffer.h"
 #include "toolchain/parse/tree.h"
+#include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/builtin_function_kind.h"
 #include "toolchain/sem_ir/constant.h"
 #include "toolchain/sem_ir/entity_with_params_base.h"
@@ -32,10 +33,12 @@
 namespace Carbon::SemIR {
 
 Formatter::Formatter(const File* sem_ir,
-                     ShouldFormatEntityFn should_format_entity)
+                     ShouldFormatEntityFn should_format_entity,
+                     Parse::GetTreeAndSubtreesFn get_tree_and_subtrees)
     : sem_ir_(sem_ir),
       inst_namer_(sem_ir_),
-      should_format_entity_(should_format_entity) {
+      should_format_entity_(should_format_entity),
+      get_tree_and_subtrees_(get_tree_and_subtrees) {
   // Create the first chunk and assign it to all instructions that don't have
   // a chunk of their own.
   auto first_chunk = AddChunkNoFlush(true);
@@ -141,15 +144,61 @@ auto Formatter::IncludeChunkInOutput(size_t chunk) -> void {
   }
 }
 
-auto Formatter::ShouldFormatEntity(InstId decl_id) -> bool {
+auto Formatter::OverlapsWithDumpSemIRRange(
+    InstId inst_id, llvm::ArrayRef<InstBlockId> body_block_ids) -> bool {
+  const auto& parse_tree = sem_ir_->parse_tree();
+  const auto& tokens = parse_tree.tokens();
+
+  // With no dump ranges in the file, nothing is filtered out.
+  if (!tokens.has_dump_sem_ir_ranges()) {
+    return true;
+  }
+
+  // Only an instruction whose canonical location is a parse node is compared
+  // against the ranges; any other location kind is reported as not overlapping.
+  auto decl_loc_id = sem_ir_->insts().GetCanonicalLocId(inst_id);
+  if (decl_loc_id.kind() != LocId::Kind::NodeId) {
+    return false;
+  }
+
+  // For the declaration, check the full token range of its node subtree.
+  auto decl_tokens =
+      get_tree_and_subtrees_().GetSubtreeTokenRange(decl_loc_id.node_id());
+  if (tokens.OverlapsWithDumpSemIRRange(decl_tokens.begin, decl_tokens.end)) {
+    return true;
+  }
+
+  // The declaration didn't overlap a dump range, so fall back to the body.
+  // TODO: We currently don't track the definition end, so this checks all
+  // instructions in the body. Maybe we should start tracking definition end
+  // nodes on entities?
+  for (auto body_block_id : body_block_ids) {
+    auto body_insts = sem_ir_->inst_blocks().GetOrEmpty(body_block_id);
+    for (auto body_inst_id : body_insts) {
+      auto body_loc_id = sem_ir_->insts().GetCanonicalLocId(body_inst_id);
+      if (body_loc_id.kind() != LocId::Kind::NodeId) {
+        continue;
+      }
+      // Each body instruction is tested as a single-token range.
+      auto token = parse_tree.node_token(body_loc_id.node_id());
+      if (tokens.OverlapsWithDumpSemIRRange(token, token)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+auto Formatter::ShouldFormatEntity(InstId decl_id,
+                                   llvm::ArrayRef<InstBlockId> body_block_ids)
+    -> bool {
   if (!decl_id.has_value()) {
     return true;
   }
-  return should_format_entity_(decl_id);
+  if (!should_format_entity_(decl_id)) {
+    return false;
+  }
+  return OverlapsWithDumpSemIRRange(decl_id, body_block_ids);
 }
 
-auto Formatter::ShouldFormatEntity(const EntityWithParamsBase& entity) -> bool {
-  return ShouldFormatEntity(entity.latest_decl_id());
+auto Formatter::ShouldFormatEntity(const EntityWithParamsBase& entity,
+                                   llvm::ArrayRef<InstBlockId> body_block_ids)
+    -> bool {
+  return ShouldFormatEntity(entity.latest_decl_id(), body_block_ids);
 }
 
 auto Formatter::OpenBrace() -> void {
@@ -217,7 +266,7 @@ auto Formatter::FormatScopeIfUsed(InstNamer::ScopeId scope_id,
 
 auto Formatter::FormatClass(ClassId id) -> void {
   const Class& class_info = sem_ir_->classes().Get(id);
-  if (!ShouldFormatEntity(class_info)) {
+  if (!ShouldFormatEntity(class_info, class_info.body_block_id)) {
     return;
   }
 
@@ -246,7 +295,7 @@ auto Formatter::FormatClass(ClassId id) -> void {
 
 auto Formatter::FormatInterface(InterfaceId id) -> void {
   const Interface& interface_info = sem_ir_->interfaces().Get(id);
-  if (!ShouldFormatEntity(interface_info)) {
+  if (!ShouldFormatEntity(interface_info, interface_info.body_block_id)) {
     return;
   }
 
@@ -282,7 +331,7 @@ auto Formatter::FormatInterface(InterfaceId id) -> void {
 auto Formatter::FormatAssociatedConstant(AssociatedConstantId id) -> void {
   const AssociatedConstant& assoc_const =
       sem_ir_->associated_constants().Get(id);
-  if (!ShouldFormatEntity(assoc_const.decl_id)) {
+  if (!ShouldFormatEntity(assoc_const.decl_id, /*body_block_ids=*/{})) {
     return;
   }
 
@@ -306,7 +355,7 @@ auto Formatter::FormatAssociatedConstant(AssociatedConstantId id) -> void {
 
 auto Formatter::FormatImpl(ImplId id) -> void {
   const Impl& impl_info = sem_ir_->impls().Get(id);
-  if (!ShouldFormatEntity(impl_info)) {
+  if (!ShouldFormatEntity(impl_info, impl_info.body_block_id)) {
     return;
   }
 
@@ -348,7 +397,7 @@ auto Formatter::FormatImpl(ImplId id) -> void {
 
 auto Formatter::FormatFunction(FunctionId id) -> void {
   const Function& fn = sem_ir_->functions().Get(id);
-  if (!ShouldFormatEntity(fn)) {
+  if (!ShouldFormatEntity(fn, fn.body_block_ids)) {
     return;
   }
 
@@ -649,6 +698,10 @@ auto Formatter::FormatInst(InstId inst_id, ImportRefUnloaded inst) -> void {
 }
 
 auto Formatter::FormatInst(InstId inst_id) -> void {
+  if (!OverlapsWithDumpSemIRRange(inst_id, /*body_block_ids=*/{})) {
+    return;
+  }
+
   if (!inst_id.has_value()) {
     Indent();
     out_ << "none\n";

+ 14 - 3
toolchain/sem_ir/formatter.h

@@ -8,6 +8,7 @@
 #include <concepts>
 
 #include "llvm/Support/raw_ostream.h"
+#include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/inst_namer.h"
 
@@ -22,7 +23,8 @@ class Formatter {
       llvm::function_ref<auto(InstId decl_inst_id)->bool>;
 
   explicit Formatter(const File* sem_ir,
-                     ShouldFormatEntityFn should_format_entity);
+                     ShouldFormatEntityFn should_format_entity,
+                     Parse::GetTreeAndSubtreesFn get_tree_and_subtrees);
 
   // Prints the SemIR into an internal buffer.
   //
@@ -77,11 +79,19 @@ class Formatter {
   // is.
   auto IncludeChunkInOutput(size_t chunk) -> void;
 
+  // Returns true if the node subtree for the instruction or body overlaps with
+  // a dump range, or if there are no ranges.
+  auto OverlapsWithDumpSemIRRange(InstId inst_id,
+                                  llvm::ArrayRef<InstBlockId> body_block_ids)
+      -> bool;
+
   // Determines whether the specified entity should be included in the formatted
   // output.
-  auto ShouldFormatEntity(InstId decl_id) -> bool;
+  auto ShouldFormatEntity(InstId decl_id,
+                          llvm::ArrayRef<InstBlockId> body_block_ids) -> bool;
 
-  auto ShouldFormatEntity(const EntityWithParamsBase& entity) -> bool;
+  auto ShouldFormatEntity(const EntityWithParamsBase& entity,
+                          llvm::ArrayRef<InstBlockId> body_block_ids) -> bool;
 
   // Begins a braced block. Writes an open brace, and prepares to insert a
   // newline after it if the braced block is non-empty.
@@ -307,6 +317,7 @@ class Formatter {
   const File* sem_ir_;
   InstNamer inst_namer_;
   ShouldFormatEntityFn should_format_entity_;
+  Parse::GetTreeAndSubtreesFn get_tree_and_subtrees_;
 
   // The output stream buffer.
   std::string buffer_;