Explorar el Código

Split out the SharedValueStores to be per-compilation unit. (#3353)

Advantages:

- Allows lexing/parsing in parallel, since they are modifying fully
separate ValueStores.
- Allows SemIR to reliably be stored hermetically.

Disadvantages:

- Creates overlapping storage of duplicate strings when multiple files
are compiled together.
- Prevents Ids from being uniquely compared cross-file.

Per discussion, the decision is that the advantages are more important.

The looser ownership remains because both SemIR checking and
metaprogramming may still generate things we would want to deduplicate.
It would be somewhat odd if TokenizedBuffer owned something that
checking modified.
Jon Ross-Perkins hace 2 años
padre
commit
d096655cc6

+ 8 - 4
toolchain/base/value_store.h

@@ -186,8 +186,8 @@ class ValueStore<StringId> : public Yaml::Printable<ValueStore<StringId>> {
   llvm::SmallVector<llvm::StringRef> values_;
 };
 
-// Stores that will be used across compiler steps. This is provided mainly so
-// that they don't need to be passed separately.
+// Stores that will be used across compiler phases for a given compilation unit.
+// This is provided mainly so that they don't need to be passed separately.
 class SharedValueStores : public Yaml::Printable<SharedValueStores> {
  public:
   auto integers() -> ValueStore<IntegerId>& { return integers_; }
@@ -197,8 +197,12 @@ class SharedValueStores : public Yaml::Printable<SharedValueStores> {
   auto strings() -> ValueStore<StringId>& { return strings_; }
   auto strings() const -> const ValueStore<StringId>& { return strings_; }
 
-  auto OutputYaml() const -> Yaml::OutputMapping {
-    return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
+  auto OutputYaml(std::optional<llvm::StringRef> filename = std::nullopt) const
+      -> Yaml::OutputMapping {
+    return Yaml::OutputMapping([&, filename](Yaml::OutputMapping::Map map) {
+      if (filename) {
+        map.Add("filename", *filename);
+      }
       map.Add("shared_values",
               Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
                 map.Add("integers", integers_.OutputYaml());

+ 1 - 1
toolchain/check/testdata/basics/multifile_raw_and_textual_ir.carbon

@@ -48,7 +48,7 @@ fn B() {}
 // CHECK:STDOUT: sem_ir:
 // CHECK:STDOUT:   cross_reference_irs_size: 1
 // CHECK:STDOUT:   functions:
-// CHECK:STDOUT:     function0:       {name: str1, param_refs: block0, body: [block1]}
+// CHECK:STDOUT:     function0:       {name: str0, param_refs: block0, body: [block1]}
 // CHECK:STDOUT:   classes:         {}
 // CHECK:STDOUT:   types:
 // CHECK:STDOUT:     type0:           {node: nodeFunctionType, value_rep: {kind: copy, type: type0}}

+ 1 - 1
toolchain/check/testdata/basics/multifile_raw_ir.carbon

@@ -39,7 +39,7 @@ fn B() {}
 // CHECK:STDOUT: sem_ir:
 // CHECK:STDOUT:   cross_reference_irs_size: 1
 // CHECK:STDOUT:   functions:
-// CHECK:STDOUT:     function0:       {name: str1, param_refs: block0, body: [block1]}
+// CHECK:STDOUT:     function0:       {name: str0, param_refs: block0, body: [block1]}
 // CHECK:STDOUT:   classes:         {}
 // CHECK:STDOUT:   types:
 // CHECK:STDOUT:     type0:           {node: nodeFunctionType, value_rep: {kind: copy, type: type0}}

+ 1 - 0
toolchain/driver/BUILD

@@ -21,6 +21,7 @@ cc_library(
     deps = [
         "//common:command_line",
         "//common:vlog",
+        "//toolchain/base:value_store",
         "//toolchain/check",
         "//toolchain/codegen",
         "//toolchain/diagnostics:diagnostic_emitter",

+ 18 - 11
toolchain/driver/driver.cpp

@@ -16,6 +16,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Path.h"
 #include "llvm/TargetParser/Host.h"
+#include "toolchain/base/value_store.h"
 #include "toolchain/check/check.h"
 #include "toolchain/codegen/codegen.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
@@ -392,11 +393,9 @@ auto Driver::ValidateCompileOptions(const CompileOptions& options) const
 // Ties together information for a file being compiled.
 class Driver::CompilationUnit {
  public:
-  explicit CompilationUnit(Driver* driver, SharedValueStores* value_stores,
-                           const CompileOptions& options,
+  explicit CompilationUnit(Driver* driver, const CompileOptions& options,
                            llvm::StringRef input_file_name)
       : driver_(driver),
-        value_stores_(value_stores),
         options_(options),
         input_file_name_(input_file_name),
         vlog_stream_(driver_->vlog_stream_),
@@ -422,7 +421,7 @@ class Driver::CompilationUnit {
                   << source_->text() << "\n```\n";
 
     LogCall("Lex::TokenizedBuffer::Lex", [&] {
-      tokens_ = Lex::TokenizedBuffer::Lex(*value_stores_, *source_, *consumer_);
+      tokens_ = Lex::TokenizedBuffer::Lex(value_stores_, *source_, *consumer_);
     });
     if (options_.dump_tokens) {
       consumer_->Flush();
@@ -460,7 +459,7 @@ class Driver::CompilationUnit {
     CARBON_CHECK(parse_tree_);
 
     LogCall("Check::CheckParseTree", [&] {
-      sem_ir_ = Check::CheckParseTree(*value_stores_, builtins, *tokens_,
+      sem_ir_ = Check::CheckParseTree(value_stores_, builtins, *tokens_,
                                       *parse_tree_, *consumer_, vlog_stream_);
     });
 
@@ -572,6 +571,11 @@ class Driver::CompilationUnit {
   // Flushes output.
   auto Flush() -> void { consumer_->Flush(); }
 
+  auto PrintSharedValues() const -> void {
+    Yaml::Print(driver_->output_stream_,
+                value_stores_.OutputYaml(input_file_name_));
+  }
+
  private:
   // Wraps a call with log statements to indicate start and end.
   auto LogCall(llvm::StringLiteral label, llvm::function_ref<void()> fn)
@@ -582,7 +586,7 @@ class Driver::CompilationUnit {
   }
 
   Driver* driver_;
-  SharedValueStores* value_stores_;
+  SharedValueStores value_stores_;
   const CompileOptions& options_;
   llvm::StringRef input_file_name_;
 
@@ -617,15 +621,17 @@ auto Driver::Compile(const CompileOptions& options) -> bool {
       unit->Flush();
     }
   });
-  SharedValueStores value_stores;
+  // Shared values will always be printed last.
   auto dump_shared_values = llvm::make_scope_exit([&]() {
     if (options.dump_shared_values) {
-      output_stream_ << value_stores;
+      for (const auto& unit : units) {
+        unit->PrintSharedValues();
+      }
     }
   });
   for (const auto& input_file_name : options.input_file_names) {
-    units.push_back(std::make_unique<CompilationUnit>(
-        this, &value_stores, options, input_file_name));
+    units.push_back(
+        std::make_unique<CompilationUnit>(this, options, input_file_name));
   }
 
   // Lex.
@@ -646,7 +652,8 @@ auto Driver::Compile(const CompileOptions& options) -> bool {
   }
 
   // Check.
-  auto builtins = Check::MakeBuiltins(value_stores);
+  SharedValueStores builtin_value_stores;
+  auto builtins = Check::MakeBuiltins(builtin_value_stores);
   // TODO: Organize units to compile in dependency order.
   for (auto& unit : units) {
     success_before_lower &= unit->RunCheck(builtins);

+ 1 - 0
toolchain/driver/testdata/dump_shared_values.carbon

@@ -16,6 +16,7 @@ var str1: String = "abc";
 var str2: String = "ab'\"c";
 
 // CHECK:STDOUT: ---
+// CHECK:STDOUT: filename:        dump_shared_values.carbon
 // CHECK:STDOUT: shared_values:
 // CHECK:STDOUT:   integers:
 // CHECK:STDOUT:     int0:            32

+ 2 - 2
toolchain/lex/testdata/multifile.carbon

@@ -19,10 +19,10 @@ a;
 // CHECK:STDOUT:   tokens: [
 // CHECK:STDOUT:     { index: 0, kind: 'StartOfFile', line: {{ *\d+}}, column:  1, indent: 1, spelling: '', has_trailing_space: true },
 b;
-// CHECK:STDOUT:     { index: 1, kind:  'Identifier', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: 'b', identifier: 1 },
+// CHECK:STDOUT:     { index: 1, kind:  'Identifier', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: 'b', identifier: 0 },
 // CHECK:STDOUT:     { index: 2, kind:        'Semi', line: {{ *}}[[@LINE-2]], column:  2, indent: 1, spelling: ';', has_trailing_space: true },
 a;
-// CHECK:STDOUT:     { index: 3, kind:  'Identifier', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: 'a', identifier: 0 },
+// CHECK:STDOUT:     { index: 3, kind:  'Identifier', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: 'a', identifier: 1 },
 // CHECK:STDOUT:     { index: 4, kind:        'Semi', line: {{ *}}[[@LINE-2]], column:  2, indent: 1, spelling: ';', has_trailing_space: true },
 
 // CHECK:STDOUT:     { index: 5, kind:   'EndOfFile', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },