Просмотр исходного кода

Fix `clang` runner to avoid leaking memory (#4972)

Sadly, the Clang driver unconditionally injects the `-disable-free` flag
to CC1 invocations. =/ This ends up with us leaking memory when invoking
Clang programmatically, for example in unit tests.

I've fixed this by post-processing the flags. The alternatives I see all
involve duplicating even more code from within Clang's internals into
our runner, and we already have a lot of that. I have left a TODO to try
and follow up with upstream about fixing this in a more sustainable way,
but wanted to get the testing in place anyways.

I've also threaded a flag through the various layers so that when we're
on the command line we can actually skip this and get the same compile
time benefits that Clang itself gets from disabling freeing all of the
internal data structures.

---------

Co-authored-by: Jon Ross-Perkins <jperkins@google.com>
Chandler Carruth 1 год назад
Родитель
Сommit
5b30888dd8

+ 16 - 1
toolchain/driver/clang_runner.cpp

@@ -146,7 +146,22 @@ auto ClangRunner::Run(llvm::ArrayRef<llvm::StringRef> args) -> bool {
   // Note the subprocessing will effectively call `clang -cc1`, which turns into
   // `carbon-busybox clang -cc1`, which results in an equivalent `clang_main`
   // call.
-  auto cc1_main = [](llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
+  //
+  // Also note that we only do `-disable-free` filtering in the in-process
+  // execution here, as subprocesses leaking memory won't impact this process.
+  auto cc1_main = [enable_leaking = enable_leaking_](
+                      llvm::SmallVectorImpl<const char*>& cc1_args) -> int {
+    // Clang doesn't expose any option to disable injecting `-disable-free` into
+    // the CC1 invocation, or any way to append a flag that undoes it. So to
+    // avoid leaks, we need to edit the `cc1_args` here and remove
+    // `-disable-free`.
+    //
+    // TODO: We should see if upstream would be open to some configuration hook
+    // to suppress this when it is generating the CC1 flags and use that.
+    if (!enable_leaking) {
+      llvm::erase(cc1_args, llvm::StringRef("-disable-free"));
+    }
+
     // cc1_args[0] will be the `clang_path` so we don't need the prepend arg.
     llvm::ToolContext tool_context = {
         .Path = cc1_args[0], .PrependArg = "clang", .NeedsPrependArg = false};

+ 13 - 0
toolchain/driver/clang_runner.h

@@ -49,6 +49,17 @@ class ClangRunner {
   // Run Clang with the provided arguments.
   auto Run(llvm::ArrayRef<llvm::StringRef> args) -> bool;
 
+  // Enable leaking memory.
+  //
+  // Clang can avoid deallocating some of its memory to improve compile time.
+  // However, this isn't compatible with library-based invocations. When using
+  // the runner in a context where memory leaks are acceptable, such as from a
+  // command line driver, you can use this to enable that leaking behavior. Note
+  // that this will not override _explicit_ `args` in a run invocation that
+  // cause leaking, it will merely disable Clang's libraries injecting that
+  // behavior.
+  auto EnableLeakingMemory() -> void { enable_leaking_ = true; }
+
  private:
   const InstallPaths* installation_;
 
@@ -57,6 +68,8 @@ class ClangRunner {
   llvm::raw_ostream* vlog_stream_;
 
   llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagnostic_ids_;
+
+  bool enable_leaking_ = false;
 };
 
 }  // namespace Carbon

+ 35 - 0
toolchain/driver/clang_runner_test.cpp

@@ -159,5 +159,40 @@ TEST(ClangRunnerTest, BuitinHeaders) {
   EXPECT_THAT(err, StrEq(""));
 }
 
+TEST(ClangRunnerTest, CompileMultipleFiles) {
+  const auto install_paths =
+      InstallPaths::MakeForBazelRunfiles(Testing::GetExePath());
+
+  // Memory leaks and other errors from running Clang can at times only manifest
+  // with repeated compilations. Use a lambda to just do a series of compiles.
+  auto compile = [&](llvm::StringRef filename, llvm::StringRef source) {
+    std::string output_file = std::string(filename.split('.').first) + ".o";
+    std::filesystem::path file = *Testing::WriteTestFile(filename, source);
+    std::filesystem::path output = *Testing::WriteTestFile(output_file, "");
+
+    RawStringOstream verbose_out;
+    std::string target = llvm::sys::getDefaultTargetTriple();
+    auto vfs = llvm::vfs::getRealFileSystem();
+    ClangRunner runner(&install_paths, target, vfs, &verbose_out);
+    std::string out;
+    std::string err;
+    EXPECT_TRUE(Testing::CallWithCapturedOutput(
+        out, err,
+        [&] {
+          return runner.Run({"-c", file.string(), "-o", output.string()});
+        }))
+        << "Verbose output from runner:\n"
+        << verbose_out.TakeStr() << "\n";
+    verbose_out.clear();
+
+    EXPECT_THAT(out, StrEq(""));
+    EXPECT_THAT(err, StrEq(""));
+  };
+
+  compile("test1.cpp", "int test1() { return 0; }");
+  compile("test2.cpp", "int test2() { return 0; }");
+  compile("test3.cpp", "int test3() { return 0; }");
+}
+
 }  // namespace
 }  // namespace Carbon

+ 5 - 0
toolchain/driver/clang_subcommand.cpp

@@ -58,6 +58,11 @@ auto ClangSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
     return {.success = false};
   }
 
+  // Only enable Clang's leaking of memory if the driver can support that.
+  if (driver_env.enable_leaking) {
+    runner.EnableLeakingMemory();
+  }
+
   return {.success = runner.Run(options_.args)};
 }
 

+ 3 - 2
toolchain/driver/driver.h

@@ -25,9 +25,10 @@ class Driver {
   explicit Driver(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
                   const InstallPaths* installation, FILE* input_stream,
                   llvm::raw_pwrite_stream* output_stream,
-                  llvm::raw_pwrite_stream* error_stream, bool fuzzing = false)
+                  llvm::raw_pwrite_stream* error_stream, bool fuzzing = false,
+                  bool enable_leaking = false)
       : driver_env_(std::move(fs), installation, input_stream, output_stream,
-                    error_stream, fuzzing) {}
+                    error_stream, fuzzing, enable_leaking) {}
 
   // Parses the given arguments into both a subcommand to select the operation
   // to perform and any arguments to that subcommand.

+ 8 - 1
toolchain/driver/driver_env.h

@@ -20,13 +20,15 @@ struct DriverEnv {
   explicit DriverEnv(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
                      const InstallPaths* installation, FILE* input_stream,
                      llvm::raw_pwrite_stream* output_stream,
-                     llvm::raw_pwrite_stream* error_stream, bool fuzzing)
+                     llvm::raw_pwrite_stream* error_stream, bool fuzzing,
+                     bool enable_leaking)
       : fs(std::move(fs)),
         installation(installation),
         input_stream(input_stream),
         output_stream(output_stream),
         error_stream(error_stream),
         fuzzing(fuzzing),
+        enable_leaking(enable_leaking),
         consumer(error_stream),
         emitter(&consumer) {}
 
@@ -48,6 +50,11 @@ struct DriverEnv {
   // fuzzing.
   bool fuzzing;
 
+  // Tracks whether the driver can leak resources, typically because it is being
+  // invoked as part of a single command line program execution. Defaults to
+  // `false` for safe and correct library execution.
+  bool enable_leaking = false;
+
   // A diagnostic consumer, to be able to connect output.
   StreamDiagnosticConsumer consumer;
 

+ 2 - 1
toolchain/install/busybox_main.cpp

@@ -45,7 +45,8 @@ static auto Main(int argc, char** argv) -> ErrorOr<int> {
   }
   args.append(argv + 1, argv + argc);
 
-  Driver driver(fs, &install_paths, stdin, &llvm::outs(), &llvm::errs());
+  Driver driver(fs, &install_paths, stdin, &llvm::outs(), &llvm::errs(),
+                /*fuzzing=*/false, /*enable_leaking=*/true);
   bool success = driver.RunCommand(args).success;
   return success ? EXIT_SUCCESS : EXIT_FAILURE;
 }