Prechádzať zdrojové kódy

Handle jj diff-style conflict markers in autoupdate (#6559)

Geoff Romer 3 mesiacov pred
rodič
commit
2e59a0d520

+ 17 - 1
testing/file_test/BUILD

@@ -2,6 +2,7 @@
 # Exceptions. See /LICENSE for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+load("@rules_cc//cc:cc_test.bzl", "cc_test")
 load("//bazel/cc_rules:defs.bzl", "cc_library")
 load("rules.bzl", "file_test")
 
@@ -35,9 +36,11 @@ cc_library(
         "run_test.cpp",
         "run_test.h",
         "test_file.cpp",
+    ],
+    hdrs = [
+        "file_test_base.h",
         "test_file.h",
     ],
-    hdrs = ["file_test_base.h"],
     deps = [
         ":autoupdate",
         ":manifest",
@@ -60,6 +63,19 @@ cc_library(
     ],
 )
 
+cc_test(
+    name = "test_file_test",
+    size = "small",
+    srcs = ["test_file_test.cpp"],
+    deps = [
+        ":file_test_base",
+        ":manifest_impl",
+        "//common:error_test_helpers",
+        "//testing/base:file_helpers",
+        "@googletest//:gtest",
+    ],
+)
+
 file_test(
     name = "file_test_base_test",
     size = "small",

+ 99 - 58
testing/file_test/test_file.cpp

@@ -25,75 +25,116 @@ using ::testing::Matcher;
 using ::testing::MatchesRegex;
 using ::testing::StrEq;
 
-// Processes conflict markers, including tracking of whether code is within a
-// conflict marker. Returns true if the line is consumed.
+// Represents the different kinds of version-control conflict markers that are
+// relevant for the autoupdater. One key concern here is the distinction between
+// "snapshot" and "diff" conflict regions. Snapshot regions are the more
+// traditional kind, where the entire region between two markers represents the
+// exact state of a region of the underlying file at some snapshot (e.g. the
+// base commit or one of the conflicting commits). Diff regions are
+// produced by jj. They show the diff between the base and one side of the
+// conflict, using a prefix character on each line: '+' indicates an added line,
+// '-' indicates a removed line, and ' ' indicates an unchanged line. Note that
+// a single conflict may contain both snapshot and diff regions.
+//
+// See https://docs.jj-vcs.dev/latest/conflicts/ for more information.
+enum class MarkerKind {
+  // Represents a line that is not a conflict marker.
+  None,
+  // Marks the start of a conflict, and potentially a snapshot region.
+  Start,
+  // Marks the end of a conflict.
+  End,
+  // Marks the start of a snapshot region.
+  Snapshot,
+  // Marks the start of a diff region.
+  Diff
+};
+
+// Processes conflict markers, including tracking the previous conflict marker.
+// Returns true if the line is consumed.
 static auto TryConsumeConflictMarker(bool running_autoupdate,
                                      llvm::StringRef line,
                                      llvm::StringRef line_trimmed,
-                                     bool& inside_conflict_marker)
+                                     MarkerKind& previous_marker)
     -> ErrorOr<bool> {
-  bool is_start = line.starts_with("<<<<<<<");
-  bool is_end = line.starts_with(">>>>>>>");
-  bool is_middle =
-      // git internal conflict markers ("merge" and "diff3" style).
-      line.starts_with("=======") ||
-      line.starts_with("|||||||")
-      // jj internal conflict markers ("snapshot" style).
-      || line.starts_with("+++++++") || line.starts_with("-------");
-  // jj internal conflict marker ("diff" style)
-  bool is_jj_diff = line.starts_with("%%%%%%%");
+  MarkerKind new_marker;
+  if (line.starts_with("<<<<<<<")) {
+    new_marker = MarkerKind::Start;
+  } else if (line.starts_with(">>>>>>>")) {
+    new_marker = MarkerKind::End;
+  } else if (line.starts_with("=======") || line.starts_with("|||||||") ||
+             line.starts_with("+++++++") || line.starts_with("-------")) {
+    // git uses "=======" and "|||||||" to mark boundaries between conflict
+    // regions (which are always snapshots). jj uses "+++++++" and "-------" to
+    // mark the start of different kinds of snapshot regions.
+    new_marker = MarkerKind::Snapshot;
+  } else if (line.starts_with("%%%%%%%") || line.starts_with(R"(\\\\\\\)")) {
+    // jj uses "%%%%%%%" to mark the start of a diff region, and "\\\\\\\" to
+    // add a second line to a "%%%%%%%" marker for formatting purposes.
+    new_marker = MarkerKind::Diff;
+  } else {
+    new_marker = MarkerKind::None;
+  }
 
   // When running the test, any conflict marker is an error.
-  if (!running_autoupdate && (is_start || is_middle || is_end || is_jj_diff)) {
+  if (!running_autoupdate && (new_marker != MarkerKind::None)) {
     return ErrorBuilder() << "Conflict marker found:\n" << line;
   }
 
-  if (is_jj_diff && running_autoupdate) {
-    // TODO: Add support for JJ's diff-style conflict markers.
-    return ErrorBuilder()
-           << "Found jj \"diff\" style conflict marker."
-              " Autoupdate only supports \"snapshot\" style conflict markers."
-              " To switch, use `jj config set --repo ui.conflict-marker-style"
-              " \"snapshot\"`, and then run `jj new` (or `jj edit`) again to "
-              " materialize the new style. For more details, see: "
-              "https://docs.jj-vcs.dev/latest/conflicts/";
-  }
-
-  // Autoupdate tracks conflict markers for context, and will discard
-  // conflicting lines when it can autoupdate them.
-  if (inside_conflict_marker) {
-    if (is_start) {
-      return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
-                            << line;
-    }
-    if (is_middle) {
+  bool inside_conflict_marker = [&] {
+    switch (previous_marker) {
+      case MarkerKind::None:
+      case MarkerKind::End:
+        return false;
+      case MarkerKind::Start:
+      case MarkerKind::Snapshot:
+      case MarkerKind::Diff:
+        return true;
+    }
+  }();
+
+  switch (new_marker) {
+    case MarkerKind::End:
+    case MarkerKind::Snapshot:
+    case MarkerKind::Diff:
+      if (!inside_conflict_marker) {
+        return ErrorBuilder()
+               << "Unexpected conflict marker outside conflict:\n"
+               << line;
+      }
+      previous_marker = new_marker;
       return true;
-    }
-    if (is_end) {
-      inside_conflict_marker = false;
+    case MarkerKind::Start:
+      if (inside_conflict_marker) {
+        return ErrorBuilder() << "Unexpected conflict marker inside conflict:\n"
+                              << line;
+      }
+      previous_marker = new_marker;
       return true;
-    }
+    case MarkerKind::None:
+      if (!inside_conflict_marker) {
+        return false;
+      }
 
-    // Look for CHECK and TIP lines, which can be discarded.
-    if (line_trimmed.starts_with("// CHECK:STDOUT:") ||
-        line_trimmed.starts_with("// CHECK:STDERR:") ||
-        line_trimmed.starts_with("// TIP:")) {
-      return true;
-    }
+      if (previous_marker == MarkerKind::Diff) {
+        if (!line.consume_front(" ") && !line.consume_front("+") &&
+            !line.consume_front("-")) {
+          return ErrorBuilder() << "Line inside diff-style conflict doesn't "
+                                   "start with '+', '-', or ' ':\n"
+                                << line;
+        }
+        line_trimmed = line.ltrim();
+      }
 
-    return ErrorBuilder()
-           << "Autoupdate can't discard non-CHECK lines inside conflicts:\n"
-           << line;
-  } else {
-    if (is_start) {
-      inside_conflict_marker = true;
-      return true;
-    }
-    if (is_middle || is_end) {
-      return ErrorBuilder() << "Unexpected conflict marker outside conflict:\n"
-                            << line;
-    }
-    return false;
+      // Look for CHECK and TIP lines, which can be discarded.
+      if (line_trimmed.starts_with("// CHECK:STDOUT:") ||
+          line_trimmed.starts_with("// CHECK:STDERR:") ||
+          line_trimmed.starts_with("// TIP:")) {
+        return true;
+      }
+
+      return ErrorBuilder() << "Autoupdate can't discard non-CHECK lines "
+                               "inside conflicts:\n";
   }
 }
 
@@ -718,7 +759,7 @@ static auto ProcessFileContent(llvm::StringRef filename,
 
   // When autoupdating, we track whether we're inside conflict markers.
   // Otherwise conflict markers are errors.
-  bool inside_conflict_marker = false;
+  auto previous_conflict_marker = MarkerKind::None;
 
   SplitState split_state;
 
@@ -732,7 +773,7 @@ static auto ProcessFileContent(llvm::StringRef filename,
     CARBON_ASSIGN_OR_RETURN(
         is_consumed,
         TryConsumeConflictMarker(running_autoupdate, line, line_trimmed,
-                                 inside_conflict_marker));
+                                 previous_conflict_marker));
     if (is_consumed) {
       continue;
     }

+ 170 - 0
testing/file_test/test_file_test.cpp

@@ -0,0 +1,170 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "testing/file_test/test_file.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "common/error_test_helpers.h"
+#include "testing/base/file_helpers.h"
+#include "testing/file_test/file_test_base.h"
+
+// The current BUILD structure of this library means that in order to unit-test
+// test_file.h, we have to pretend to be a file_test. That means there has to
+// be a definition of CarbonFileTestManifest (normally generated by a file_test
+// Bazel rule), and there has to be a registered FileTestFactory, so we define
+// them both as dummies.
+// TODO: restructure BUILD rules to avoid the need for this hack.
+
+// Dummy empty manifest.
+// NOLINTNEXTLINE(readability-identifier-naming): manifest.cpp dictates spelling
+const char* CarbonFileTestManifest[] = {nullptr};
+
+namespace Carbon::Testing {
+
+// Dummy unusable FileTestBase.
+class DummyFileTest : public FileTestBase {
+ public:
+  DummyFileTest(llvm::StringRef /*exe_path*/, llvm::StringRef test_name)
+      : FileTestBase(test_name) {}
+
+  auto Run(const llvm::SmallVector<llvm::StringRef>& /*test_args*/,
+           llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>& /*fs*/,
+           FILE* /*input_stream*/, llvm::raw_pwrite_stream& /*output_stream*/,
+           llvm::raw_pwrite_stream& /*error_stream*/) const
+      -> ErrorOr<RunResult> override {
+    CARBON_FATAL("Called method of dummy object");
+  }
+
+  auto GetDefaultArgs() const -> llvm::SmallVector<std::string> override {
+    CARBON_FATAL("Called method of dummy object");
+  }
+};
+CARBON_FILE_TEST_FACTORY(DummyFileTest)
+
+namespace {
+
+using ::testing::_;
+using ::testing::StrEq;
+
+// Returns the non-check lines of test_file as a string.
+auto NonCheckFileContents(const TestFile& test_file) -> std::string {
+  std::string buffer;
+  llvm::raw_string_ostream out(buffer);
+  for (const auto& line : test_file.non_check_lines) {
+    line.Print(out);
+    out << "\n";
+  }
+  return buffer;
+}
+
+// Returns a string consisting of 7 repetitions of `c`, for use as a simulated
+// merge conflict marker. We use this in test inputs instead of writing the
+// conflict markers directly in the string literals so that tools don't treat
+// them as genuine merge conflicts in this file.
+auto Marker(char c) -> std::string { return std::string(7, c); }
+
+TEST(AutoupdateTest, SnapshotMergeConflict) {
+  std::string initial_contents = R"(
+// AUTOUPDATE
+Some text
+)" + Marker('<') + R"(
+// CHECK:STDOUT: side 1
+)" + Marker('=') + R"(
+// CHECK:STDOUT: side 2
+)" + Marker('>') + R"(
+More text
+)";
+
+  constexpr std::string_view ExpectedContents = R"(
+// AUTOUPDATE
+Some text
+More text
+)";
+  auto file_path = *WriteTestFile("tmp", initial_contents);
+  auto test_file =
+      ProcessTestFile(file_path.c_str(), /*running_autoupdate=*/true);
+  ASSERT_THAT(test_file, IsSuccess(_)) << test_file.error();
+  EXPECT_THAT(NonCheckFileContents(*test_file), StrEq(ExpectedContents));
+}
+
+TEST(AutoupdateTest, SnapshotTheeWayMergeConflict) {
+  std::string initial_contents = R"(
+// AUTOUPDATE
+Some text
+)" + Marker('<') + R"(
+// CHECK:STDOUT: side 1
+)" + Marker('|') + R"(
+// CHECK:STDOUT: base
+)" + Marker('=') + R"(
+// CHECK:STDOUT: side 2
+)" + Marker('>') + R"(
+More text
+)";
+
+  constexpr std::string_view ExpectedContents = R"(
+// AUTOUPDATE
+Some text
+More text
+)";
+  auto file_path = *WriteTestFile("tmp", initial_contents);
+  auto test_file =
+      ProcessTestFile(file_path.c_str(), /*running_autoupdate=*/true);
+  ASSERT_THAT(test_file, IsSuccess(_)) << test_file.error();
+  EXPECT_THAT(NonCheckFileContents(*test_file), StrEq(ExpectedContents));
+}
+
+TEST(AutoupdateTest, DiffMergeConflict) {
+  std::string initial_contents = R"(
+// AUTOUPDATE
+Some text
+)" + Marker('<') + R"(
+)" + Marker('%') + R"(
+)" + Marker('\\') + R"(
+ // CHECK:STDOUT: unchanged
+-// CHECK:STDOUT: base
++// CHECK:STDOUT: side 1
+)" + Marker('+') + R"(
+// CHECK:STDOUT: side 2
+)" + Marker('>') + R"(
+More text
+)";
+
+  constexpr std::string_view ExpectedContents = R"(
+// AUTOUPDATE
+Some text
+More text
+)";
+  auto file_path = *WriteTestFile("tmp", initial_contents);
+  auto test_file =
+      ProcessTestFile(file_path.c_str(), /*running_autoupdate=*/true);
+  ASSERT_THAT(test_file, IsSuccess(_)) << test_file.error();
+  EXPECT_THAT(NonCheckFileContents(*test_file), StrEq(ExpectedContents));
+}
+
+TEST(AutoupdateTest, NonCheckInDiffRegion) {
+  std::string initial_contents = R"(
+// AUTOUPDATE
+Some text
+)" + Marker('<') + R"(
+)" + Marker('%') + R"(
+)" + Marker('\\') + R"(
+ // unchanged
+-// CHECK:STDOUT: base
++// CHECK:STDOUT: side 1
+)" + Marker('+') + R"(
+// CHECK:STDOUT: side 2
+)" + Marker('>') + R"(
+More text
+)";
+
+  auto file_path = *WriteTestFile("tmp", initial_contents);
+  auto test_file =
+      ProcessTestFile(file_path.c_str(), /*running_autoupdate=*/true);
+  ASSERT_THAT(test_file, IsError(_));
+}
+
+}  // namespace
+}  // namespace Carbon::Testing