| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542 |
- #!/usr/bin/env python3
- """Updates the CHECK: lines in tests with an AUTOUPDATE line."""
- __copyright__ = """
- Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- Exceptions. See /LICENSE for license information.
- SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- """
- from abc import ABC, abstractmethod
- import argparse
- from concurrent import futures
- import os
- from pathlib import Path
- import re
- import subprocess
- from typing import (
- Any,
- Dict,
- List,
- Match,
- NamedTuple,
- Optional,
- Pattern,
- Set,
- Tuple,
- )
- # A prefix followed by a command to run for autoupdating checked output.
- AUTOUPDATE_MARKER = "// AUTOUPDATE"
- # Indicates no autoupdate is requested.
- NOAUTOUPDATE_MARKER = "// NOAUTOUPDATE"
- # Supported tools.
- TOOLS = {
- "carbon": "//toolchain/driver:carbon",
- "explorer": "//explorer:explorer",
- }
- class ParsedArgs(NamedTuple):
- autoupdate_args: List[str]
- build_mode: str
- extra_check_replacements: List[Tuple[Pattern, Pattern, str]]
- line_number_delta_prefix: str
- line_number_pattern: Pattern
- lit_run: List[str]
- testdata: str
- tests: List[Path]
- tool: str
- def parse_args() -> ParsedArgs:
- """Parses command-line arguments and flags."""
- parser = argparse.ArgumentParser(description=__doc__)
- parser.add_argument("tests", nargs="*")
- parser.add_argument(
- "--autoupdate_arg",
- metavar="COMMAND",
- default=[],
- action="append",
- help="Optional arguments to pass to the autoupdate command.",
- )
- parser.add_argument(
- "--build_mode",
- metavar="MODE",
- default="opt",
- help="The build mode to use. Defaults to opt for faster execution.",
- )
- parser.add_argument(
- "--extra_check_replacement",
- nargs=3,
- metavar=("MATCHING", "BEFORE", "AFTER"),
- default=[],
- action="append",
- help="On a CHECK line with MATCHING, does a regex replacement of "
- "BEFORE with AFTER.",
- )
- parser.add_argument(
- "--line_number_delta_prefix",
- metavar="PREFIX",
- default="",
- help="An optional prefix to add before the [[@LINE+delta]] marker.",
- )
- parser.add_argument(
- "--line_number_pattern",
- metavar="PATTERN",
- default=r"(?P<prefix>(?P<filename>\w+\.carbon):)"
- r"(?P<line>\d+)(?P<suffix>(?:\D|$))",
- help="A regular expression which matches line numbers to update as its "
- "only group. Capture groups 'prefix', 'line', and 'suffix' are "
- "required for structure. The 'filename' capture group is optional and "
- "should be provided when lines may belong to different files.",
- )
- parser.add_argument(
- "--lit_run",
- metavar="COMMAND",
- default=[],
- required=False,
- action="append",
- help="RUN lines to set.",
- )
- parser.add_argument(
- "--testdata",
- metavar="PATH",
- required=True,
- help="The path to the testdata to update, relative to the workspace "
- "root.",
- )
- parser.add_argument(
- "--tool",
- metavar="TOOL",
- required=True,
- choices=TOOLS.keys(),
- help="The tool being tested.",
- )
- parsed_args = parser.parse_args()
- extra_check_replacements = [
- (re.compile(line_matcher), re.compile(before), after)
- for line_matcher, before, after in parsed_args.extra_check_replacement
- ]
- return ParsedArgs(
- autoupdate_args=parsed_args.autoupdate_arg,
- build_mode=parsed_args.build_mode,
- extra_check_replacements=extra_check_replacements,
- line_number_delta_prefix=parsed_args.line_number_delta_prefix,
- line_number_pattern=re.compile(parsed_args.line_number_pattern),
- lit_run=parsed_args.lit_run,
- testdata=parsed_args.testdata,
- tests=[Path(test).resolve() for test in parsed_args.tests],
- tool=parsed_args.tool,
- )
- def get_tests(testdata: str) -> Set[Path]:
- """Get the list of tests from the filesystem."""
- tests = set()
- for root, _, files in os.walk(testdata):
- for f in files:
- if f in {"lit.cfg.py", "BUILD", "README.md"}:
- # Ignore the lit config.
- continue
- if os.path.splitext(f)[1] == ".carbon":
- tests.add(Path(root).joinpath(f))
- else:
- exit(f"Unrecognized file type in testdata: {f}")
- return tests
- class Line(ABC):
- """A line that may appear in the resulting test file."""
- @abstractmethod
- def format(
- self, *, output_line_number: int, line_number_remap: Dict[int, int]
- ) -> str:
- raise NotImplementedError
- class OriginalLine(Line):
- """A line that was copied from the original test file."""
- def __init__(self, line_number: int, text: str) -> None:
- self.line_number = line_number
- self.text = text
- def format(self, **kwargs: Any) -> str:
- return self.text
- class RunLine(Line):
- """A RUN line."""
- def __init__(self, text: str) -> None:
- self.text = text
- def format(self, **kwargs: Any) -> str:
- return self.text
- class CheckLine(Line):
- """A `// CHECK:` line generated from the test output.
- If there's a line number, it'll be fixed up after we've figured out which
- lines to include in the resulting test file and in what order, because
- their contents depend on where an original input line appears in the output.
- """
- def __init__(
- self,
- test: str,
- out_line: str,
- line_number_delta_prefix: str,
- line_number_pattern: Pattern,
- ) -> None:
- super().__init__()
- self.filename = Path(test).name
- self.indent = ""
- self.out_line = out_line.rstrip()
- self.line_number_delta_prefix = line_number_delta_prefix
- self.line_number_pattern = line_number_pattern
- # If any match is specific to this file, use the first matched line for
- # the location of the CHECK comment.
- self.line_in_file = None
- for match in line_number_pattern.finditer(self.out_line):
- if self._matches_filename(match):
- self.line_in_file = int(match.group("line")) - 1
- break
- def format(
- self, *, output_line_number: int, line_number_remap: Dict[int, int]
- ) -> str:
- assert self.out_line
- result = self.out_line
- while True:
- match = self.line_number_pattern.search(result)
- if not match:
- break
- if self._matches_filename(match):
- line_number = int(match.group("line")) - 1
- delta = line_number_remap[line_number] - output_line_number
- # We use `:+d` here to produce `LINE-n` or `LINE+n` as
- # appropriate.
- result = self.line_number_pattern.sub(
- rf"\g<prefix>{self.line_number_delta_prefix}"
- rf"[[@LINE{delta:+d}]]\g<suffix>",
- result,
- count=1,
- )
- else:
- result = self.line_number_pattern.sub(
- r"\g<prefix>{{.*}}\g<suffix>",
- result,
- count=1,
- )
- return f"{self.indent}// CHECK:{result}\n"
- def _matches_filename(self, match: Match) -> bool:
- return (
- "filename" not in match.groupdict()
- or match.group("filename") == self.filename
- )
- def find_autoupdate(test: str, orig_lines: List[str]) -> Optional[int]:
- """Figures out whether autoupdate should occur.
- For AUTOUPDATE, returns the line. For NOAUTOUPDATE, returns None.
- """
- found = 0
- result = None
- for line_number, line in enumerate(orig_lines):
- if line.startswith(AUTOUPDATE_MARKER):
- found += 1
- result = line_number
- elif line.startswith(NOAUTOUPDATE_MARKER):
- found += 1
- if found == 0:
- raise ValueError(
- f"{test} must have either '{AUTOUPDATE_MARKER}' or "
- f"'{NOAUTOUPDATE_MARKER}'"
- )
- elif found > 1:
- raise ValueError(
- f"{test} must have only one of '{AUTOUPDATE_MARKER}' or "
- f"'{NOAUTOUPDATE_MARKER}'"
- )
- return result
- def replace_all(s: str, replacements: List[Tuple[str, str]]) -> str:
- """Runs multiple replacements on a string."""
- for before, after in replacements:
- s = s.replace(before, after)
- return s
- def label_output(label: str, output: str) -> List[str]:
- """Merges output with labels.
- This mirrors label_output in lit_test/merge_output.py and should
- be kept in sync. They're separate in order to avoid a subprocess or import
- complexity.
- """
- result = []
- if output:
- for line in output.splitlines():
- result.append(" ".join(filter(None, (label, line))))
- return result
- def get_matchable_test_output(
- autoupdate_args: List[str],
- for_lit: bool,
- extra_check_replacements: List[Tuple[Pattern, Pattern, str]],
- tool: str,
- bazel_runfiles: Pattern,
- llvm_symbolizer: str,
- test: str,
- ) -> List[str]:
- """Runs the autoupdate command and returns the output lines."""
- # Run the autoupdate command to generate output.
- # (`bazel run` would serialize)
- autoupdate_cmd = Path.cwd().joinpath(
- TOOLS[tool].replace("//", "./bazel-bin/").replace(":", "/")
- )
- p = subprocess.run(
- [str(autoupdate_cmd)] + autoupdate_args + [Path(test).name],
- env={"LLVM_SYMBOLIZER_PATH": llvm_symbolizer},
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- encoding="utf-8",
- cwd=str(Path(test).parent),
- )
- out_lines = label_output("STDOUT:", p.stdout)
- out_lines.extend(label_output("STDERR:", p.stderr))
- for i, line in enumerate(out_lines):
- # Escape things that mirror FileCheck special characters.
- line = line.replace("{{", "{{[{][{]}}")
- line = line.replace("[[", "{{[[][[]}}")
- if for_lit:
- # `lit` uses full paths to the test file, so use a regex to ignore
- # paths when used.
- line = line.replace(test, f"{{{{.*}}}}/{test}")
- line = bazel_runfiles.sub("{{.*}}/", line)
- for line_matcher, before, after in extra_check_replacements:
- if line_matcher.match(line):
- line = before.sub(after, line)
- out_lines[i] = line
- return out_lines
- def is_replaced(line: str) -> bool:
- """Returns true if autoupdate should replace the line."""
- line = line.lstrip()
- return line.startswith("// CHECK") or line.startswith("// RUN:")
- def merge_lines(
- line_number_delta_prefix: str,
- line_number_pattern: Pattern,
- lit_run: List[str],
- test: str,
- autoupdate_line_number: int,
- raw_orig_lines: List[str],
- out_lines: List[str],
- ) -> List[Line]:
- """Merges the original output and new lines."""
- orig_lines = [
- OriginalLine(i, line)
- for i, line in enumerate(raw_orig_lines)
- if not is_replaced(line)
- ]
- check_lines = [
- CheckLine(test, out_line, line_number_delta_prefix, line_number_pattern)
- for out_line in out_lines
- ]
- result_lines: List[Line] = []
- # CHECK lines must go after AUTOUPDATE.
- while orig_lines and orig_lines[0].line_number <= autoupdate_line_number:
- result_lines.append(orig_lines.pop(0))
- for line in lit_run:
- run_not = ""
- if Path(test).name.startswith("fail_"):
- run_not = "%{not} "
- result_lines.append(RunLine(f"// RUN: {run_not}{line}\n"))
- # Interleave the original lines and the CHECK: lines.
- while orig_lines and check_lines:
- # Original lines go first when the CHECK line is known and later.
- if (
- check_lines[0].line_in_file is not None
- and check_lines[0].line_in_file > orig_lines[0].line_number
- ):
- result_lines.append(orig_lines.pop(0))
- else:
- check_line = check_lines.pop(0)
- # Indent to match the next original line.
- check_line.indent = re.findall("^ *", orig_lines[0].text)[0]
- result_lines.append(check_line)
- # One list is non-empty; append remaining lines from both to catch it.
- result_lines.extend(orig_lines)
- result_lines.extend(check_lines)
- return result_lines
- def update_check(
- parsed_args: ParsedArgs,
- bazel_runfiles: Pattern,
- llvm_symbolizer: str,
- test: Path,
- ) -> bool:
- """Updates the CHECK: lines for `test` by running the tool.
- Returns true if a change was made.
- """
- with test.open() as f:
- orig_lines = f.readlines()
- # Make sure we're supposed to autoupdate.
- autoupdate_line = find_autoupdate(str(test), orig_lines)
- if autoupdate_line is None:
- return False
- # Determine the merged output lines.
- out_lines = get_matchable_test_output(
- parsed_args.autoupdate_args,
- bool(parsed_args.lit_run),
- parsed_args.extra_check_replacements,
- parsed_args.tool,
- bazel_runfiles,
- llvm_symbolizer,
- str(test),
- )
- result_lines = merge_lines(
- parsed_args.line_number_delta_prefix,
- parsed_args.line_number_pattern,
- parsed_args.lit_run,
- str(test),
- autoupdate_line,
- orig_lines,
- out_lines,
- )
- # Calculate the remap for original lines.
- line_number_remap = dict(
- [
- (line.line_number, i)
- for i, line in enumerate(result_lines)
- if isinstance(line, OriginalLine)
- ]
- )
- # If the last line of the original output was a CHECK, replace it with an
- # empty line.
- if orig_lines[-1].lstrip().startswith("// CHECK"):
- line_number_remap[len(orig_lines) - 1] = len(result_lines) - 1
- # Generate contents for any lines that depend on line numbers.
- formatted_result_lines = [
- line.format(output_line_number=i, line_number_remap=line_number_remap)
- for i, line in enumerate(result_lines)
- ]
- # If nothing's changed, we're done.
- if formatted_result_lines == orig_lines:
- return False
- # Interleave the new CHECK: lines with the tested content.
- with test.open("w") as f:
- f.writelines(formatted_result_lines)
- return True
- def update_checks(
- parsed_args: ParsedArgs,
- bazel_runfiles: Pattern,
- llvm_symbolizer: str,
- tests: Set[Path],
- ) -> None:
- """Updates CHECK: lines in lit tests."""
- def map_helper(test: Path) -> bool:
- try:
- updated = update_check(
- parsed_args, bazel_runfiles, llvm_symbolizer, test
- )
- except Exception as e:
- raise ValueError(f"Failed to update {test}") from e
- print(".", end="", flush=True)
- return updated
- print(f"Updating {len(tests)} lit test(s)...")
- with futures.ThreadPoolExecutor() as exec:
- # list() iterates in order to immediately propagate exceptions.
- results = list(exec.map(map_helper, tests))
- # Each update call indicates progress with a dot without a newline, so put a
- # newline to wrap.
- print(f"\nUpdated {results.count(True)} lit test(s).")
- def main() -> None:
- # Parse arguments relative to the working directory.
- parsed_args = parse_args()
- # Remaining script logic should be relative to the repository root.
- root = Path(__file__).parent.parent.parent
- os.chdir(root)
- if parsed_args.tests:
- tests = {test.relative_to(root) for test in parsed_args.tests}
- else:
- print(
- "HINT: run `autoupdate_testdata.py f1 f2 ...` "
- "to update specific tests"
- )
- tests = get_tests(parsed_args.testdata)
- # Build inputs.
- print(f"Building {parsed_args.tool}...")
- subprocess.check_call(
- [
- "bazel",
- "build",
- "-c",
- parsed_args.build_mode,
- TOOLS[parsed_args.tool],
- ]
- )
- bazel_bin_dir = subprocess.check_output(
- ["bazel", "info", "-c", parsed_args.build_mode, "bazel-bin"],
- encoding="utf-8",
- ).strip()
- bazel_runfiles = re.compile(
- r"{0}/.*\.runfiles/carbon/".format(re.escape(bazel_bin_dir))
- )
- # Grab the symbolizer.
- clang_var_content = Path(
- "bazel-execroot/external/bazel_cc_toolchain/"
- "clang_detected_variables.bzl"
- ).read_text()
- llvm_symbolizer = re.search(
- '(?m)^llvm_symbolizer = "(.*)"$', clang_var_content
- )
- assert llvm_symbolizer is not None
- # Run updates.
- update_checks(parsed_args, bazel_runfiles, llvm_symbolizer[1], tests)
- if __name__ == "__main__":
- main()
|