#!/usr/bin/env python3

"""Updates the CHECK: lines in tests with an AUTOUPDATE line."""

__copyright__ = """
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""

from abc import ABC, abstractmethod
import argparse
from concurrent import futures
import os
from pathlib import Path
import re
import subprocess
import sys
from typing import (
    Any,
    Dict,
    List,
    Match,
    NamedTuple,
    Optional,
    Pattern,
    Set,
    Tuple,
)

# A prefix followed by a command to run for autoupdating checked output.
AUTOUPDATE_MARKER = "// AUTOUPDATE"

# Indicates no autoupdate is requested.
NOAUTOUPDATE_MARKER = "// NOAUTOUPDATE"

# Supported tools, mapped to the bazel target that builds each one.
TOOLS = {
    "carbon": "//toolchain/driver:carbon",
    "explorer": "//explorer:explorer",
}


class ParsedArgs(NamedTuple):
    """The command-line arguments after post-processing by `parse_args`."""

    autoupdate_args: List[str]
    build_mode: str
    # Tuples of (compiled line matcher, compiled "before" regex, replacement).
    extra_check_replacements: List[Tuple[Pattern, Pattern, str]]
    line_number_delta_prefix: str
    line_number_pattern: Pattern
    lit_run: List[str]
    testdata: str
    # Test paths, resolved against the working directory at parse time.
    tests: List[Path]
    tool: str


def parse_args() -> ParsedArgs:
    """Parses command-line arguments and flags.

    Regular-expression flags are compiled and test paths are resolved to
    absolute paths before being returned.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("tests", nargs="*")
    parser.add_argument(
        "--autoupdate_arg",
        metavar="COMMAND",
        default=[],
        action="append",
        help="Optional arguments to pass to the autoupdate command.",
    )
    parser.add_argument(
        "--build_mode",
        metavar="MODE",
        default="opt",
        help="The build mode to use. "
        "Defaults to opt for faster execution.",
    )
    parser.add_argument(
        "--extra_check_replacement",
        nargs=3,
        metavar=("MATCHING", "BEFORE", "AFTER"),
        default=[],
        action="append",
        help="On a CHECK line with MATCHING, does a regex replacement of "
        "BEFORE with AFTER.",
    )
    parser.add_argument(
        "--line_number_delta_prefix",
        metavar="PREFIX",
        default="",
        help="An optional prefix to add before the [[@LINE+delta]] marker.",
    )
    parser.add_argument(
        "--line_number_pattern",
        metavar="PATTERN",
        # The named groups are load-bearing: CheckLine reads 'line' and
        # 'filename' from matches and re-emits 'prefix' and 'suffix'.
        default=r"(?P<prefix>(?P<filename>\w+\.carbon):)"
        r"(?P<line>\d+)(?P<suffix>(?:\D|$))",
        help="A regular expression which matches line numbers to update as its "
        "only group. Capture groups 'prefix', 'line', and 'suffix' are "
        "required for structure. The 'filename' capture group is optional and "
        "should be provided when lines may belong to different files.",
    )
    parser.add_argument(
        "--lit_run",
        metavar="COMMAND",
        default=[],
        required=False,
        action="append",
        help="RUN lines to set.",
    )
    parser.add_argument(
        "--testdata",
        metavar="PATH",
        required=True,
        help="The path to the testdata to update, relative to the workspace "
        "root.",
    )
    parser.add_argument(
        "--tool",
        metavar="TOOL",
        required=True,
        choices=TOOLS.keys(),
        help="The tool being tested.",
    )
    parsed_args = parser.parse_args()
    extra_check_replacements = [
        (re.compile(line_matcher), re.compile(before), after)
        for line_matcher, before, after in parsed_args.extra_check_replacement
    ]
    return ParsedArgs(
        autoupdate_args=parsed_args.autoupdate_arg,
        build_mode=parsed_args.build_mode,
        extra_check_replacements=extra_check_replacements,
        line_number_delta_prefix=parsed_args.line_number_delta_prefix,
        line_number_pattern=re.compile(parsed_args.line_number_pattern),
        lit_run=parsed_args.lit_run,
        testdata=parsed_args.testdata,
        tests=[Path(test).resolve() for test in parsed_args.tests],
        tool=parsed_args.tool,
    )


def get_tests(testdata: str) -> Set[Path]:
    """Get the list of tests from the filesystem.

    Walks `testdata` recursively and collects every `.carbon` file. Exits the
    process with an error message on any file that is neither a `.carbon` file
    nor one of the known support files.
    """
    tests = set()
    for root, _, files in os.walk(testdata):
        for f in files:
            if f in {"lit.cfg.py", "BUILD", "README.md"}:
                # Ignore the lit config.
                continue
            if os.path.splitext(f)[1] == ".carbon":
                tests.add(Path(root).joinpath(f))
            else:
                sys.exit(f"Unrecognized file type in testdata: {f}")
    return tests


class Line(ABC):
    """A line that may appear in the resulting test file."""

    @abstractmethod
    def format(
        self, *, output_line_number: int, line_number_remap: Dict[int, int]
    ) -> str:
        """Returns the final text of the line.

        Args:
            output_line_number: The 0-based index of this line in the output.
            line_number_remap: Maps 0-based line indices in the original file
                to 0-based indices in the output.
        """
        raise NotImplementedError


class OriginalLine(Line):
    """A line that was copied from the original test file."""

    def __init__(self, line_number: int, text: str) -> None:
        # 0-based index of the line in the original file.
        self.line_number = line_number
        self.text = text

    def format(self, **kwargs: Any) -> str:
        """Returns the original text unchanged."""
        return self.text


class RunLine(Line):
    """A RUN line."""

    def __init__(self, text: str) -> None:
        self.text = text

    def format(self, **kwargs: Any) -> str:
        """Returns the RUN line text unchanged."""
        return self.text


class CheckLine(Line):
    """A `// CHECK:` line generated from the test output.

    If there's a line number, it'll be fixed up after we've figured out which
    lines to include in the resulting test file and in what order, because their
    contents depend on where an original input line appears in the output.
    """

    # These patterns are constant, so compile them once for all instances.
    time_elapsed_pattern = re.compile(r"Time elapsed in (\S+): (\d+)ms")
    trailing_whitespace_pattern = re.compile(r"(\s+$)")

    def __init__(
        self,
        test: str,
        out_line: str,
        line_number_delta_prefix: str,
        line_number_pattern: Pattern,
    ) -> None:
        super().__init__()
        self.filename = Path(test).name
        # Set by merge_lines to match the indent of the following line.
        self.indent = ""
        self.out_line = out_line
        self.line_number_delta_prefix = line_number_delta_prefix
        self.line_number_pattern = line_number_pattern

        # If any match is specific to this file, use the first matched line for
        # the location of the CHECK comment.
        self.line_in_file = None
        for match in line_number_pattern.finditer(self.out_line):
            if self._matches_filename(match):
                # Tool output is 1-based; internal line indices are 0-based.
                self.line_in_file = int(match.group("line")) - 1
                break

    def format(
        self, *, output_line_number: int, line_number_remap: Dict[int, int]
    ) -> str:
        """Returns the `// CHECK:` line with volatile content made matchable.

        One rewrite is applied per loop iteration until none applies. In
        priority order: a line number in this file becomes a relative
        `[[@LINE+delta]]` marker, a line number in another file becomes
        `{{.*}}`, an elapsed time becomes a digit regex, and trailing
        whitespace is wrapped in `{{...}}`.
        """
        assert self.out_line
        result = self.out_line
        while True:
            line_match = self.line_number_pattern.search(result)
            time_match = self.time_elapsed_pattern.search(result)
            trailing_match = self.trailing_whitespace_pattern.search(result)
            if line_match:
                if self._matches_filename(line_match):
                    line_number = int(line_match.group("line")) - 1
                    delta = line_number_remap[line_number] - output_line_number
                    # We use `:+d` here to produce `LINE-n` or `LINE+n` as
                    # appropriate.
                    result = self.line_number_pattern.sub(
                        rf"\g<prefix>{self.line_number_delta_prefix}"
                        rf"[[@LINE{delta:+d}]]\g<suffix>",
                        result,
                        count=1,
                    )
                else:
                    # A line in some other file: its number can't be expressed
                    # relative to this file, so match anything.
                    result = self.line_number_pattern.sub(
                        r"\g<prefix>{{.*}}\g<suffix>",
                        result,
                        count=1,
                    )
            elif time_match:
                result = self.time_elapsed_pattern.sub(
                    r"Time elapsed in \1: {{[0-9]+}}ms", result, count=1
                )
            elif trailing_match:
                result = self.trailing_whitespace_pattern.sub(r"{{\1}}", result)
            else:
                break
        return f"{self.indent}// CHECK:{result}\n"

    def _matches_filename(self, match: Match) -> bool:
        """Returns true if `match` refers to this test's file.

        A pattern without a 'filename' group is treated as always matching.
        """
        return (
            "filename" not in match.groupdict()
            or match.group("filename") == self.filename
        )


def find_autoupdate(test: str, orig_lines: List[str]) -> Optional[int]:
    """Figures out whether autoupdate should occur.

    For AUTOUPDATE, returns the line. For NOAUTOUPDATE, returns None.

    Raises:
        ValueError: If the file doesn't contain exactly one marker line.
    """
    found = 0
    result = None
    for line_number, line in enumerate(orig_lines):
        if line.startswith(AUTOUPDATE_MARKER):
            found += 1
            result = line_number
        elif line.startswith(NOAUTOUPDATE_MARKER):
            found += 1
    if found == 0:
        raise ValueError(
            f"{test} must have either '{AUTOUPDATE_MARKER}' or "
            f"'{NOAUTOUPDATE_MARKER}'"
        )
    elif found > 1:
        raise ValueError(
            f"{test} must have only one of '{AUTOUPDATE_MARKER}' or "
            f"'{NOAUTOUPDATE_MARKER}'"
        )
    return result


def replace_all(s: str, replacements: List[Tuple[str, str]]) -> str:
    """Runs multiple replacements on a string.

    Replacements are applied in list order, each to the result of the previous.
    """
    for before, after in replacements:
        s = s.replace(before, after)
    return s


def label_output(label: str, output: str) -> List[str]:
    """Merges output with labels.

    This mirrors label_output in lit_test/merge_output.py and should be kept in
    sync. They're separate in order to avoid a subprocess or import complexity.
    """
    result = []
    if output:
        for line in output.splitlines():
            # filter(None, ...) drops empty parts so that an empty output line
            # yields just the label, without a trailing space.
            result.append(" ".join(filter(None, (label, line))))
    return result


def get_matchable_test_output(
    autoupdate_args: List[str],
    extra_check_replacements: List[Tuple[Pattern, Pattern, str]],
    tool: str,
    bazel_runfiles: Pattern,
    llvm_symbolizer: str,
    test: str,
) -> List[str]:
    """Runs the autoupdate command and returns the output lines.

    The tool binary is run from the test's directory with the test's file name
    as the last argument. Its stdout and stderr are labeled and then escaped
    for use in CHECK lines.
    """
    # Run the autoupdate command to generate output.
    # (`bazel run` would serialize)
    autoupdate_cmd = Path.cwd().joinpath(
        TOOLS[tool].replace("//", "./bazel-bin/").replace(":", "/")
    )
    p = subprocess.run(
        [str(autoupdate_cmd)] + autoupdate_args + [Path(test).name],
        env={"LLVM_SYMBOLIZER_PATH": llvm_symbolizer},
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        cwd=str(Path(test).parent),
    )

    out_lines = label_output("STDOUT:", p.stdout)
    out_lines.extend(label_output("STDERR:", p.stderr))

    for i, line in enumerate(out_lines):
        # Escape things that mirror FileCheck special characters.
        line = line.replace("{{", "{{[{][{]}}")
        line = line.replace("[[", "{{[[][[]}}")
        line = bazel_runfiles.sub("{{.*}}/", line)
        for line_matcher, before, after in extra_check_replacements:
            if line_matcher.match(line):
                line = before.sub(after, line)
        out_lines[i] = line

    return out_lines


def is_replaced(line: str) -> bool:
    """Returns true if autoupdate should replace the line."""
    line = line.lstrip()
    return line.startswith("// CHECK") or line.startswith("// RUN:")


def merge_lines(
    line_number_delta_prefix: str,
    line_number_pattern: Pattern,
    lit_run: List[str],
    test: str,
    autoupdate_line_number: int,
    raw_orig_lines: List[str],
    out_lines: List[str],
) -> List[Line]:
    """Merges the original output and new lines.

    Existing CHECK and RUN lines are dropped from the original content. The
    result keeps everything up to and including the AUTOUPDATE line, then the
    new RUN lines, then the remaining original lines interleaved with the new
    CHECK lines.
    """
    orig_lines = [
        OriginalLine(i, line)
        for i, line in enumerate(raw_orig_lines)
        if not is_replaced(line)
    ]
    check_lines = [
        CheckLine(test, out_line, line_number_delta_prefix, line_number_pattern)
        for out_line in out_lines
    ]

    result_lines: List[Line] = []

    # CHECK lines must go after AUTOUPDATE.
    while orig_lines and orig_lines[0].line_number <= autoupdate_line_number:
        result_lines.append(orig_lines.pop(0))

    for line in lit_run:
        run_not = ""
        # Tests named `fail_*` get their RUN command prefixed with `%{not}`.
        if Path(test).name.startswith("fail_"):
            run_not = "%{not} "
        result_lines.append(RunLine(f"// RUN: {run_not}{line}\n"))

    # Interleave the original lines and the CHECK: lines.
    while orig_lines and check_lines:
        # Original lines go first when the CHECK line is known and later.
        if (
            check_lines[0].line_in_file is not None
            and check_lines[0].line_in_file > orig_lines[0].line_number
        ):
            result_lines.append(orig_lines.pop(0))
        else:
            check_line = check_lines.pop(0)
            # Indent to match the next original line.
            check_line.indent = re.findall("^ *", orig_lines[0].text)[0]
            result_lines.append(check_line)

    # One list is non-empty; append remaining lines from both to catch it.
    result_lines.extend(orig_lines)
    result_lines.extend(check_lines)

    return result_lines


def update_check(
    parsed_args: ParsedArgs,
    bazel_runfiles: Pattern,
    llvm_symbolizer: str,
    test: Path,
) -> bool:
    """Updates the CHECK: lines for `test` by running the tool.

    Returns true if a change was made.
    """
    with test.open() as f:
        orig_lines = f.readlines()

    # Make sure we're supposed to autoupdate.
    autoupdate_line = find_autoupdate(str(test), orig_lines)
    if autoupdate_line is None:
        return False

    # Determine the merged output lines.
    out_lines = get_matchable_test_output(
        parsed_args.autoupdate_args,
        parsed_args.extra_check_replacements,
        parsed_args.tool,
        bazel_runfiles,
        llvm_symbolizer,
        str(test),
    )
    result_lines = merge_lines(
        parsed_args.line_number_delta_prefix,
        parsed_args.line_number_pattern,
        parsed_args.lit_run,
        str(test),
        autoupdate_line,
        orig_lines,
        out_lines,
    )

    # Calculate the remap for original lines.
    line_number_remap = {
        line.line_number: i
        for i, line in enumerate(result_lines)
        if isinstance(line, OriginalLine)
    }
    # If the last line of the original output was a CHECK, replace it with an
    # empty line.
    if orig_lines[-1].lstrip().startswith("// CHECK"):
        line_number_remap[len(orig_lines) - 1] = len(result_lines) - 1

    # Generate contents for any lines that depend on line numbers.
    formatted_result_lines = [
        line.format(output_line_number=i, line_number_remap=line_number_remap)
        for i, line in enumerate(result_lines)
    ]

    # If nothing's changed, we're done.
    if formatted_result_lines == orig_lines:
        return False

    # Interleave the new CHECK: lines with the tested content.
    with test.open("w") as f:
        f.writelines(formatted_result_lines)
    return True


def update_checks(
    parsed_args: ParsedArgs,
    bazel_runfiles: Pattern,
    llvm_symbolizer: str,
    tests: Set[Path],
) -> None:
    """Updates CHECK: lines in lit tests.

    Tests are processed on a thread pool. The first failure is re-raised as a
    ValueError naming the test that failed.
    """

    def map_helper(test: Path) -> bool:
        try:
            updated = update_check(
                parsed_args, bazel_runfiles, llvm_symbolizer, test
            )
        except Exception as e:
            raise ValueError(f"Failed to update {test}") from e
        print(".", end="", flush=True)
        return updated

    print(f"Updating {len(tests)} lit test(s)...")
    with futures.ThreadPoolExecutor() as executor:
        # list() iterates in order to immediately propagate exceptions.
        results = list(executor.map(map_helper, tests))

    # Each update call indicates progress with a dot without a newline, so put a
    # newline to wrap.
    print(f"\nUpdated {results.count(True)} lit test(s).")


def main() -> None:
    """Builds the tool under test and updates the selected tests."""
    # Parse arguments relative to the working directory.
    parsed_args = parse_args()

    # Remaining script logic should be relative to the repository root. The
    # root is resolved because the test paths from parse_args are resolved too,
    # and `relative_to` below needs both sides in the same form.
    root = Path(__file__).resolve().parent.parent.parent
    os.chdir(root)

    if parsed_args.tests:
        tests = {test.relative_to(root) for test in parsed_args.tests}
    else:
        print(
            "HINT: run `autoupdate_testdata.py f1 f2 ...` "
            "to update specific tests"
        )
        tests = get_tests(parsed_args.testdata)

    # Build inputs.
    print(f"Building {parsed_args.tool}...")
    subprocess.check_call(
        [
            "bazel",
            "build",
            "-c",
            parsed_args.build_mode,
            TOOLS[parsed_args.tool],
        ]
    )
    bazel_bin_dir = subprocess.check_output(
        ["bazel", "info", "-c", parsed_args.build_mode, "bazel-bin"],
        encoding="utf-8",
    ).strip()
    bazel_runfiles = re.compile(
        r"{0}/.*\.runfiles/carbon/".format(re.escape(bazel_bin_dir))
    )

    # Grab the symbolizer.
    clang_var_content = Path(
        "bazel-execroot/external/bazel_cc_toolchain/"
        "clang_detected_variables.bzl"
    ).read_text()
    llvm_symbolizer = re.search(
        '(?m)^llvm_symbolizer = "(.*)"$', clang_var_content
    )
    assert llvm_symbolizer is not None

    # Run updates.
    update_checks(parsed_args, bazel_runfiles, llvm_symbolizer[1], tests)


if __name__ == "__main__":
    main()