Browse Source

Automate naming of fuzzer files (#1250)

We use SHA1 right now; this just automates consistency.
Jon Meow 4 years ago
parent
commit
9619533519
2 changed files with 62 additions and 8 deletions
  1. 19 8
      .pre-commit-config.yaml
  2. 43 0
      scripts/check_sha_filenames.py

+ 19 - 8
.pre-commit-config.yaml

@@ -23,10 +23,12 @@ repos:
       - id: check-yaml
       - id: detect-private-key
       - id: end-of-file-fixer
+        exclude: '^(.*/fuzzer_corpus/.*)$'
       - id: mixed-line-ending
         args: ['--fix=lf']
+        exclude: '^(.*/fuzzer_corpus/.*)$'
       - id: trailing-whitespace
-        exclude: '^(.*/testdata/.*\.golden)$'
+        exclude: '^(.*/fuzzer_corpus/.*|.*/testdata/.*\.golden)$'
   - repo: https://github.com/google/pre-commit-tool-hooks
     rev: cb78d9293306d9f737c64d9702bbaa88e157caaa # frozen: v1.2.2
     hooks:
@@ -57,6 +59,13 @@ repos:
         entry: scripts/run_buildifier.py
         language: python
         files: '^(.*/)?(BUILD\.bazel|BUILD|WORKSPACE)$|\.BUILD$|\.bzl$'
+      - id: clang-format
+        name: clang-format
+        entry: clang-format
+        types_or: [c++, proto]
+        language: python
+        args: ['-i']
+        additional_dependencies: ['clang-format==13.0.1']
       - id: explorer-format-grammar
         name: Format the explorer grammar file
         entry: explorer/syntax/format_grammar.py
@@ -64,13 +73,15 @@ repos:
         files: ^explorer/syntax/(lexer.lpp|parser.ypp)$
         pass_filenames: false
         additional_dependencies: ['clang-format==13.0.1']
-      - id: clang-format
-        name: clang-format
-        entry: clang-format
-        types_or: [c++, proto]
+
+  # This may rename files, so it's deliberately between formatters and linters.
+  - repo: local
+    hooks:
+      - id: check-sha-filenames
+        name: Check fuzzer SHA filenames
+        entry: scripts/check_sha_filenames.py
         language: python
-        args: ['-i']
-        additional_dependencies: ['clang-format==13.0.1']
+        files: ^.*/fuzzer_corpus/.*$
 
   # Run linters last, as formatters and other checks may fix issues.
   - repo: local
@@ -139,6 +150,7 @@ repos:
               third_party/.*|
               .*\.def|
               .*\.svg|
+              .*/fuzzer_corpus/.*|
               .*/testdata/.*\.golden
           )$
       - id: check-links
@@ -149,5 +161,4 @@ exclude: |
   (?x)^(
       third_party/examples/.*/carbon/.*|
       third_party/llvm-project/.*|
-      .*/fuzzer_corpus/.*
   )$

+ 43 - 0
scripts/check_sha_filenames.py

@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+"""Requires files be named for their SHA1.
+
+We name fuzzer corpus files for their SHA1. The choice of SHA1 is for
+consistency with git.
+
+This maintains the current extension for .textproto, but at some point we might
+want to specify the extension by path.
+"""
+
+__copyright__ = """
+Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+Exceptions. See /LICENSE for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""
+
+import hashlib
+from pathlib import Path
+import sys
+
+
+def main() -> None:
+    has_errors = False
+    for arg in sys.argv[1:]:
+        path = Path(arg)
+        with path.open("rb") as f:
+            content = f.read()
+        if len(content) == 0:
+            want = "empty"
+        else:
+            want = hashlib.sha1(content).hexdigest()
+        want_path = path.parent.joinpath(want).with_suffix(path.suffix)
+        if path != want_path:
+            print(f"Renaming {path} to {want_path}", file=sys.stderr)
+            path.rename(want_path)
+            has_errors = True
+    if has_errors:
+        exit(1)
+
+
+if __name__ == "__main__":
+    main()