regen_corpus.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. #!/usr/bin/env python3
  2. """Regenerates explorer fuzzer corpus files."""
  3. __copyright__ = """
  4. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  5. Exceptions. See /LICENSE for license information.
  6. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. """
  8. import hashlib
  9. from pathlib import Path
  10. from concurrent import futures
  11. import os
  12. import subprocess
  13. import tempfile
  14. from typing import List, Set
  15. from collections.abc import Iterable
# Folder that main() scans recursively for ".carbon" source files. The path is
# relative; main() changes the working directory before using it.
_TESTDATA = "explorer/testdata"
# Corpus directory handed to the fuzzer's `-merge=1` run as its first
# directory argument; main() reports merging interesting inputs into it.
_FUZZER_CORPUS = "explorer/fuzzing/fuzzer_corpus"
  18. def _get_files(folder: str, extension: str) -> Set[str]:
  19. """Gets the list of files with the specified extension."""
  20. matching_files = set()
  21. for root, _, files in os.walk(folder):
  22. for f in files:
  23. if os.path.splitext(f)[1] == extension:
  24. matching_files.add(os.path.join(root, f))
  25. return matching_files
  26. def _carbon_to_proto(carbon_file: str) -> str:
  27. """Converts carbon file to text proto string."""
  28. try:
  29. p = subprocess.run(
  30. f"bazel-bin/explorer/fuzzing/ast_to_proto {carbon_file}",
  31. shell=True,
  32. check=True,
  33. stdout=subprocess.PIPE,
  34. stderr=subprocess.STDOUT,
  35. )
  36. text_proto = p.stdout.decode("utf-8")
  37. print(".", end="", flush=True)
  38. return text_proto
  39. except subprocess.SubprocessError:
  40. print("x", end="", flush=True)
  41. return ""
  42. def _write_corpus_files(text_protos: Iterable[str], corpus_dir: str) -> None:
  43. """Writes text proto contents to files in corpus directory."""
  44. for text_proto in text_protos:
  45. file_name = (
  46. Path(corpus_dir)
  47. .joinpath(hashlib.sha1(text_proto.encode("utf-8")).hexdigest())
  48. .with_suffix(".textproto")
  49. )
  50. with open(file_name, "w") as f:
  51. f.write(text_proto)
  52. def main() -> None:
  53. os.chdir(os.path.join(os.path.dirname(__file__), "../.."))
  54. print("Building ast_to_proto...", flush=True)
  55. subprocess.check_call(
  56. [
  57. "bazel",
  58. "build",
  59. "//explorer/fuzzing:ast_to_proto",
  60. ]
  61. )
  62. carbon_sources = _get_files(_TESTDATA, ".carbon")
  63. print(
  64. f"Converting {len(carbon_sources)} carbon files to proto...",
  65. flush=True,
  66. )
  67. text_protos: List[str] = []
  68. with futures.ThreadPoolExecutor() as exec:
  69. all_protos = exec.map(_carbon_to_proto, carbon_sources)
  70. text_protos.extend(p for p in all_protos if p)
  71. with tempfile.TemporaryDirectory() as new_corpus_dir:
  72. print(
  73. f"\nWriting {len(text_protos)} corpus files to {new_corpus_dir}...",
  74. flush=True,
  75. )
  76. _write_corpus_files(text_protos, new_corpus_dir)
  77. print("Building explorer_fuzzer...", flush=True)
  78. subprocess.check_call(
  79. [
  80. "bazel",
  81. "build",
  82. "--config=fuzzer",
  83. "//explorer/fuzzing:explorer_fuzzer.full_corpus",
  84. ]
  85. )
  86. print(
  87. f"Merging interesting inputs into {_FUZZER_CORPUS}...",
  88. flush=True,
  89. )
  90. subprocess.check_call(
  91. [
  92. "bazel-bin/explorer/fuzzing/explorer_fuzzer.full_corpus",
  93. "-merge=1",
  94. _FUZZER_CORPUS,
  95. new_corpus_dir,
  96. ]
  97. )
  98. print("All done!", flush=True)
  99. if __name__ == "__main__":
  100. main()