pr_comments.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. #!/usr/bin/env python3
  2. """Figure out comments on a GitHub PR."""
  3. __copyright__ = """
  4. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  5. Exceptions. See /LICENSE for license information.
  6. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. """
  8. import argparse
  9. import datetime
  10. import hashlib
  11. import os
  12. import importlib.util
  13. import textwrap
  14. from typing import Any, Callable, Optional
  15. # Do some extra work to support direct runs.
  16. try:
  17. from github_tools import github_helpers
  18. except ImportError:
  19. github_helpers_spec = importlib.util.spec_from_file_location(
  20. "github_helpers",
  21. os.path.join(os.path.dirname(__file__), "github_helpers.py"),
  22. )
  23. assert github_helpers_spec is not None
  24. github_helpers = importlib.util.module_from_spec(github_helpers_spec)
  25. github_helpers_spec.loader.exec_module(github_helpers) # type: ignore
  26. # The main query, into which other queries are composed.
  27. _QUERY = """
  28. {
  29. repository(owner: "carbon-language", name: "%(repo)s") {
  30. pullRequest(number: %(pr_num)d) {
  31. author {
  32. login
  33. }
  34. createdAt
  35. title
  36. %(comments)s
  37. %(reviews)s
  38. %(review_threads)s
  39. }
  40. }
  41. }
  42. """
  43. # Queries for comments on the PR. These are direct, non-review comments on the
  44. # PR.
  45. _QUERY_COMMENTS = """
  46. comments(first: 100%(cursor)s) {
  47. nodes {
  48. author {
  49. login
  50. }
  51. body
  52. createdAt
  53. url
  54. }
  55. %(pagination)s
  56. }
  57. """
  58. # Queries for reviews on the PR, which have a non-empty body if a review has
  59. # a summary comment.
  60. _QUERY_REVIEWS = """
  61. reviews(first: 100%(cursor)s) {
  62. nodes {
  63. author {
  64. login
  65. }
  66. body
  67. createdAt
  68. url
  69. }
  70. %(pagination)s
  71. }
  72. """
  73. # Queries for review threads on the PR.
  74. _QUERY_REVIEW_THREADS = """
  75. reviewThreads(first: 100%(cursor)s) {
  76. nodes {
  77. comments(first: 100) {
  78. nodes {
  79. author {
  80. login
  81. }
  82. body
  83. createdAt
  84. originalPosition
  85. originalCommit {
  86. abbreviatedOid
  87. }
  88. path
  89. }
  90. }
  91. isResolved
  92. resolvedBy {
  93. createdAt
  94. login
  95. }
  96. }
  97. %(pagination)s
  98. }
  99. """
  100. class _Comment:
  101. """A comment, either on a review thread or top-level on the PR."""
  102. def __init__(self, author: str, timestamp: str, body: str):
  103. self.author = author
  104. self.timestamp = datetime.datetime.strptime(
  105. timestamp, "%Y-%m-%dT%H:%M:%SZ"
  106. )
  107. self.body = body
  108. @staticmethod
  109. def from_raw_comment(raw_comment: dict) -> "_Comment":
  110. """Creates the comment from a raw comment dict."""
  111. return _Comment(
  112. raw_comment["author"]["login"],
  113. raw_comment["createdAt"],
  114. raw_comment["body"],
  115. )
  116. @staticmethod
  117. def _rewrap(content: str) -> str:
  118. """Rewraps a comment to fit in 80 columns with an indent."""
  119. lines = []
  120. for line in content.split("\n"):
  121. lines.extend(
  122. [
  123. x
  124. for x in textwrap.wrap(
  125. line,
  126. width=80,
  127. initial_indent=" " * 4,
  128. subsequent_indent=" " * 4,
  129. )
  130. ]
  131. )
  132. return "\n".join(lines)
  133. def format(self, long: bool) -> str:
  134. """Formats the comment."""
  135. if long:
  136. return "%s%s at %s:\n%s" % (
  137. " " * 2,
  138. self.author,
  139. self.timestamp.strftime("%Y-%m-%d %H:%M"),
  140. self._rewrap(self.body),
  141. )
  142. else:
  143. # Compact newlines down into pilcrows, leaving a space after.
  144. body = self.body.replace("\r", "").replace("\n", "¶ ")
  145. while "¶ ¶" in body:
  146. body = body.replace("¶ ¶", "¶¶")
  147. line = "%s%s: %s" % (" " * 2, self.author, body)
  148. return line if len(line) <= 80 else line[:77] + "..."
  149. class _PRComment(_Comment):
  150. """A comment on the top-level PR."""
  151. def __init__(self, raw_comment: dict):
  152. super().__init__(
  153. raw_comment["author"]["login"],
  154. raw_comment["createdAt"],
  155. raw_comment["body"],
  156. )
  157. self.url = raw_comment["url"]
  158. def __lt__(self, other: "_PRComment") -> bool:
  159. return self.timestamp < other.timestamp
  160. def format(self, long: bool) -> str:
  161. return "%s\n%s" % (self.url, super().format(long))
  162. class _Thread:
  163. """A review thread on a line of code."""
  164. def __init__(self, parsed_args: argparse.Namespace, thread: dict):
  165. self.is_resolved: bool = thread["isResolved"]
  166. comments = thread["comments"]["nodes"]
  167. first_comment = comments[0]
  168. self.line: int = first_comment["originalPosition"]
  169. self.path: str = first_comment["path"]
  170. # Link to the comment in the commit; GitHub features work better there
  171. # than in the conversation view. The diff_url allows viewing changes
  172. # since the comment, although the comment won't be visible there.
  173. template = (
  174. "https://github.com/carbon-language/%(repo)s/pull/%(pr_num)s/"
  175. "files/%(oid)s%(head)s#diff-%(path_md5)s%(line_side)s%(line)s"
  176. )
  177. # GitHub uses an md5 of the file's path for the link.
  178. path_md5 = hashlib.md5()
  179. path_md5.update(bytearray(self.path, "utf-8"))
  180. format_dict = {
  181. "head": "",
  182. "line_side": "R",
  183. "line": self.line,
  184. "oid": first_comment["originalCommit"]["abbreviatedOid"],
  185. "path_md5": path_md5.hexdigest(),
  186. "pr_num": parsed_args.pr_num,
  187. "repo": parsed_args.repo,
  188. }
  189. self.url: str = template % format_dict
  190. format_dict["head"] = "..HEAD"
  191. format_dict["line_side"] = "L"
  192. self.diff_url: str = template % format_dict
  193. self.comments = [
  194. _Comment.from_raw_comment(comment)
  195. for comment in thread["comments"]["nodes"]
  196. ]
  197. if self.is_resolved:
  198. self.comments.append(
  199. _Comment(
  200. thread["resolvedBy"]["login"],
  201. thread["resolvedBy"]["createdAt"],
  202. "<resolved>",
  203. )
  204. )
  205. def __lt__(self, other: "_Thread") -> bool:
  206. """Sort threads by line then timestamp."""
  207. if self.line != other.line:
  208. return bool(self.line < other.line)
  209. return self.comments[0].timestamp < other.comments[0].timestamp
  210. def format(self, long: bool) -> str:
  211. """Formats the review thread with comments."""
  212. lines = [
  213. "%s\n - line %d; %s"
  214. % (
  215. self.url,
  216. self.line,
  217. ("resolved" if self.is_resolved else "unresolved"),
  218. )
  219. ]
  220. if self.diff_url:
  221. lines.append(" - diff: %s" % self.diff_url)
  222. for comment in self.comments:
  223. lines.append(comment.format(long))
  224. return "\n".join(lines)
  225. def has_comment_from(self, comments_from: str) -> bool:
  226. """Returns true if comments has a comment from comments_from."""
  227. for comment in self.comments:
  228. if comment.author == comments_from:
  229. return True
  230. return False
  231. def _parse_args(args: Optional[list[str]] = None) -> argparse.Namespace:
  232. """Parses command-line arguments and flags."""
  233. parser = argparse.ArgumentParser(description="Lists comments on a PR.")
  234. parser.add_argument(
  235. "pr_num",
  236. metavar="PR#",
  237. type=int,
  238. help="The pull request to fetch comments from.",
  239. )
  240. github_helpers.add_access_token_arg(parser, "repo")
  241. parser.add_argument(
  242. "--comments-after",
  243. metavar="LOGIN",
  244. help="Only print threads where the final comment is not from the given "
  245. "user. For example, use when looking for threads that you still need "
  246. "to respond to.",
  247. )
  248. parser.add_argument(
  249. "--comments-from",
  250. metavar="LOGIN",
  251. help="Only print threads with comments from the given user. For "
  252. "example, use when looking for threads that you've commented on.",
  253. )
  254. parser.add_argument(
  255. "--include-resolved",
  256. action="store_true",
  257. help="Whether to include resolved review threads. By default, only "
  258. "unresolved threads will be shown.",
  259. )
  260. parser.add_argument(
  261. "--repo",
  262. choices=["carbon-lang"],
  263. default="carbon-lang",
  264. help="The Carbon repo to query. Defaults to %(default)s.",
  265. )
  266. parser.add_argument(
  267. "--long",
  268. action="store_true",
  269. help="Prints long output, with the full comment.",
  270. )
  271. return parser.parse_args(args=args)
  272. def _query(
  273. parsed_args: argparse.Namespace, field_name: Optional[str] = None
  274. ) -> str:
  275. """Returns a query for the passed field_name, or all by default."""
  276. print(".", end="", flush=True)
  277. format = {
  278. "pr_num": parsed_args.pr_num,
  279. "repo": parsed_args.repo,
  280. "comments": "",
  281. "review_threads": "",
  282. "reviews": "",
  283. }
  284. if field_name:
  285. # Use a cursor for pagination of the field.
  286. if field_name == "comments":
  287. format["comments"] = _QUERY_COMMENTS
  288. elif field_name == "reviewThreads":
  289. format["review_threads"] = _QUERY_REVIEW_THREADS
  290. elif field_name == "reviews":
  291. format["reviews"] = _QUERY_REVIEWS
  292. else:
  293. raise ValueError("Unexpected field_name: %s" % field_name)
  294. else:
  295. # Fetch the first page of all fields.
  296. subformat = {"cursor": "", "pagination": github_helpers.PAGINATION}
  297. format["comments"] = _QUERY_COMMENTS % subformat
  298. format["review_threads"] = _QUERY_REVIEW_THREADS % subformat
  299. format["reviews"] = _QUERY_REVIEWS % subformat
  300. return _QUERY % format
  301. def _accumulate_pr_comment(
  302. parsed_args: argparse.Namespace,
  303. comments: list[_PRComment],
  304. raw_comment: dict,
  305. ) -> None:
  306. """Collects top-level comments and reviews."""
  307. # Elide reviews that have no top-level comment body.
  308. if raw_comment["body"]:
  309. comments.append(_PRComment(raw_comment))
  310. def _accumulate_thread(
  311. parsed_args: argparse.Namespace,
  312. threads_by_path: dict[str, list[_Thread]],
  313. raw_thread: dict,
  314. ) -> None:
  315. """Adds threads to threads_by_path for later sorting."""
  316. thread = _Thread(parsed_args, raw_thread)
  317. # Optionally skip resolved threads.
  318. if not parsed_args.include_resolved and thread.is_resolved:
  319. return
  320. # Optionally skip threads where the given user isn't the last commenter.
  321. if (
  322. parsed_args.comments_after
  323. and thread.comments[-1].author == parsed_args.comments_after
  324. ):
  325. return
  326. # Optionally skip threads where the given user hasn't commented.
  327. if parsed_args.comments_from and not thread.has_comment_from(
  328. parsed_args.comments_from
  329. ):
  330. return
  331. if thread.path not in threads_by_path:
  332. threads_by_path[thread.path] = []
  333. threads_by_path[thread.path].append(thread)
  334. def _paginate(
  335. field_name: str,
  336. accumulator: Callable[[argparse.Namespace, Any, dict], None],
  337. parsed_args: argparse.Namespace,
  338. client: github_helpers.Client,
  339. main_result: dict,
  340. output: Any,
  341. ) -> None:
  342. """Paginates through the given field_name, accumulating results."""
  343. query = _query(parsed_args, field_name=field_name)
  344. path = ("repository", "pullRequest", field_name)
  345. for node in client.execute_and_paginate(
  346. query, path, first_page=main_result
  347. ):
  348. accumulator(parsed_args, output, node)
  349. def _fetch_comments(
  350. parsed_args: argparse.Namespace,
  351. ) -> tuple[list[_PRComment], dict[str, list[_Thread]]]:
  352. """Fetches comments and review threads from GitHub."""
  353. # Each _query call will print a '.' for progress.
  354. print(
  355. "Loading https://github.com/carbon-language/%s/pull/%d ..."
  356. % (parsed_args.repo, parsed_args.pr_num),
  357. end="",
  358. flush=True,
  359. )
  360. client = github_helpers.Client(parsed_args)
  361. # Get the initial set of review threads, and print the PR summary.
  362. main_result = client.execute(_query(parsed_args))
  363. pull_request = main_result["repository"]["pullRequest"]
  364. # Paginate comments, reviews, and review threads.
  365. comments: list[_PRComment] = []
  366. _paginate(
  367. "comments",
  368. _accumulate_pr_comment,
  369. parsed_args,
  370. client,
  371. main_result,
  372. comments,
  373. )
  374. # Combine reviews into comments for interleaving.
  375. _paginate(
  376. "reviews",
  377. _accumulate_pr_comment,
  378. parsed_args,
  379. client,
  380. main_result,
  381. comments,
  382. )
  383. threads_by_path: dict[str, list[_Thread]] = {}
  384. _paginate(
  385. "reviewThreads",
  386. _accumulate_thread,
  387. parsed_args,
  388. client,
  389. main_result,
  390. threads_by_path,
  391. )
  392. # Now that loading is done (no more progress indicators), print the header.
  393. print()
  394. pr_desc = _Comment(
  395. pull_request["author"]["login"],
  396. pull_request["createdAt"],
  397. pull_request["title"],
  398. )
  399. print(pr_desc.format(parsed_args.long))
  400. return comments, threads_by_path
  401. def main() -> None:
  402. parsed_args = _parse_args()
  403. comments, threads_by_path = _fetch_comments(parsed_args)
  404. for comment in sorted(comments):
  405. print()
  406. print(comment.format(parsed_args.long))
  407. for path, threads in sorted(threads_by_path.items()):
  408. # Print a header for each path.
  409. print()
  410. print("=" * 80)
  411. print(path)
  412. print("=" * 80)
  413. for thread in sorted(threads):
  414. print()
  415. print(thread.format(parsed_args.long))
  416. if __name__ == "__main__":
  417. main()