pr_comments.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. #!/usr/bin/env python3
  2. """Figure out comments on a GitHub PR."""
  3. __copyright__ = """
  4. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  5. Exceptions. See /LICENSE for license information.
  6. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. """
  8. import argparse
  9. import datetime
  10. import hashlib
  11. import os
  12. import importlib.util
  13. import textwrap
  14. from typing import Any, Dict, Callable, List, Optional, Tuple
# Do some extra work to support direct runs.
# The packaged import is tried first. If it fails with ImportError,
# github_helpers.py is loaded by file path from the directory that contains
# this file and bound to the same module-level name, `github_helpers`.
try:
    from github_tools import github_helpers
except ImportError:
    github_helpers_spec = importlib.util.spec_from_file_location(
        "github_helpers",
        os.path.join(os.path.dirname(__file__), "github_helpers.py"),
    )
    github_helpers = importlib.util.module_from_spec(github_helpers_spec)
    github_helpers_spec.loader.exec_module(github_helpers)  # type: ignore
# The main query, into which other queries are composed.
#
# This is a %-format template. `repo` and `pr_num` select the pull request;
# `comments`, `reviews` and `review_threads` each receive either a sub-query
# or an empty string (see _query).
_QUERY = """
{
  repository(owner: "carbon-language", name: "%(repo)s") {
    pullRequest(number: %(pr_num)d) {
      author {
        login
      }
      createdAt
      title
      %(comments)s
      %(reviews)s
      %(review_threads)s
    }
  }
}
"""
# Queries for comments on the PR. These are direct, non-review comments on the
# PR.
#
# `cursor` and `pagination` are %-format placeholders. _query fills them for
# the first page and leaves them unformatted when building a per-field query.
_QUERY_COMMENTS = """
      comments(first: 100%(cursor)s) {
        nodes {
          author {
            login
          }
          body
          createdAt
          url
        }
        %(pagination)s
      }
"""
# Queries for reviews on the PR, which have a non-empty body if a review has
# a summary comment.
#
# Requests the same node fields as _QUERY_COMMENTS, so both can be accumulated
# by the same code. `cursor` and `pagination` are %-format placeholders.
_QUERY_REVIEWS = """
      reviews(first: 100%(cursor)s) {
        nodes {
          author {
            login
          }
          body
          createdAt
          url
        }
        %(pagination)s
      }
"""
# Queries for review threads on the PR.
#
# `cursor` and `pagination` are %-format placeholders for paging through the
# threads themselves. The nested `comments(first: 100)` has no cursor, so only
# the first 100 comments of each thread are requested.
_QUERY_REVIEW_THREADS = """
      reviewThreads(first: 100%(cursor)s) {
        nodes {
          comments(first: 100) {
            nodes {
              author {
                login
              }
              body
              createdAt
              originalPosition
              originalCommit {
                abbreviatedOid
              }
              path
            }
          }
          isResolved
          resolvedBy {
            createdAt
            login
          }
        }
        %(pagination)s
      }
"""
  99. class _Comment(object):
  100. """A comment, either on a review thread or top-level on the PR."""
  101. def __init__(self, author: str, timestamp: str, body: str):
  102. self.author = author
  103. self.timestamp = datetime.datetime.strptime(
  104. timestamp, "%Y-%m-%dT%H:%M:%SZ"
  105. )
  106. self.body = body
  107. @staticmethod
  108. def from_raw_comment(raw_comment: Dict) -> "_Comment":
  109. """Creates the comment from a raw comment dict."""
  110. return _Comment(
  111. raw_comment["author"]["login"],
  112. raw_comment["createdAt"],
  113. raw_comment["body"],
  114. )
  115. @staticmethod
  116. def _rewrap(content: str) -> str:
  117. """Rewraps a comment to fit in 80 columns with an indent."""
  118. lines = []
  119. for line in content.split("\n"):
  120. lines.extend(
  121. [
  122. x
  123. for x in textwrap.wrap(
  124. line,
  125. width=80,
  126. initial_indent=" " * 4,
  127. subsequent_indent=" " * 4,
  128. )
  129. ]
  130. )
  131. return "\n".join(lines)
  132. def format(self, long: bool) -> str:
  133. """Formats the comment."""
  134. if long:
  135. return "%s%s at %s:\n%s" % (
  136. " " * 2,
  137. self.author,
  138. self.timestamp.strftime("%Y-%m-%d %H:%M"),
  139. self._rewrap(self.body),
  140. )
  141. else:
  142. # Compact newlines down into pilcrows, leaving a space after.
  143. body = self.body.replace("\r", "").replace("\n", "¶ ")
  144. while "¶ ¶" in body:
  145. body = body.replace("¶ ¶", "¶¶")
  146. line = "%s%s: %s" % (" " * 2, self.author, body)
  147. return line if len(line) <= 80 else line[:77] + "..."
  148. class _PRComment(_Comment):
  149. """A comment on the top-level PR."""
  150. def __init__(self, raw_comment: Dict):
  151. super().__init__(
  152. raw_comment["author"]["login"],
  153. raw_comment["createdAt"],
  154. raw_comment["body"],
  155. )
  156. self.url = raw_comment["url"]
  157. def __lt__(self, other: "_PRComment") -> bool:
  158. return self.timestamp < other.timestamp
  159. def format(self, long: bool) -> str:
  160. return "%s\n%s" % (self.url, super().format(long))
  161. class _Thread(object):
  162. """A review thread on a line of code."""
  163. def __init__(self, parsed_args: argparse.Namespace, thread: Dict):
  164. self.is_resolved: bool = thread["isResolved"]
  165. comments = thread["comments"]["nodes"]
  166. first_comment = comments[0]
  167. self.line: int = first_comment["originalPosition"]
  168. self.path: str = first_comment["path"]
  169. # Link to the comment in the commit; GitHub features work better there
  170. # than in the conversation view. The diff_url allows viewing changes
  171. # since the comment, although the comment won't be visible there.
  172. template = (
  173. "https://github.com/carbon-language/%(repo)s/pull/%(pr_num)s/"
  174. "files/%(oid)s%(head)s#diff-%(path_md5)s%(line_side)s%(line)s"
  175. )
  176. # GitHub uses an md5 of the file's path for the link.
  177. path_md5 = hashlib.md5()
  178. path_md5.update(bytearray(self.path, "utf-8"))
  179. format_dict = {
  180. "head": "",
  181. "line_side": "R",
  182. "line": self.line,
  183. "oid": first_comment["originalCommit"]["abbreviatedOid"],
  184. "path_md5": path_md5.hexdigest(),
  185. "pr_num": parsed_args.pr_num,
  186. "repo": parsed_args.repo,
  187. }
  188. self.url: str = template % format_dict
  189. format_dict["head"] = "..HEAD"
  190. format_dict["line_side"] = "L"
  191. self.diff_url: str = template % format_dict
  192. self.comments = [
  193. _Comment.from_raw_comment(comment)
  194. for comment in thread["comments"]["nodes"]
  195. ]
  196. if self.is_resolved:
  197. self.comments.append(
  198. _Comment(
  199. thread["resolvedBy"]["login"],
  200. thread["resolvedBy"]["createdAt"],
  201. "<resolved>",
  202. )
  203. )
  204. def __lt__(self, other: "_Thread") -> bool:
  205. """Sort threads by line then timestamp."""
  206. if self.line != other.line:
  207. return bool(self.line < other.line)
  208. return self.comments[0].timestamp < other.comments[0].timestamp
  209. def format(self, long: bool) -> str:
  210. """Formats the review thread with comments."""
  211. lines = []
  212. lines.append(
  213. "%s\n - line %d; %s"
  214. % (
  215. self.url,
  216. self.line,
  217. ("resolved" if self.is_resolved else "unresolved"),
  218. )
  219. )
  220. if self.diff_url:
  221. lines.append(" - diff: %s" % self.diff_url)
  222. for comment in self.comments:
  223. lines.append(comment.format(long))
  224. return "\n".join(lines)
  225. def has_comment_from(self, comments_from: str) -> bool:
  226. """Returns true if comments has a comment from comments_from."""
  227. for comment in self.comments:
  228. if comment.author == comments_from:
  229. return True
  230. return False
  231. def _parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
  232. """Parses command-line arguments and flags."""
  233. parser = argparse.ArgumentParser(description="Lists comments on a PR.")
  234. parser.add_argument(
  235. "pr_num",
  236. metavar="PR#",
  237. type=int,
  238. help="The pull request to fetch comments from.",
  239. )
  240. github_helpers.add_access_token_arg(parser, "repo")
  241. parser.add_argument(
  242. "--comments-after",
  243. metavar="LOGIN",
  244. help="Only print threads where the final comment is not from the given "
  245. "user. For example, use when looking for threads that you still need "
  246. "to respond to.",
  247. )
  248. parser.add_argument(
  249. "--comments-from",
  250. metavar="LOGIN",
  251. help="Only print threads with comments from the given user. For "
  252. "example, use when looking for threads that you've commented on.",
  253. )
  254. parser.add_argument(
  255. "--include-resolved",
  256. action="store_true",
  257. help="Whether to include resolved review threads. By default, only "
  258. "unresolved threads will be shown.",
  259. )
  260. parser.add_argument(
  261. "--repo",
  262. choices=["carbon-lang"],
  263. default="carbon-lang",
  264. help="The Carbon repo to query. Defaults to %(default)s.",
  265. )
  266. parser.add_argument(
  267. "--long",
  268. action="store_true",
  269. help="Prints long output, with the full comment.",
  270. )
  271. return parser.parse_args(args=args)
  272. def _query(
  273. parsed_args: argparse.Namespace, field_name: Optional[str] = None
  274. ) -> str:
  275. """Returns a query for the passed field_name, or all by default."""
  276. print(".", end="", flush=True)
  277. format = {
  278. "pr_num": parsed_args.pr_num,
  279. "repo": parsed_args.repo,
  280. "comments": "",
  281. "review_threads": "",
  282. "reviews": "",
  283. }
  284. if field_name:
  285. # Use a cursor for pagination of the field.
  286. if field_name == "comments":
  287. format["comments"] = _QUERY_COMMENTS
  288. elif field_name == "reviewThreads":
  289. format["review_threads"] = _QUERY_REVIEW_THREADS
  290. elif field_name == "reviews":
  291. format["reviews"] = _QUERY_REVIEWS
  292. else:
  293. raise ValueError("Unexpected field_name: %s" % field_name)
  294. else:
  295. # Fetch the first page of all fields.
  296. subformat = {"cursor": "", "pagination": github_helpers.PAGINATION}
  297. format["comments"] = _QUERY_COMMENTS % subformat
  298. format["review_threads"] = _QUERY_REVIEW_THREADS % subformat
  299. format["reviews"] = _QUERY_REVIEWS % subformat
  300. return _QUERY % format
  301. def _accumulate_pr_comment(
  302. parsed_args: argparse.Namespace,
  303. comments: List[_PRComment],
  304. raw_comment: Dict,
  305. ) -> None:
  306. """Collects top-level comments and reviews."""
  307. # Elide reviews that have no top-level comment body.
  308. if raw_comment["body"]:
  309. comments.append(_PRComment(raw_comment))
  310. def _accumulate_thread(
  311. parsed_args: argparse.Namespace,
  312. threads_by_path: Dict[str, List[_Thread]],
  313. raw_thread: Dict,
  314. ) -> None:
  315. """Adds threads to threads_by_path for later sorting."""
  316. thread = _Thread(parsed_args, raw_thread)
  317. # Optionally skip resolved threads.
  318. if not parsed_args.include_resolved and thread.is_resolved:
  319. return
  320. # Optionally skip threads where the given user isn't the last commenter.
  321. if (
  322. parsed_args.comments_after
  323. and thread.comments[-1].author == parsed_args.comments_after
  324. ):
  325. return
  326. # Optionally skip threads where the given user hasn't commented.
  327. if parsed_args.comments_from and not thread.has_comment_from(
  328. parsed_args.comments_from
  329. ):
  330. return
  331. if thread.path not in threads_by_path:
  332. threads_by_path[thread.path] = []
  333. threads_by_path[thread.path].append(thread)
  334. def _paginate(
  335. field_name: str,
  336. accumulator: Callable[[argparse.Namespace, Any, Dict], None],
  337. parsed_args: argparse.Namespace,
  338. client: github_helpers.Client,
  339. main_result: Dict,
  340. output: Any,
  341. ) -> None:
  342. """Paginates through the given field_name, accumulating results."""
  343. query = _query(parsed_args, field_name=field_name)
  344. path = ("repository", "pullRequest", field_name)
  345. for node in client.execute_and_paginate(
  346. query, path, first_page=main_result
  347. ):
  348. accumulator(parsed_args, output, node)
  349. def _fetch_comments(
  350. parsed_args: argparse.Namespace,
  351. ) -> Tuple[List[_PRComment], Dict[str, List[_Thread]]]:
  352. """Fetches comments and review threads from GitHub."""
  353. # Each _query call will print a '.' for progress.
  354. print(
  355. "Loading https://github.com/carbon-language/%s/pull/%d ..."
  356. % (parsed_args.repo, parsed_args.pr_num),
  357. end="",
  358. flush=True,
  359. )
  360. client = github_helpers.Client(parsed_args)
  361. # Get the initial set of review threads, and print the PR summary.
  362. main_result = client.execute(_query(parsed_args))
  363. pull_request = main_result["repository"]["pullRequest"]
  364. # Paginate comments, reviews, and review threads.
  365. comments: List[_PRComment] = []
  366. _paginate(
  367. "comments",
  368. _accumulate_pr_comment,
  369. parsed_args,
  370. client,
  371. main_result,
  372. comments,
  373. )
  374. # Combine reviews into comments for interleaving.
  375. _paginate(
  376. "reviews",
  377. _accumulate_pr_comment,
  378. parsed_args,
  379. client,
  380. main_result,
  381. comments,
  382. )
  383. threads_by_path: Dict[str, List[_Thread]] = {}
  384. _paginate(
  385. "reviewThreads",
  386. _accumulate_thread,
  387. parsed_args,
  388. client,
  389. main_result,
  390. threads_by_path,
  391. )
  392. # Now that loading is done (no more progress indicators), print the header.
  393. print()
  394. pr_desc = _Comment(
  395. pull_request["author"]["login"],
  396. pull_request["createdAt"],
  397. pull_request["title"],
  398. )
  399. print(pr_desc.format(parsed_args.long))
  400. return comments, threads_by_path
  401. def main() -> None:
  402. parsed_args = _parse_args()
  403. comments, threads_by_path = _fetch_comments(parsed_args)
  404. for comment in sorted(comments):
  405. print()
  406. print(comment.format(parsed_args.long))
  407. for path, threads in sorted(threads_by_path.items()):
  408. # Print a header for each path.
  409. print()
  410. print("=" * 80)
  411. print(path)
  412. print("=" * 80)
  413. for thread in sorted(threads):
  414. print()
  415. print(thread.format(parsed_args.long))
  416. if __name__ == "__main__":
  417. main()