pr_comments.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. #!/usr/bin/env python3
  2. """Figure out comments on a GitHub PR."""
  3. __copyright__ = """
  4. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  5. Exceptions. See /LICENSE for license information.
  6. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. """
  8. import argparse
  9. import datetime
  10. import hashlib
  11. import os
  12. import importlib.util
  13. import textwrap
  14. from typing import Any, Dict, Callable, List, Optional, Tuple
# Do some extra work to support direct runs: if the `github_tools` package
# cannot be imported, load `github_helpers.py` from the directory containing
# this file instead.
try:
    from github_tools import github_helpers
except ImportError:
    github_helpers_spec = importlib.util.spec_from_file_location(
        "github_helpers",
        os.path.join(os.path.dirname(__file__), "github_helpers.py"),
    )
    assert github_helpers_spec is not None
    github_helpers = importlib.util.module_from_spec(github_helpers_spec)
    # Execute the module body so that `github_helpers` is populated.
    github_helpers_spec.loader.exec_module(github_helpers)  # type: ignore
# The main query, into which other queries are composed. `_query` fills in
# %(repo)s and %(pr_num)d, plus the three sub-query placeholders; a placeholder
# that is set to the empty string drops that field from the request.
_QUERY = """
{
  repository(owner: "carbon-language", name: "%(repo)s") {
    pullRequest(number: %(pr_num)d) {
      author {
        login
      }
      createdAt
      title
      %(comments)s
      %(reviews)s
      %(review_threads)s
    }
  }
}
"""
# Queries for comments on the PR. These are direct, non-review comments on the
# PR. %(cursor)s and %(pagination)s are filled in by `_query` for the first
# page; when paginating a single field `_query` leaves them in place
# (presumably for the client's pagination helper to fill in -- confirm in
# github_helpers).
_QUERY_COMMENTS = """
comments(first: 100%(cursor)s) {
  nodes {
    author {
      login
    }
    body
    createdAt
    url
  }
  %(pagination)s
}
"""
# Queries for reviews on the PR, which have a non-empty body if a review has
# a summary comment. The selected fields mirror _QUERY_COMMENTS so that both
# kinds of node can be handled by the same accumulator.
_QUERY_REVIEWS = """
reviews(first: 100%(cursor)s) {
  nodes {
    author {
      login
    }
    body
    createdAt
    url
  }
  %(pagination)s
}
"""
# Queries for review threads on the PR. Only the threads themselves carry the
# cursor/pagination placeholders; the nested `comments` selection asks for the
# first 100 comments of each thread and is not paginated.
# NOTE(review): `createdAt` under `resolvedBy` is read from the resolving user
# object, so it is presumably that account's creation time rather than the
# time the thread was resolved -- confirm against the GitHub GraphQL schema.
_QUERY_REVIEW_THREADS = """
reviewThreads(first: 100%(cursor)s) {
  nodes {
    comments(first: 100) {
      nodes {
        author {
          login
        }
        body
        createdAt
        originalPosition
        originalCommit {
          abbreviatedOid
        }
        path
      }
    }
    isResolved
    resolvedBy {
      createdAt
      login
    }
  }
  %(pagination)s
}
"""
  100. class _Comment:
  101. """A comment, either on a review thread or top-level on the PR."""
  102. def __init__(self, author: str, timestamp: str, body: str):
  103. self.author = author
  104. self.timestamp = datetime.datetime.strptime(
  105. timestamp, "%Y-%m-%dT%H:%M:%SZ"
  106. )
  107. self.body = body
  108. @staticmethod
  109. def from_raw_comment(raw_comment: Dict) -> "_Comment":
  110. """Creates the comment from a raw comment dict."""
  111. return _Comment(
  112. raw_comment["author"]["login"],
  113. raw_comment["createdAt"],
  114. raw_comment["body"],
  115. )
  116. @staticmethod
  117. def _rewrap(content: str) -> str:
  118. """Rewraps a comment to fit in 80 columns with an indent."""
  119. lines = []
  120. for line in content.split("\n"):
  121. lines.extend(
  122. [
  123. x
  124. for x in textwrap.wrap(
  125. line,
  126. width=80,
  127. initial_indent=" " * 4,
  128. subsequent_indent=" " * 4,
  129. )
  130. ]
  131. )
  132. return "\n".join(lines)
  133. def format(self, long: bool) -> str:
  134. """Formats the comment."""
  135. if long:
  136. return "%s%s at %s:\n%s" % (
  137. " " * 2,
  138. self.author,
  139. self.timestamp.strftime("%Y-%m-%d %H:%M"),
  140. self._rewrap(self.body),
  141. )
  142. else:
  143. # Compact newlines down into pilcrows, leaving a space after.
  144. body = self.body.replace("\r", "").replace("\n", "¶ ")
  145. while "¶ ¶" in body:
  146. body = body.replace("¶ ¶", "¶¶")
  147. line = "%s%s: %s" % (" " * 2, self.author, body)
  148. return line if len(line) <= 80 else line[:77] + "..."
  149. class _PRComment(_Comment):
  150. """A comment on the top-level PR."""
  151. def __init__(self, raw_comment: Dict):
  152. super().__init__(
  153. raw_comment["author"]["login"],
  154. raw_comment["createdAt"],
  155. raw_comment["body"],
  156. )
  157. self.url = raw_comment["url"]
  158. def __lt__(self, other: "_PRComment") -> bool:
  159. return self.timestamp < other.timestamp
  160. def format(self, long: bool) -> str:
  161. return "%s\n%s" % (self.url, super().format(long))
  162. class _Thread:
  163. """A review thread on a line of code."""
  164. def __init__(self, parsed_args: argparse.Namespace, thread: Dict):
  165. self.is_resolved: bool = thread["isResolved"]
  166. comments = thread["comments"]["nodes"]
  167. first_comment = comments[0]
  168. self.line: int = first_comment["originalPosition"]
  169. self.path: str = first_comment["path"]
  170. # Link to the comment in the commit; GitHub features work better there
  171. # than in the conversation view. The diff_url allows viewing changes
  172. # since the comment, although the comment won't be visible there.
  173. template = (
  174. "https://github.com/carbon-language/%(repo)s/pull/%(pr_num)s/"
  175. "files/%(oid)s%(head)s#diff-%(path_md5)s%(line_side)s%(line)s"
  176. )
  177. # GitHub uses an md5 of the file's path for the link.
  178. path_md5 = hashlib.md5()
  179. path_md5.update(bytearray(self.path, "utf-8"))
  180. format_dict = {
  181. "head": "",
  182. "line_side": "R",
  183. "line": self.line,
  184. "oid": first_comment["originalCommit"]["abbreviatedOid"],
  185. "path_md5": path_md5.hexdigest(),
  186. "pr_num": parsed_args.pr_num,
  187. "repo": parsed_args.repo,
  188. }
  189. self.url: str = template % format_dict
  190. format_dict["head"] = "..HEAD"
  191. format_dict["line_side"] = "L"
  192. self.diff_url: str = template % format_dict
  193. self.comments = [
  194. _Comment.from_raw_comment(comment)
  195. for comment in thread["comments"]["nodes"]
  196. ]
  197. if self.is_resolved:
  198. self.comments.append(
  199. _Comment(
  200. thread["resolvedBy"]["login"],
  201. thread["resolvedBy"]["createdAt"],
  202. "<resolved>",
  203. )
  204. )
  205. def __lt__(self, other: "_Thread") -> bool:
  206. """Sort threads by line then timestamp."""
  207. if self.line != other.line:
  208. return bool(self.line < other.line)
  209. return self.comments[0].timestamp < other.comments[0].timestamp
  210. def format(self, long: bool) -> str:
  211. """Formats the review thread with comments."""
  212. lines = [
  213. "%s\n - line %d; %s"
  214. % (
  215. self.url,
  216. self.line,
  217. ("resolved" if self.is_resolved else "unresolved"),
  218. )
  219. ]
  220. if self.diff_url:
  221. lines.append(" - diff: %s" % self.diff_url)
  222. for comment in self.comments:
  223. lines.append(comment.format(long))
  224. return "\n".join(lines)
  225. def has_comment_from(self, comments_from: str) -> bool:
  226. """Returns true if comments has a comment from comments_from."""
  227. for comment in self.comments:
  228. if comment.author == comments_from:
  229. return True
  230. return False
  231. def _parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
  232. """Parses command-line arguments and flags."""
  233. parser = argparse.ArgumentParser(description="Lists comments on a PR.")
  234. parser.add_argument(
  235. "pr_num",
  236. metavar="PR#",
  237. type=int,
  238. help="The pull request to fetch comments from.",
  239. )
  240. github_helpers.add_access_token_arg(parser, "repo")
  241. parser.add_argument(
  242. "--comments-after",
  243. metavar="LOGIN",
  244. help="Only print threads where the final comment is not from the given "
  245. "user. For example, use when looking for threads that you still need "
  246. "to respond to.",
  247. )
  248. parser.add_argument(
  249. "--comments-from",
  250. metavar="LOGIN",
  251. help="Only print threads with comments from the given user. For "
  252. "example, use when looking for threads that you've commented on.",
  253. )
  254. parser.add_argument(
  255. "--include-resolved",
  256. action="store_true",
  257. help="Whether to include resolved review threads. By default, only "
  258. "unresolved threads will be shown.",
  259. )
  260. parser.add_argument(
  261. "--repo",
  262. choices=["carbon-lang"],
  263. default="carbon-lang",
  264. help="The Carbon repo to query. Defaults to %(default)s.",
  265. )
  266. parser.add_argument(
  267. "--long",
  268. action="store_true",
  269. help="Prints long output, with the full comment.",
  270. )
  271. return parser.parse_args(args=args)
  272. def _query(
  273. parsed_args: argparse.Namespace, field_name: Optional[str] = None
  274. ) -> str:
  275. """Returns a query for the passed field_name, or all by default."""
  276. print(".", end="", flush=True)
  277. format = {
  278. "pr_num": parsed_args.pr_num,
  279. "repo": parsed_args.repo,
  280. "comments": "",
  281. "review_threads": "",
  282. "reviews": "",
  283. }
  284. if field_name:
  285. # Use a cursor for pagination of the field.
  286. if field_name == "comments":
  287. format["comments"] = _QUERY_COMMENTS
  288. elif field_name == "reviewThreads":
  289. format["review_threads"] = _QUERY_REVIEW_THREADS
  290. elif field_name == "reviews":
  291. format["reviews"] = _QUERY_REVIEWS
  292. else:
  293. raise ValueError("Unexpected field_name: %s" % field_name)
  294. else:
  295. # Fetch the first page of all fields.
  296. subformat = {"cursor": "", "pagination": github_helpers.PAGINATION}
  297. format["comments"] = _QUERY_COMMENTS % subformat
  298. format["review_threads"] = _QUERY_REVIEW_THREADS % subformat
  299. format["reviews"] = _QUERY_REVIEWS % subformat
  300. return _QUERY % format
  301. def _accumulate_pr_comment(
  302. parsed_args: argparse.Namespace,
  303. comments: List[_PRComment],
  304. raw_comment: Dict,
  305. ) -> None:
  306. """Collects top-level comments and reviews."""
  307. # Elide reviews that have no top-level comment body.
  308. if raw_comment["body"]:
  309. comments.append(_PRComment(raw_comment))
  310. def _accumulate_thread(
  311. parsed_args: argparse.Namespace,
  312. threads_by_path: Dict[str, List[_Thread]],
  313. raw_thread: Dict,
  314. ) -> None:
  315. """Adds threads to threads_by_path for later sorting."""
  316. thread = _Thread(parsed_args, raw_thread)
  317. # Optionally skip resolved threads.
  318. if not parsed_args.include_resolved and thread.is_resolved:
  319. return
  320. # Optionally skip threads where the given user isn't the last commenter.
  321. if (
  322. parsed_args.comments_after
  323. and thread.comments[-1].author == parsed_args.comments_after
  324. ):
  325. return
  326. # Optionally skip threads where the given user hasn't commented.
  327. if parsed_args.comments_from and not thread.has_comment_from(
  328. parsed_args.comments_from
  329. ):
  330. return
  331. if thread.path not in threads_by_path:
  332. threads_by_path[thread.path] = []
  333. threads_by_path[thread.path].append(thread)
  334. def _paginate(
  335. field_name: str,
  336. accumulator: Callable[[argparse.Namespace, Any, Dict], None],
  337. parsed_args: argparse.Namespace,
  338. client: github_helpers.Client,
  339. main_result: Dict,
  340. output: Any,
  341. ) -> None:
  342. """Paginates through the given field_name, accumulating results."""
  343. query = _query(parsed_args, field_name=field_name)
  344. path = ("repository", "pullRequest", field_name)
  345. for node in client.execute_and_paginate(
  346. query, path, first_page=main_result
  347. ):
  348. accumulator(parsed_args, output, node)
  349. def _fetch_comments(
  350. parsed_args: argparse.Namespace,
  351. ) -> Tuple[List[_PRComment], Dict[str, List[_Thread]]]:
  352. """Fetches comments and review threads from GitHub."""
  353. # Each _query call will print a '.' for progress.
  354. print(
  355. "Loading https://github.com/carbon-language/%s/pull/%d ..."
  356. % (parsed_args.repo, parsed_args.pr_num),
  357. end="",
  358. flush=True,
  359. )
  360. client = github_helpers.Client(parsed_args)
  361. # Get the initial set of review threads, and print the PR summary.
  362. main_result = client.execute(_query(parsed_args))
  363. pull_request = main_result["repository"]["pullRequest"]
  364. # Paginate comments, reviews, and review threads.
  365. comments: List[_PRComment] = []
  366. _paginate(
  367. "comments",
  368. _accumulate_pr_comment,
  369. parsed_args,
  370. client,
  371. main_result,
  372. comments,
  373. )
  374. # Combine reviews into comments for interleaving.
  375. _paginate(
  376. "reviews",
  377. _accumulate_pr_comment,
  378. parsed_args,
  379. client,
  380. main_result,
  381. comments,
  382. )
  383. threads_by_path: Dict[str, List[_Thread]] = {}
  384. _paginate(
  385. "reviewThreads",
  386. _accumulate_thread,
  387. parsed_args,
  388. client,
  389. main_result,
  390. threads_by_path,
  391. )
  392. # Now that loading is done (no more progress indicators), print the header.
  393. print()
  394. pr_desc = _Comment(
  395. pull_request["author"]["login"],
  396. pull_request["createdAt"],
  397. pull_request["title"],
  398. )
  399. print(pr_desc.format(parsed_args.long))
  400. return comments, threads_by_path
  401. def main() -> None:
  402. parsed_args = _parse_args()
  403. comments, threads_by_path = _fetch_comments(parsed_args)
  404. for comment in sorted(comments):
  405. print()
  406. print(comment.format(parsed_args.long))
  407. for path, threads in sorted(threads_by_path.items()):
  408. # Print a header for each path.
  409. print()
  410. print("=" * 80)
  411. print(path)
  412. print("=" * 80)
  413. for thread in sorted(threads):
  414. print()
  415. print(thread.format(parsed_args.long))
# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()