#!/usr/bin/env python3

"""Figure out comments on a GitHub PR."""

__copyright__ = """
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""

import argparse
import datetime
import hashlib
import os
import importlib.util
import textwrap
from typing import Any, Callable, Optional

# Do some extra work to support direct runs.
try:
    from github_tools import github_helpers
except ImportError:
    github_helpers_spec = importlib.util.spec_from_file_location(
        "github_helpers",
        os.path.join(os.path.dirname(__file__), "github_helpers.py"),
    )
    assert github_helpers_spec is not None
    github_helpers = importlib.util.module_from_spec(github_helpers_spec)
    github_helpers_spec.loader.exec_module(github_helpers)  # type: ignore


# The main query, into which other queries are composed.
#
# Placeholders, all filled in by _query() using %-formatting:
# - %(repo)s and %(pr_num)d come from the parsed command-line arguments.
# - %(comments)s, %(reviews)s, and %(review_threads)s each receive one of the
#   sub-queries defined below, or an empty string to leave that field out.
# The repository owner is always "carbon-language".
_QUERY = """
{
  repository(owner: "carbon-language", name: "%(repo)s") {
    pullRequest(number: %(pr_num)d) {
      author {
        login
      }
      createdAt
      title

      %(comments)s
      %(reviews)s
      %(review_threads)s
    }
  }
}
"""

# Queries for comments on the PR. These are direct, non-review comments on the
# PR. Up to 100 comments are requested per page.
#
# For the first page, _query() fills %(cursor)s with an empty string and
# %(pagination)s with github_helpers.PAGINATION. For later pages, _query()
# leaves both placeholders in the query it returns; presumably
# github_helpers.Client.execute_and_paginate fills them in -- confirm there.
_QUERY_COMMENTS = """
      comments(first: 100%(cursor)s) {
        nodes {
          author {
            login
          }
          body
          createdAt
          url
        }
        %(pagination)s
      }
"""

# Queries for reviews on the PR, which have a non-empty body if a review has
# a summary comment. The requested fields are the same as for
# _QUERY_COMMENTS, so both kinds of node can be handled by the same code.
#
# %(cursor)s and %(pagination)s are handled by _query() the same way as for
# the other sub-queries: filled in for the first page, left in place
# otherwise.
_QUERY_REVIEWS = """
      reviews(first: 100%(cursor)s) {
        nodes {
          author {
            login
          }
          body
          createdAt
          url
        }
        %(pagination)s
      }
"""

# Queries for review threads on the PR.
#
# Only the outer reviewThreads connection carries the %(cursor)s and
# %(pagination)s placeholders; the nested comments connection has neither, so
# at most the first 100 comments of each thread are requested.
#
# NOTE(review): resolvedBy.createdAt is later used as the timestamp of the
# "<resolved>" marker on a thread. It looks like this may be a property of the
# resolving user rather than the time of resolution -- confirm against the
# GitHub GraphQL schema.
_QUERY_REVIEW_THREADS = """
      reviewThreads(first: 100%(cursor)s) {
        nodes {
          comments(first: 100) {
            nodes {
              author {
                login
              }
              body
              createdAt
              originalPosition
              originalCommit {
                abbreviatedOid
              }
              path
            }
          }
          isResolved
          resolvedBy {
            createdAt
            login
          }
        }
        %(pagination)s
      }
"""


class _Comment:
    """A comment, either on a review thread or top-level on the PR."""

    def __init__(self, author: str, timestamp: str, body: str):
        self.author = author
        self.timestamp = datetime.datetime.strptime(
            timestamp, "%Y-%m-%dT%H:%M:%SZ"
        )
        self.body = body

    @staticmethod
    def from_raw_comment(raw_comment: dict) -> "_Comment":
        """Creates the comment from a raw comment dict."""
        return _Comment(
            raw_comment["author"]["login"],
            raw_comment["createdAt"],
            raw_comment["body"],
        )

    @staticmethod
    def _rewrap(content: str) -> str:
        """Rewraps a comment to fit in 80 columns with an indent."""
        lines = []
        for line in content.split("\n"):
            lines.extend(
                [
                    x
                    for x in textwrap.wrap(
                        line,
                        width=80,
                        initial_indent=" " * 4,
                        subsequent_indent=" " * 4,
                    )
                ]
            )
        return "\n".join(lines)

    def format(self, long: bool) -> str:
        """Formats the comment."""
        if long:
            return "%s%s at %s:\n%s" % (
                " " * 2,
                self.author,
                self.timestamp.strftime("%Y-%m-%d %H:%M"),
                self._rewrap(self.body),
            )
        else:
            # Compact newlines down into pilcrows, leaving a space after.
            body = self.body.replace("\r", "").replace("\n", "¶ ")
            while "¶ ¶" in body:
                body = body.replace("¶ ¶", "¶¶")
            line = "%s%s: %s" % (" " * 2, self.author, body)
            return line if len(line) <= 80 else line[:77] + "..."


class _PRComment(_Comment):
    """A comment on the top-level PR.

    Built from either a PR comment node or a review node; both carry the same
    "author", "createdAt", "body", and "url" keys. Instances order by creation
    time so that comments and reviews can be interleaved with sorted().

    Attributes:
        url: Link to the comment or review.
    """

    def __init__(self, raw_comment: dict):
        """Initializes the comment from a raw comment or review dict.

        Args:
            raw_comment: A dict with "author", "createdAt", "body", and "url"
                keys. "author" is normally a dict with a "login" key; when it
                is None (or empty) the comment is attributed to "ghost".
        """
        # NOTE(review): GitHub's GraphQL `author` field is nullable,
        # presumably for deleted accounts (which the web UI shows as
        # "ghost"). Fall back instead of failing on None["login"].
        raw_author = raw_comment["author"]
        super().__init__(
            raw_author["login"] if raw_author else "ghost",
            raw_comment["createdAt"],
            raw_comment["body"],
        )
        self.url = raw_comment["url"]

    def __lt__(self, other: "_PRComment") -> bool:
        """Orders comments by creation time."""
        return self.timestamp < other.timestamp

    def format(self, long: bool) -> str:
        """Formats the comment, preceded by a line with its URL."""
        return "%s\n%s" % (self.url, super().format(long))


class _Thread:
    """A review thread on a line of code."""

    def __init__(self, parsed_args: argparse.Namespace, thread: dict):
        self.is_resolved: bool = thread["isResolved"]

        comments = thread["comments"]["nodes"]
        first_comment = comments[0]
        self.line: int = first_comment["originalPosition"]
        self.path: str = first_comment["path"]

        # Link to the comment in the commit; GitHub features work better there
        # than in the conversation view. The diff_url allows viewing changes
        # since the comment, although the comment won't be visible there.
        template = (
            "https://github.com/carbon-language/%(repo)s/pull/%(pr_num)s/"
            "files/%(oid)s%(head)s#diff-%(path_md5)s%(line_side)s%(line)s"
        )
        # GitHub uses an md5 of the file's path for the link.
        path_md5 = hashlib.md5()
        path_md5.update(bytearray(self.path, "utf-8"))
        format_dict = {
            "head": "",
            "line_side": "R",
            "line": self.line,
            "oid": first_comment["originalCommit"]["abbreviatedOid"],
            "path_md5": path_md5.hexdigest(),
            "pr_num": parsed_args.pr_num,
            "repo": parsed_args.repo,
        }
        self.url: str = template % format_dict
        format_dict["head"] = "..HEAD"
        format_dict["line_side"] = "L"
        self.diff_url: str = template % format_dict

        self.comments = [
            _Comment.from_raw_comment(comment)
            for comment in thread["comments"]["nodes"]
        ]
        if self.is_resolved:
            self.comments.append(
                _Comment(
                    thread["resolvedBy"]["login"],
                    thread["resolvedBy"]["createdAt"],
                    "<resolved>",
                )
            )

    def __lt__(self, other: "_Thread") -> bool:
        """Sort threads by line then timestamp."""
        if self.line != other.line:
            return bool(self.line < other.line)
        return self.comments[0].timestamp < other.comments[0].timestamp

    def format(self, long: bool) -> str:
        """Formats the review thread with comments."""
        lines = [
            "%s\n  - line %d; %s"
            % (
                self.url,
                self.line,
                ("resolved" if self.is_resolved else "unresolved"),
            )
        ]
        if self.diff_url:
            lines.append("  - diff: %s" % self.diff_url)
        for comment in self.comments:
            lines.append(comment.format(long))
        return "\n".join(lines)

    def has_comment_from(self, comments_from: str) -> bool:
        """Returns true if comments has a comment from comments_from."""
        for comment in self.comments:
            if comment.author == comments_from:
                return True
        return False


def _parse_args(args: Optional[list[str]] = None) -> argparse.Namespace:
    """Builds the command-line parser and parses the given arguments.

    Args:
        args: Argument strings to parse; None is passed through to argparse.

    Returns:
        The parsed namespace. It has pr_num, comments_after, comments_from,
        include_resolved, repo, and long attributes, plus whatever
        github_helpers.add_access_token_arg registers.
    """
    parser = argparse.ArgumentParser(description="Lists comments on a PR.")
    parser.add_argument(
        "pr_num",
        help="The pull request to fetch comments from.",
        metavar="PR#",
        type=int,
    )
    # Registered here so the help output keeps its argument order.
    github_helpers.add_access_token_arg(parser, "repo")

    # Optional flags, in the order they should appear in the help output.
    optional_flags: list[tuple[str, dict[str, Any]]] = [
        (
            "--comments-after",
            {
                "metavar": "LOGIN",
                "help": "Only print threads where the final comment is not "
                "from the given "
                "user. For example, use when looking for threads that you "
                "still need "
                "to respond to.",
            },
        ),
        (
            "--comments-from",
            {
                "metavar": "LOGIN",
                "help": "Only print threads with comments from the given "
                "user. For "
                "example, use when looking for threads that you've commented "
                "on.",
            },
        ),
        (
            "--include-resolved",
            {
                "action": "store_true",
                "help": "Whether to include resolved review threads. By "
                "default, only "
                "unresolved threads will be shown.",
            },
        ),
        (
            "--repo",
            {
                "choices": ["carbon-lang"],
                "default": "carbon-lang",
                "help": "The Carbon repo to query. Defaults to %(default)s.",
            },
        ),
        (
            "--long",
            {
                "action": "store_true",
                "help": "Prints long output, with the full comment.",
            },
        ),
    ]
    for flag, options in optional_flags:
        parser.add_argument(flag, **options)

    return parser.parse_args(args=args)


def _query(
    parsed_args: argparse.Namespace, field_name: Optional[str] = None
) -> str:
    """Returns the GraphQL query for one field, or for all fields by default.

    Also prints a "." to stdout as a progress indicator.

    Args:
        parsed_args: Parsed flags; pr_num and repo are put into the query.
        field_name: One of "comments", "reviews", or "reviewThreads" to query
            only that field, leaving its cursor and pagination placeholders
            unfilled. If omitted, the first page of all three is queried.

    Raises:
        ValueError: If field_name is given but isn't one of the known fields.
    """
    print(".", end="", flush=True)
    substitutions = {
        "pr_num": parsed_args.pr_num,
        "repo": parsed_args.repo,
        "comments": "",
        "review_threads": "",
        "reviews": "",
    }
    # Sub-query templates, keyed by their placeholder in _QUERY.
    subqueries = {
        "comments": _QUERY_COMMENTS,
        "review_threads": _QUERY_REVIEW_THREADS,
        "reviews": _QUERY_REVIEWS,
    }

    if not field_name:
        # Fetch the first page of all fields.
        first_page = {"cursor": "", "pagination": github_helpers.PAGINATION}
        for placeholder, template in subqueries.items():
            substitutions[placeholder] = template % first_page
        return _QUERY % substitutions

    # GraphQL field names mapped to their placeholder in _QUERY.
    placeholder_for_field = {
        "comments": "comments",
        "reviewThreads": "review_threads",
        "reviews": "reviews",
    }
    if field_name not in placeholder_for_field:
        raise ValueError("Unexpected field_name: %s" % field_name)

    # Insert the template unformatted: %-formatting is a single pass, so its
    # cursor and pagination placeholders survive into the returned query.
    placeholder = placeholder_for_field[field_name]
    substitutions[placeholder] = subqueries[placeholder]
    return _QUERY % substitutions


def _accumulate_pr_comment(
    parsed_args: argparse.Namespace,
    comments: list[_PRComment],
    raw_comment: dict,
) -> None:
    """Appends a top-level comment or review to comments, if it has a body.

    Args:
        parsed_args: Unused; present to match the accumulator signature.
        comments: The list to append to.
        raw_comment: A raw comment or review dict.
    """
    # Reviews without a summary have an empty body; leave those out.
    if not raw_comment["body"]:
        return
    comments.append(_PRComment(raw_comment))


def _accumulate_thread(
    parsed_args: argparse.Namespace,
    threads_by_path: dict[str, list[_Thread]],
    raw_thread: dict,
) -> None:
    """Files a review thread under its path, unless a filter excludes it.

    Args:
        parsed_args: Parsed flags; include_resolved, comments_after, and
            comments_from control which threads are kept.
        threads_by_path: Maps a file path to the threads kept for it.
        raw_thread: A raw review thread dict.
    """
    thread = _Thread(parsed_args, raw_thread)

    # Resolved threads are hidden unless explicitly requested.
    if not parsed_args.include_resolved and thread.is_resolved:
        return

    # With --comments-after, hide threads whose final comment is already from
    # the given user.
    awaiting_reply_from = parsed_args.comments_after
    if (
        awaiting_reply_from
        and thread.comments[-1].author == awaiting_reply_from
    ):
        return

    # With --comments-from, hide threads the given user hasn't commented on.
    participant = parsed_args.comments_from
    if participant and not thread.has_comment_from(participant):
        return

    threads_by_path.setdefault(thread.path, []).append(thread)


def _paginate(
    field_name: str,
    accumulator: Callable[[argparse.Namespace, Any, dict], None],
    parsed_args: argparse.Namespace,
    client: github_helpers.Client,
    main_result: dict,
    output: Any,
) -> None:
    """Feeds every node of a paginated PR field to an accumulator.

    Args:
        field_name: The pullRequest field to page through.
        accumulator: Called as accumulator(parsed_args, output, node) for
            each node.
        parsed_args: Parsed flags, passed to the query and the accumulator.
        client: The client used to run the paginated query.
        main_result: Result of the initial query, passed to the client as the
            first page.
        output: The collection the accumulator adds to.
    """
    nodes = client.execute_and_paginate(
        _query(parsed_args, field_name=field_name),
        ("repository", "pullRequest", field_name),
        first_page=main_result,
    )
    for node in nodes:
        accumulator(parsed_args, output, node)


def _fetch_comments(
    parsed_args: argparse.Namespace,
) -> tuple[list[_PRComment], dict[str, list[_Thread]]]:
    """Fetches the PR's comments and review threads, printing progress.

    Prints a loading line, then, once everything is fetched, a header built
    from the PR's author, creation time, and title.

    Args:
        parsed_args: Parsed flags, as returned by _parse_args.

    Returns:
        A tuple of the top-level comments (including reviews with a body) and
        the kept review threads grouped by file path.
    """
    # No trailing newline: each _query call adds a '.' to this line.
    loading_line = "Loading https://github.com/carbon-language/%s/pull/%d ..." % (
        parsed_args.repo,
        parsed_args.pr_num,
    )
    print(loading_line, end="", flush=True)

    client = github_helpers.Client(parsed_args)

    # The initial query returns the PR summary and the first page of each
    # field.
    main_result = client.execute(_query(parsed_args))
    pull_request = main_result["repository"]["pullRequest"]

    comments: list[_PRComment] = []
    threads_by_path: dict[str, list[_Thread]] = {}
    # Reviews go into the same list as comments so they can be interleaved.
    passes = (
        ("comments", _accumulate_pr_comment, comments),
        ("reviews", _accumulate_pr_comment, comments),
        ("reviewThreads", _accumulate_thread, threads_by_path),
    )
    for field_name, accumulator, output in passes:
        _paginate(
            field_name, accumulator, parsed_args, client, main_result, output
        )

    # Loading is done, so end the progress line before printing the header.
    print()
    header = _Comment(
        pull_request["author"]["login"],
        pull_request["createdAt"],
        pull_request["title"],
    )
    print(header.format(parsed_args.long))
    return comments, threads_by_path


def main() -> None:
    """Prints a PR's top-level comments, then its review threads by file."""
    parsed_args = _parse_args()
    comments, threads_by_path = _fetch_comments(parsed_args)
    long_output = parsed_args.long

    # Top-level comments and reviews, in their sort order.
    for comment in sorted(comments):
        print("", comment.format(long_output), sep="\n")

    rule = "=" * 80
    for path in sorted(threads_by_path):
        # A blank line, then the path between two rules, heads each file.
        print("", rule, path, rule, sep="\n")

        for thread in sorted(threads_by_path[path]):
            print("", thread.format(long_output), sep="\n")


if __name__ == "__main__":
    main()