github_helpers.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. """GitHub GraphQL helpers.
  2. https://developer.github.com/v4/explorer/ is very useful for building queries.
  3. """
  4. __copyright__ = """
  5. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  6. Exceptions. See /LICENSE for license information.
  7. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  8. """
  9. import argparse
  10. from collections.abc import Generator
  11. import os
  12. from typing import Optional
  13. # https://pypi.org/project/gql/
  14. import gql # type: ignore
  15. import gql.transport.requests # type: ignore
  # Name of the environment variable consulted for a default access token.
  _ENV_TOKEN = "GITHUB_ACCESS_TOKEN"

  # GraphQL selection requesting paging metadata: whether another page exists,
  # the cursor to resume from, and the overall number of results. It is
  # substituted for the 'pagination' field of paginated query templates.
  PAGINATION = (
      "pageInfo {\n"
      "hasNextPage\n"
      "endCursor\n"
      "}\n"
      "totalCount"
  )
  def add_access_token_arg(
      parser: argparse.ArgumentParser, permissions: str
  ) -> None:
      """Adds a flag to set the access token.

      The flag's default is read from the environment variable named by
      _ENV_TOKEN at the time this function is called. The flag is only
      required when that variable is unset or empty.

      Arguments:
        parser: The parser that receives the --access-token flag.
        permissions: Description of the permissions the token needs. It is
          shown verbatim in the flag's help text.
      """
      access_token = os.environ.get(_ENV_TOKEN)
      # argparse %-formats help strings again when it renders them, so any
      # literal '%' in the interpolated values must be doubled here; otherwise
      # a permissions string such as "100% read" breaks --help.
      help_text = (
          "The access token for use with GitHub. May also be specified in "
          "the environment as %s. The access token should have permissions: %s"
          % (_ENV_TOKEN.replace("%", "%%"), permissions.replace("%", "%%"))
      )
      parser.add_argument(
          "--access-token",
          metavar="ACCESS_TOKEN",
          default=access_token,
          # Only demand the flag when the environment did not supply a token.
          required=not access_token,
          help=help_text,
      )
  class Client:
      """A GitHub GraphQL client."""

      def __init__(self, parsed_args: argparse.Namespace):
          """Connects to GitHub.

          Arguments:
            parsed_args: Parsed flags. Its `access_token` attribute is sent
              as the bearer token in the Authorization header.
          """
          transport = gql.transport.requests.RequestsHTTPTransport(
              url="https://api.github.com/graphql",
              headers={"Authorization": "bearer %s" % parsed_args.access_token},
          )
          self._client = gql.Client(transport=transport)

      def execute(self, query: str) -> dict:
          """Runs a query.

          Arguments:
            query: The GraphQL query text.

          Returns:
            The result of executing the parsed query on the underlying client.
          """
          return self._client.execute(gql.gql(query))  # type: ignore

      def execute_and_paginate(
          self,
          query: str,
          path: tuple[str, ...],
          first_page: Optional[dict] = None,
      ) -> Generator[dict, None, None]:
          """Runs a query with pagination, yielding each node in turn.

          Arguments:
            query: The GraphQL query template, which must have both 'cursor'
              and 'pagination' fields to fill in. It is filled in with
              %-formatting from a mapping, so the fields are written
              %(cursor)s and %(pagination)s. The cursor should be part of the
              location query (with 'first'), and the pagination should be at
              the same level as nodes.
            path: A tuple of strings indicating the path to the nodes in the
              result.
            first_page: An optional object for the first page of results,
              which will otherwise automatically be collected. This exists for
              callers to optimize by collecting other data with the first
              page.

          Yields:
            Each entry of 'nodes' from every page, in order.

          Raises:
            AssertionError: If the number of yielded nodes differs from the
              'totalCount' reported on the first page.
          """
          cursor = ""
          seen = 0
          expected = None
          result = first_page
          while True:
              if not result:
                  result = self.execute(
                      query % {"cursor": cursor, "pagination": PAGINATION}
                  )

              # Follow the path to the nodes being paginated.
              node_parent = result
              for key in path:
                  node_parent = node_parent[key]

              # Remember the total from the first page. Comparing against
              # None keeps a legitimate total of 0 from looking unset.
              if expected is None:
                  expected = node_parent["totalCount"]

              # Yield each node individually.
              for node in node_parent["nodes"]:
                  yield node
                  seen += 1

              # Check for pagination, verifying the total count on exit.
              page_info = node_parent["pageInfo"]
              if not page_info["hasNextPage"]:
                  # Raised explicitly rather than with `assert` so the check
                  # on the server's response still runs under `python -O`.
                  if expected != seen:
                      raise AssertionError(
                          "exp %d != actual %d at path %s"
                          % (expected, seen, path)
                      )
                  return

              # Resume the next request after the last node of this page.
              cursor = ' after: "%s"' % page_info["endCursor"]
              result = None