check_dependent_pr.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. #!/usr/bin/env python3
  2. """Check if a PR depends on other open PRs based on shared commits.
  3. Usage examples:
  4. # Check a specific PR in dry-run mode:
  5. GITHUB_ACCESS_TOKEN=$(gh auth token) \
  6. python3 github_tools/check_dependent_pr.py --pr-number <PR_NUMBER> --dry-run
  7. # Scan all dependent PRs in dry-run mode:
  8. GITHUB_ACCESS_TOKEN=$(gh auth token) \
  9. python3 github_tools/check_dependent_pr.py --scan --dry-run
  10. """
  11. __copyright__ = """
  12. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  13. Exceptions. See /LICENSE for license information.
  14. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  15. """
  16. import argparse
  17. import datetime
  18. import importlib.util
  19. import json
  20. import re
  21. import os
  22. import sys
  23. import requests
  24. from typing import Any, Optional
  # Prefer the package import. When the script is run directly (so the
  # `github_tools` package is not importable), fall back to loading the sibling
  # `github_helpers.py` file by path.
  try:
      from github_tools import github_helpers
  except ImportError:
      github_helpers_spec = importlib.util.spec_from_file_location(
          "github_helpers",
          os.path.join(os.path.dirname(__file__), "github_helpers.py"),
      )
      # Raise explicitly rather than `assert`: asserts are stripped under
      # `python -O`, and an unusable spec or loader would otherwise surface
      # later as an unrelated AttributeError.
      if github_helpers_spec is None or github_helpers_spec.loader is None:
          raise ImportError(
              "Unable to create an import spec for github_helpers.py next to "
              "this script."
          )
      github_helpers = importlib.util.module_from_spec(github_helpers_spec)
      github_helpers_spec.loader.exec_module(github_helpers)
  36. # Queries
  37. _QUERY_OPEN_PRS = """
  38. {
  39. repository(owner: "carbon-language", name: "carbon-lang") {
  40. pullRequests(states: OPEN, first: 100%(cursor)s) {
  41. nodes {
  42. number
  43. headRefOid
  44. commits(first: 100) {
  45. nodes {
  46. commit {
  47. oid
  48. }
  49. }
  50. }
  51. }
  52. %(pagination)s
  53. }
  54. }
  55. }
  56. """
  57. _QUERY_DEPENDENT_PRS = """
  58. {
  59. repository(owner: "carbon-language", name: "carbon-lang") {
  60. pullRequests(states: OPEN, labels: ["dependent"], first: 100%(cursor)s) {
  61. nodes {
  62. number
  63. }
  64. %(pagination)s
  65. }
  66. }
  67. }
  68. """
  69. _QUERY_PR_DETAILS = """
  70. query GetPrDetails($prNumber: Int!) {
  71. repository(owner: "carbon-language", name: "carbon-lang") {
  72. pullRequest(number: $prNumber) {
  73. id
  74. headRefOid
  75. labels(first: 100) {
  76. nodes {
  77. name
  78. id
  79. }
  80. }
  81. commits(first: 100) {
  82. nodes {
  83. commit {
  84. oid
  85. }
  86. }
  87. }
  88. comments(first: 100) {
  89. nodes {
  90. id
  91. body
  92. isMinimized
  93. }
  94. }
  95. }
  96. }
  97. }
  98. """
  99. _QUERY_LABEL = """
  100. {
  101. repository(owner: "carbon-language", name: "carbon-lang") {
  102. label(name: "dependent") {
  103. id
  104. }
  105. }
  106. }
  107. """
  108. _QUERY_MAX_MERGED_PR = """
  109. {
  110. repository(owner: "carbon-language", name: "carbon-lang") {
  111. pullRequests(
  112. states: MERGED
  113. orderBy: {field: CREATED_AT, direction: DESC}
  114. first: 1
  115. ) {
  116. nodes {
  117. number
  118. }
  119. }
  120. }
  121. }
  122. """
  123. _MUTATION_ADD_LABEL = """
  124. mutation AddLabel($labelableId: ID!, $labelIds: [ID!]!) {
  125. addLabelsToLabelable(
  126. input: {labelableId: $labelableId, labelIds: $labelIds}
  127. ) {
  128. clientMutationId
  129. }
  130. }
  131. """
  132. _MUTATION_REMOVE_LABEL = """
  133. mutation RemoveLabel($labelableId: ID!, $labelIds: [ID!]!) {
  134. removeLabelsFromLabelable(
  135. input: {labelableId: $labelableId, labelIds: $labelIds}
  136. ) {
  137. clientMutationId
  138. }
  139. }
  140. """
  141. _MUTATION_UPDATE_COMMENT = """
  142. mutation UpdateComment($id: ID!, $body: String!) {
  143. updateIssueComment(input: {id: $id, body: $body}) {
  144. clientMutationId
  145. }
  146. }
  147. """
  148. _MUTATION_ADD_COMMENT = """
  149. mutation AddComment($subjectId: ID!, $body: String!) {
  150. addComment(input: {subjectId: $subjectId, body: $body}) {
  151. clientMutationId
  152. }
  153. }
  154. """
  155. def _print_err(*args: Any, **kwargs: Any) -> None:
  156. """Prints to stderr."""
  157. kwargs["file"] = sys.stderr
  158. print(*args, **kwargs)
  159. def _parse_pr_number(x: Any) -> Optional[int]:
  160. """Parses x into a positive integer if possible."""
  161. if isinstance(x, int):
  162. return x if x > 0 else None
  163. if isinstance(x, str) and x.isdigit():
  164. val = int(x)
  165. return val if val > 0 else None
  166. return None
  167. def _parse_and_validate_state(
  168. json_str: str,
  169. open_pr_numbers: set[int],
  170. max_merged_pr: int = 10000,
  171. pr_number: int = 0,
  172. ) -> tuple[list[int], list[int], Optional[str]]:
  173. """Parses and validates the state from a JSON string."""
  174. parsed_open: list[int] = []
  175. parsed_merged: list[int] = []
  176. first_commit: Optional[str] = None
  177. raw_state = json.loads(json_str)
  178. if not isinstance(raw_state, dict):
  179. raise ValueError(f"PR #{pr_number}: Parsed JSON is not a dictionary.")
  180. for x in raw_state.get("open", []):
  181. val = _parse_pr_number(x)
  182. if val is None:
  183. raise ValueError(
  184. f"PR #{pr_number}: Invalid PR number format in 'open': {x}"
  185. )
  186. elif val not in open_pr_numbers and val > max_merged_pr:
  187. raise ValueError(
  188. f"PR #{pr_number}: Rejecting PR #{val} from 'open' because "
  189. "it is not an open PR and exceeds maximum merged PR "
  190. f"#{max_merged_pr}."
  191. )
  192. else:
  193. parsed_open.append(val)
  194. for x in raw_state.get("merged", []):
  195. val = _parse_pr_number(x)
  196. if val is None:
  197. raise ValueError(
  198. f"PR #{pr_number}: Invalid PR number format in 'merged': {x}"
  199. )
  200. elif val in open_pr_numbers:
  201. raise ValueError(
  202. f"PR #{pr_number}: Rejecting PR #{val} from 'merged' "
  203. "because it is actually open."
  204. )
  205. elif val > max_merged_pr:
  206. raise ValueError(
  207. f"PR #{pr_number}: Rejecting PR #{val} from 'merged' "
  208. f"because it exceeds maximum merged PR #{max_merged_pr}."
  209. )
  210. else:
  211. parsed_merged.append(val)
  212. if "first_commit" in raw_state:
  213. fc = raw_state["first_commit"]
  214. if isinstance(fc, str) and re.fullmatch(r"[0-9a-fA-F]{40}", fc):
  215. first_commit = fc
  216. else:
  217. raise ValueError(
  218. f"PR #{pr_number}: Invalid commit OID format in "
  219. f"'first_commit': {fc}"
  220. )
  221. return parsed_open, parsed_merged, first_commit
  def _set_commit_status(
      sha: str,
      state: str,
      description: str,
      token: str,
      dry_run: bool,
  ) -> None:
      """Sets the commit status via the GitHub REST API.

      This is best-effort: request failures are reported on stderr and not
      raised to the caller.

      Args:
          sha: Commit the status is attached to.
          state: Status state string sent to the API (callers in this file use
              "pending" and "success").
          description: Human-readable description shown with the status.
          token: Access token sent in the Authorization header.
          dry_run: When true, only report what would be sent.
      """
      # NOTE(review): GitHub is understood to reject status descriptions longer
      # than 140 characters; confirm the limit against the current API docs.
      max_description_len = 140
      if len(description) > max_description_len:
          description = description[: max_description_len - 3] + "..."

      if dry_run:
          _print_err(
              f"[Dry-run] Would set commit status on {sha[:8]} to {state} "
              f"({description})"
          )
          return

      url = (
          "https://api.github.com/repos/carbon-language/carbon-lang/"
          f"statuses/{sha}"
      )
      headers = {
          "Authorization": f"bearer {token}",
          "Accept": "application/vnd.github.v3+json",
      }
      payload = {
          "state": state,
          "description": description,
          "context": "PR dependencies check",
      }
      try:
          # Without a timeout, requests waits indefinitely on a stalled
          # connection, which would hang the whole run.
          response = requests.post(
              url, headers=headers, json=payload, timeout=30
          )
          response.raise_for_status()
      except requests.RequestException as e:
          _print_err(f"Error setting commit status on {sha[:8]}: {e}")
      else:
          _print_err(f"Set commit status on {sha[:8]} to {state}")
  def _process_pr(
      client: github_helpers.Client,
      pr_number: int,
      pr_to_commits: dict[int, set[str]],
      pr_to_head: dict[int, str],
      open_pr_numbers: set[int],
      label_id: str,
      token: str,
      dry_run: bool = False,
      scanning: bool = False,
      max_merged_pr: int = 10000,
  ) -> None:
      """Processes a single PR to check for dependencies and update comments.

      Determines which open PRs this PR depends on, sets the commit status on
      the PR head, and -- when the tracked state changed -- updates the
      'dependent' label and the marker comment that stores the state.

      Args:
          client: GitHub GraphQL client used for queries and mutations.
          pr_number: The PR to process.
          pr_to_commits: Commit OIDs of each open PR, keyed by PR number.
          pr_to_head: Head commit OID of each open PR, keyed by PR number.
          open_pr_numbers: Numbers of all currently open PRs.
          label_id: Node id of the 'dependent' label.
          token: Access token for the commit status REST call.
          dry_run: When true, report mutations instead of performing them.
          scanning: When true, never add the label or create a new comment;
              only existing state is updated.
          max_merged_pr: Upper bound used when validating stored PR numbers.

      Raises:
          ValueError: If the state stored in the marker comment is invalid.
      """
      current_res = client.execute(
          _QUERY_PR_DETAILS, variable_values={"prNumber": pr_number}
      )
      pr_node = current_res["repository"]["pullRequest"]
      if not pr_node:
          _print_err(f"PR #{pr_number} not found.")
          return

      pr_id = pr_node["id"]
      commits = pr_node["commits"]["nodes"]
      comments = pr_node["comments"]["nodes"]
      labels = pr_node["labels"]["nodes"]

      open_deps: list[int] = []
      current_oids = [c["commit"]["oid"] for c in commits]
      if len(commits) <= 1:
          _print_err(
              f"PR #{pr_number} has 1 or fewer commits, skipping overlap check."
          )
      else:
          # Dependency Logic: Overlap and Sequence
          #
          # We consider PR B dependent on PR A if:
          # 1. The dependency PR A was created before PR B (A.number < B.number).
          # 2. There is a non-empty overlap of commits between PR A and PR B.
          # 3. PR B has at least one commit not present in PR A.
          #
          # Why this works:
          # - Ensures the dependency direction reflects the creation sequence.
          # - Handles minor fixes or differences by only requiring overlap, not
          #   strict subset inclusion.
          # - Avoids circular dependencies via the sequence check.
          current_oids_set = set(current_oids)
          for other_pr_num, other_oids_set in pr_to_commits.items():
              if other_pr_num >= pr_number:
                  continue
              if not (other_oids_set & current_oids_set):
                  continue
              if not (current_oids_set - other_oids_set):
                  continue
              open_deps.append(other_pr_num)

      # Locate the marker comment holding previously stored state. A hidden
      # (minimized) marker comment is ignored, as if it had never been posted.
      marker_prefix = "<!-- check_dependent_pr "
      existing_comment_id = None
      parsed_open_deps: list[int] = []
      parsed_merged_deps: list[int] = []
      previous_first_commit: Optional[str] = None
      matching_comment = next(
          (
              comment
              for comment in comments
              if marker_prefix in comment["body"]
              and not comment.get("isMinimized")
          ),
          None,
      )
      if matching_comment:
          existing_comment_id = matching_comment["id"]
          body = matching_comment["body"]
          start = body.find(marker_prefix) + len(marker_prefix)
          end = body.find(" -->", start)
          if end != -1:
              parsed_open_deps, parsed_merged_deps, previous_first_commit = (
                  _parse_and_validate_state(
                      body[start:end], open_pr_numbers, max_merged_pr, pr_number
                  )
              )

      # Keep tracking previously identified dependencies if they are still
      # open, even if they no longer pass the overlap check (e.g. they got new
      # commits).
      for pr in parsed_open_deps:
          if pr in open_pr_numbers and pr not in open_deps:
              open_deps.append(pr)

      # Previously tracked dependencies that are no longer open are recorded
      # as merged.
      newly_merged_deps = [
          pr
          for pr in parsed_open_deps
          if pr not in open_deps and pr not in open_pr_numbers
      ]

      # Use a canonical (sorted, de-duplicated) order so that the change check
      # below and the stored state do not depend on discovery or set iteration
      # order; otherwise an unchanged PR could get its comment rewritten.
      open_deps = sorted(set(open_deps))
      merged_deps = sorted(set(parsed_merged_deps + newly_merged_deps))

      if open_deps:
          state = "pending"
          pr_list_str = ", ".join([f"#{num}" for num in open_deps])
          description = f"This PR has open dependencies: {pr_list_str}"
      else:
          state = "success"
          description = "This PR has no open dependencies"
      # The status is set on every run, before the no-change early return.
      _set_commit_status(
          pr_node["headRefOid"], state, description, token, dry_run
      )

      first_independent_commit_oid: Optional[str] = None
      last_dep_head_oid: Optional[str] = None
      any_later_dependent_oids = False
      if open_deps:
          dependent_oids: set[str] = set()
          for dep in open_deps:
              dependent_oids.update(pr_to_commits.get(dep, ()))

          # Resume the search from the previously recorded commit when it is
          # still part of this PR.
          if previous_first_commit and previous_first_commit in current_oids:
              start_idx = current_oids.index(previous_first_commit)
          else:
              start_idx = 0

          # Assumes `current_oids` is in chronological order (oldest first).
          # This guarantees we find the first independent commit to start the
          # review.
          first_independent_commit_oid = next(
              (
                  oid
                  for oid in current_oids[start_idx:]
                  if oid not in dependent_oids
              ),
              None,
          )

          last_dep_pr_num = max(open_deps)
          last_dep_oids = pr_to_commits.get(last_dep_pr_num, set())
          # `.get` keeps the "unable to identify" fallback below reachable
          # when no head commit is known for the dependency.
          last_dep_head_oid = pr_to_head.get(last_dep_pr_num)

          # Detect non-linear history: any commit in the current PR that is in
          # *some* dependency but *not* in the last dependency.
          any_later_dependent_oids = any(
              oid in dependent_oids and oid not in last_dep_oids
              for oid in current_oids
          )

      # Nothing to update when the tracked state is unchanged.
      if (
          open_deps == sorted(parsed_open_deps)
          and merged_deps == sorted(parsed_merged_deps)
          and first_independent_commit_oid == previous_first_commit
      ):
          return

      # Construct new comment
      timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
          "%Y-%m-%d %H:%M:%S UTC"
      )
      new_state: dict[str, Any] = {
          "open": open_deps,
          "merged": merged_deps,
      }
      # Only store a real commit: a JSON null in "first_commit" is rejected by
      # strict validation of the stored state on a later run.
      if first_independent_commit_oid is not None:
          new_state["first_commit"] = first_independent_commit_oid
      state_json = json.dumps(new_state)
      comment_body = f"{marker_prefix}{state_json} -->\n"

      if open_deps:
          pr_list_str = ", ".join([f"#{num}" for num in open_deps])
          if last_dep_head_oid:
              changes_url = (
                  "https://github.com/carbon-language/carbon-lang/pull/"
                  f"{pr_number}/changes/{last_dep_head_oid}..HEAD"
              )
              comment_body += (
                  f"Depends on {pr_list_str}, start review with "
                  f"[these changes]({changes_url})"
              )
              if any_later_dependent_oids:
                  comment_body += (
                      "\n\n> [!WARNING]\n"
                      "> Also contains changes from dependent PRs due to "
                      "non-linear history."
                  )
          else:
              comment_body += (
                  f"Depends on {pr_list_str}, unable to identify starting review "
                  f"commit from simple analysis"
              )
      else:
          comment_body += "All dependent PRs are merged."

      if merged_deps:
          merged_str = ", ".join([f"#{num}" for num in merged_deps])
          comment_body += f"\n\nMerged dependent PRs: {merged_str}"

      comment_body += f"\n\n(Last updated: {timestamp})"

      _print_err(f"PR #{pr_number}: Updating comment. New body:\n{comment_body}")

      # Apply mutations
      has_dependent_label = any(label["name"] == "dependent" for label in labels)
      if open_deps and not has_dependent_label and not scanning:
          if dry_run:
              _print_err(
                  f"[Dry-run] Would add 'dependent' label to PR #{pr_number}"
              )
          else:
              client.execute(
                  _MUTATION_ADD_LABEL,
                  variable_values={"labelableId": pr_id, "labelIds": [label_id]},
              )
      elif not open_deps and has_dependent_label:
          if dry_run:
              _print_err(
                  f"[Dry-run] Would remove 'dependent' label from PR #{pr_number}"
              )
          else:
              client.execute(
                  _MUTATION_REMOVE_LABEL,
                  variable_values={"labelableId": pr_id, "labelIds": [label_id]},
              )

      if existing_comment_id:
          if dry_run:
              _print_err(f"[Dry-run] Would update comment {existing_comment_id}")
          else:
              client.execute(
                  _MUTATION_UPDATE_COMMENT,
                  variable_values={
                      "id": existing_comment_id,
                      "body": comment_body,
                  },
              )
          return

      if scanning:
          _print_err(
              f"PR #{pr_number}: Skipping new comment creation in scan mode."
          )
          return

      if dry_run:
          _print_err(f"[Dry-run] Would add comment to PR #{pr_number}")
      else:
          client.execute(
              _MUTATION_ADD_COMMENT,
              variable_values={"subjectId": pr_id, "body": comment_body},
          )
  def _parse_args(args: Optional[list[str]] = None) -> argparse.Namespace:
      """Builds the command-line interface and parses `args`.

      Exactly one of --pr-number / --scan must be given; --dry-run is optional.
      The access-token option is registered by github_helpers.
      """
      cli = argparse.ArgumentParser(
          formatter_class=argparse.RawDescriptionHelpFormatter,
          description=__doc__,
      )

      # The two operating modes cannot be combined.
      mode = cli.add_mutually_exclusive_group(required=True)
      mode.add_argument(
          "--pr-number",
          help="The pull request number to check.",
          type=int,
      )
      mode.add_argument(
          "--scan",
          help="Scan all open PRs with 'dependent' label and update them.",
          action="store_true",
      )

      cli.add_argument(
          "--dry-run",
          help="Print mutations without updating GitHub",
          action="store_true",
      )
      github_helpers.add_access_token_arg(cli, "repo")

      return cli.parse_args(args=args)
  def main() -> None:
      """Loads repository-wide PR data, then processes one PR or all labeled PRs.

      With --pr-number, only that PR is processed. With --scan, every open PR
      carrying the 'dependent' label is processed in scanning mode.
      """
      parsed_args = _parse_args()
      # Validate before any network traffic. Zero must be caught explicitly:
      # a plain truthiness test on the number would skip both modes silently.
      if parsed_args.pr_number is not None and parsed_args.pr_number <= 0:
          sys.exit("--pr-number must be a positive integer.")

      client = github_helpers.Client(parsed_args)

      _print_err("Loading open PRs ...", end="", flush=True)
      pr_to_commits: dict[int, set[str]] = {}
      pr_to_head: dict[int, str] = {}
      open_pr_numbers: set[int] = set()
      for node in client.execute_and_paginate(
          _QUERY_OPEN_PRS, ("repository", "pullRequests")
      ):
          _print_err(".", end="", flush=True)
          other_pr_num = node["number"]
          open_pr_numbers.add(other_pr_num)
          pr_to_head[other_pr_num] = node["headRefOid"]
          pr_to_commits[other_pr_num] = {
              c["commit"]["oid"] for c in node["commits"]["nodes"]
          }
      _print_err()

      label_node = client.execute(_QUERY_LABEL)["repository"]["label"]
      if not label_node:
          # Fail with a clear message instead of a TypeError on None.
          sys.exit("The 'dependent' label was not found in the repository.")
      label_id = label_node["id"]

      # Stored PR numbers above the newest merged PR are rejected during state
      # validation unless they are open.
      merged_res = client.execute(_QUERY_MAX_MERGED_PR)
      merged_nodes = merged_res["repository"]["pullRequests"]["nodes"]
      max_merged_pr = merged_nodes[0]["number"] if merged_nodes else 0

      if parsed_args.pr_number is not None:
          _process_pr(
              client,
              parsed_args.pr_number,
              pr_to_commits,
              pr_to_head,
              open_pr_numbers,
              label_id,
              parsed_args.access_token,
              dry_run=parsed_args.dry_run,
              max_merged_pr=max_merged_pr,
          )
      elif parsed_args.scan:
          for node in client.execute_and_paginate(
              _QUERY_DEPENDENT_PRS, ("repository", "pullRequests")
          ):
              _process_pr(
                  client,
                  node["number"],
                  pr_to_commits,
                  pr_to_head,
                  open_pr_numbers,
                  label_id,
                  parsed_args.access_token,
                  dry_run=parsed_args.dry_run,
                  scanning=True,
                  max_merged_pr=max_merged_pr,
              )
  # Run the tool only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()