From c6588336caae9b8a68954af1b8095ac556553b13 Mon Sep 17 00:00:00 2001 From: Rodos Date: Wed, 17 Jun 2026 08:47:08 +1000 Subject: [PATCH] Skip repositories with pull requests disabled (HTTP 404) instead of crashing (#511) Also harden the advisories/hooks 404 checks to match HTTPError.code instead of a substring. --- github_backup/github_backup.py | 65 ++++++++++------- tests/test_disabled_issues_pulls.py | 105 ++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 24 deletions(-) create mode 100644 tests/test_disabled_issues_pulls.py diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index fd8a080..e72622d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2657,10 +2657,20 @@ def track_newest_pull_update(pull): def pull_is_due_for_repository_checkpoint(pull): return not repository_since or pull["updated_at"] > repository_since - if not args.include_pull_details: - pull_states = ["open", "closed"] - for pull_state in pull_states: - query_args["state"] = pull_state + try: + if not args.include_pull_details: + pull_states = ["open", "closed"] + for pull_state in pull_states: + query_args["state"] = pull_state + for pull in retrieve_data( + args, _pulls_template, query_args=query_args, lazy=True + ): + track_newest_pull_update(pull) + if pulls_since and pull["updated_at"] <= pulls_since: + break + if not pulls_since or pull["updated_at"] > pulls_since: + pulls[pull["number"]] = pull + else: for pull in retrieve_data( args, _pulls_template, query_args=query_args, lazy=True ): @@ -2669,22 +2679,29 @@ def pull_is_due_for_repository_checkpoint(pull): break if not pulls_since or pull["updated_at"] > pulls_since: pulls[pull["number"]] = pull - else: - for pull in retrieve_data( - args, _pulls_template, query_args=query_args, lazy=True - ): - track_newest_pull_update(pull) - if pulls_since and pull["updated_at"] <= pulls_since: - break - if not pulls_since or pull["updated_at"] > pulls_since: - if pull_is_due_for_repository_checkpoint(pull): - pulls[pull["number"]] = retrieve_data( - args, - _pulls_template + "/{}".format(pull["number"]), - paginated=False, - )[0] - else: - pulls[pull["number"]] = pull + except HTTPError as e: + # Repositories with pull requests disabled return HTTP 404 for the + # listing endpoint; treat that as "no pull requests" rather than a + # fatal error. retrieve_data is lazy here, so the 404 surfaces during + # iteration. Only the listing call is guarded — a 404 on an individual + # pull's detail fetch below is a different condition and must propagate. + if e.code == 404: + logger.info("Pull requests are disabled for this repository, skipping") + return + raise + + if args.include_pull_details: + # Replace the summary payload with the full pull detail. Done outside + # the disabled-feature guard above so a 404 on a single pull (e.g. one + # deleted between listing and fetch) is not mistaken for the whole + # feature being disabled, which would discard every collected pull. + for number in list(pulls.keys()): + if pull_is_due_for_repository_checkpoint(pulls[number]): + pulls[number] = retrieve_data( + args, + _pulls_template + "/{}".format(number), + paginated=False, + )[0] logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments from pulls API are only _review_ comments @@ -2813,8 +2830,8 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): try: _advisories = retrieve_data(args, template) - except Exception as e: - if "404" in str(e): + except HTTPError as e: + if e.code == 404: logger.info("Security advisories are not available for this repository, skipping") return raise @@ -2861,8 +2878,8 @@ def backup_hooks(args, repo_cwd, repository, repos_template): template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: _backup_data(args, "hooks", template, output_file, hook_cwd) - except Exception as e: - if "404" in str(e): + except HTTPError as e: + if e.code == 404: logger.info("Unable to read hooks, skipping") else: raise e diff --git a/tests/test_disabled_issues_pulls.py b/tests/test_disabled_issues_pulls.py new file mode 100644 index 0000000..bfbb05e --- /dev/null +++ b/tests/test_disabled_issues_pulls.py @@ -0,0 +1,105 @@ +"""Tests for repositories with pull requests disabled (GitHub issue #511). + +When pull requests are disabled on a repository, the ``/pulls`` endpoint +responds with HTTP 404. The backup should treat that as "no pull requests" +and continue, rather than aborting the whole run. (The ``/issues`` endpoint +returns 200 with an empty list when issues are disabled, so it needs no such +handling.) +""" + +import logging +from urllib.error import HTTPError + +from github_backup import github_backup + + +def _http_error(template, code, reason): + return HTTPError(template, code, reason, hdrs=None, fp=None) + + +def test_backup_pulls_skips_on_404(create_args, tmp_path, monkeypatch, caplog): + args = create_args(include_pulls=True, since=None) + repository = {"full_name": "owner/repo"} + + def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs): + # retrieve_data is called lazily for the pull listing; the 404 must + # surface while iterating the returned generator. + def gen(): + raise _http_error(template, 404, "Not Found") + yield # pragma: no cover - generator marker + + if lazy: + return gen() + raise _http_error(template, 404, "Not Found") + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + # Should not raise, and should log that it skipped. + with caplog.at_level(logging.INFO, logger=github_backup.logger.name): + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + assert "Pull requests are disabled for this repository, skipping" in caplog.text + + pulls_dir = tmp_path / "pulls" + if pulls_dir.exists(): + assert not [p for p in pulls_dir.iterdir() if p.suffix == ".json"] + + +def test_backup_pulls_reraises_non_404(create_args, tmp_path, monkeypatch): + args = create_args(include_pulls=True, since=None) + repository = {"full_name": "owner/repo"} + + def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs): + def gen(): + raise _http_error(template, 500, "Server Error") + yield # pragma: no cover - generator marker + + if lazy: + return gen() + raise _http_error(template, 500, "Server Error") + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + try: + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + except HTTPError as exc: + assert exc.code == 500 + else: + raise AssertionError("Expected non-404 error to propagate") + + +def test_backup_pulls_details_individual_404_propagates( + create_args, tmp_path, monkeypatch +): + # With --pull-details the listing succeeds (feature is enabled), but an + # individual pull's detail fetch 404s (e.g. deleted mid-backup). That must + # propagate, NOT be swallowed as "pull requests are disabled". + args = create_args(include_pulls=True, include_pull_details=True, since=None) + repository = {"full_name": "owner/repo"} + + listing = "https://api.github.com/repos/owner/repo/pulls" + + def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs): + if template == listing: + # Listing works fine -> pull requests are clearly not disabled. + def gen(): + yield {"number": 1, "updated_at": "2026-02-01T00:00:00Z"} + + return gen() + # Individual pull detail fetch (.../pulls/1) is missing. + raise _http_error(template, 404, "Not Found") + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + try: + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + except HTTPError as exc: + assert exc.code == 404 + else: + raise AssertionError("Expected individual-pull 404 to propagate")