Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 41 additions & 24 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2657,10 +2657,20 @@ def track_newest_pull_update(pull):
def pull_is_due_for_repository_checkpoint(pull):
return not repository_since or pull["updated_at"] > repository_since

if not args.include_pull_details:
pull_states = ["open", "closed"]
for pull_state in pull_states:
query_args["state"] = pull_state
try:
if not args.include_pull_details:
pull_states = ["open", "closed"]
for pull_state in pull_states:
query_args["state"] = pull_state
for pull in retrieve_data(
args, _pulls_template, query_args=query_args, lazy=True
):
track_newest_pull_update(pull)
if pulls_since and pull["updated_at"] <= pulls_since:
break
if not pulls_since or pull["updated_at"] > pulls_since:
pulls[pull["number"]] = pull
else:
for pull in retrieve_data(
args, _pulls_template, query_args=query_args, lazy=True
):
Expand All @@ -2669,22 +2679,29 @@ def pull_is_due_for_repository_checkpoint(pull):
break
if not pulls_since or pull["updated_at"] > pulls_since:
pulls[pull["number"]] = pull
else:
for pull in retrieve_data(
args, _pulls_template, query_args=query_args, lazy=True
):
track_newest_pull_update(pull)
if pulls_since and pull["updated_at"] <= pulls_since:
break
if not pulls_since or pull["updated_at"] > pulls_since:
if pull_is_due_for_repository_checkpoint(pull):
pulls[pull["number"]] = retrieve_data(
args,
_pulls_template + "/{}".format(pull["number"]),
paginated=False,
)[0]
else:
pulls[pull["number"]] = pull
except HTTPError as e:
# Repositories with pull requests disabled return HTTP 404 for the
# listing endpoint; treat that as "no pull requests" rather than a
# fatal error. retrieve_data is lazy here, so the 404 surfaces during
# iteration. Only the listing call is guarded — a 404 on an individual
# pull's detail fetch below is a different condition and must propagate.
if e.code == 404:
logger.info("Pull requests are disabled for this repository, skipping")
return
raise

if args.include_pull_details:
# Replace the summary payload with the full pull detail. Done outside
# the disabled-feature guard above so a 404 on a single pull (e.g. one
# deleted between listing and fetch) is not mistaken for the whole
# feature being disabled, which would discard every collected pull.
for number in list(pulls.keys()):
if pull_is_due_for_repository_checkpoint(pulls[number]):
pulls[number] = retrieve_data(
args,
_pulls_template + "/{}".format(number),
paginated=False,
)[0]

logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys()))))
# Comments from pulls API are only _review_ comments
Expand Down Expand Up @@ -2813,8 +2830,8 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template):

try:
_advisories = retrieve_data(args, template)
except Exception as e:
if "404" in str(e):
except HTTPError as e:
if e.code == 404:
logger.info("Security advisories are not available for this repository, skipping")
return
raise
Expand Down Expand Up @@ -2861,8 +2878,8 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
template = "{0}/{1}/hooks".format(repos_template, repository["full_name"])
try:
_backup_data(args, "hooks", template, output_file, hook_cwd)
except Exception as e:
if "404" in str(e):
except HTTPError as e:
if e.code == 404:
logger.info("Unable to read hooks, skipping")
else:
raise e
Expand Down
105 changes: 105 additions & 0 deletions tests/test_disabled_issues_pulls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Tests for repositories with pull requests disabled (GitHub issue #511).

When pull requests are disabled on a repository, the ``/pulls`` endpoint
responds with HTTP 404. The backup should treat that as "no pull requests"
and continue, rather than aborting the whole run. (The ``/issues`` endpoint
returns 200 with an empty list when issues are disabled, so it needs no such
handling.)
"""

import logging
from urllib.error import HTTPError

from github_backup import github_backup


def _http_error(template, code, reason):
return HTTPError(template, code, reason, hdrs=None, fp=None)


def test_backup_pulls_skips_on_404(create_args, tmp_path, monkeypatch, caplog):
args = create_args(include_pulls=True, since=None)
repository = {"full_name": "owner/repo"}

def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs):
# retrieve_data is called lazily for the pull listing; the 404 must
# surface while iterating the returned generator.
def gen():
raise _http_error(template, 404, "Not Found")
yield # pragma: no cover - generator marker

if lazy:
return gen()
raise _http_error(template, 404, "Not Found")

monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data)

# Should not raise, and should log that it skipped.
with caplog.at_level(logging.INFO, logger=github_backup.logger.name):
github_backup.backup_pulls(
args, tmp_path, repository, "https://api.github.com/repos"
)

assert "Pull requests are disabled for this repository, skipping" in caplog.text

pulls_dir = tmp_path / "pulls"
if pulls_dir.exists():
assert not [p for p in pulls_dir.iterdir() if p.suffix == ".json"]


def test_backup_pulls_reraises_non_404(create_args, tmp_path, monkeypatch):
args = create_args(include_pulls=True, since=None)
repository = {"full_name": "owner/repo"}

def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs):
def gen():
raise _http_error(template, 500, "Server Error")
yield # pragma: no cover - generator marker

if lazy:
return gen()
raise _http_error(template, 500, "Server Error")

monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data)

try:
github_backup.backup_pulls(
args, tmp_path, repository, "https://api.github.com/repos"
)
except HTTPError as exc:
assert exc.code == 500
else:
raise AssertionError("Expected non-404 error to propagate")


def test_backup_pulls_details_individual_404_propagates(
create_args, tmp_path, monkeypatch
):
# With --pull-details the listing succeeds (feature is enabled), but an
# individual pull's detail fetch 404s (e.g. deleted mid-backup). That must
# propagate, NOT be swallowed as "pull requests are disabled".
args = create_args(include_pulls=True, include_pull_details=True, since=None)
repository = {"full_name": "owner/repo"}

listing = "https://api.github.com/repos/owner/repo/pulls"

def fake_retrieve_data(passed_args, template, query_args=None, lazy=False, **kwargs):
if template == listing:
# Listing works fine -> pull requests are clearly not disabled.
def gen():
yield {"number": 1, "updated_at": "2026-02-01T00:00:00Z"}

return gen()
# Individual pull detail fetch (.../pulls/1) is missing.
raise _http_error(template, 404, "Not Found")

monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data)

try:
github_backup.backup_pulls(
args, tmp_path, repository, "https://api.github.com/repos"
)
except HTTPError as exc:
assert exc.code == 404
else:
raise AssertionError("Expected individual-pull 404 to propagate")