diff --git a/config/settings/base.py b/config/settings/base.py
index 3106e21f..8fc5ea98 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -419,7 +419,7 @@
     "HTML_SELECT_CUTOFF": 20,
     "DEFAULT_AUTHENTICATION_CLASSES": (
         "rest_framework.authentication.SessionAuthentication",
-        "rest_framework_simplejwt.authentication.JWTAuthentication",
+        "documentcloud.core.authentication.SquareletJWTAuthentication",
         "documentcloud.core.authentication.ProcessingTokenAuthentication",
     ),
     "DEFAULT_VERSIONING_CLASS": "documentcloud.core.versioning.QueryParameterVersioning",
diff --git a/documentcloud/core/authentication.py b/documentcloud/core/authentication.py
index c6e32a69..48d06d99 100644
--- a/documentcloud/core/authentication.py
+++ b/documentcloud/core/authentication.py
@@ -7,6 +7,17 @@
 
 # Standard Library
 import hmac
+import logging
+
+# Third Party
+import requests
+from rest_framework_simplejwt.authentication import JWTAuthentication
+from rest_framework_simplejwt.settings import api_settings
+from squarelet_auth import settings as squarelet_settings
+from squarelet_auth.users.utils import squarelet_update_or_create
+from squarelet_auth.utils import squarelet_get
+
+logger = logging.getLogger(__name__)
 
 
 class ProcessingTokenAuthentication(BaseAuthentication):
@@ -44,3 +55,66 @@ def authenticate_credentials(self, key):
 
     def authenticate_header(self, request):
         return "processing-token"
+
+
+class SquareletJWTAuthentication(JWTAuthentication):
+    """JWT authentication that lazily provisions users from Squarelet.
+
+    Squarelet issues JWTs for users who may not yet have a mirrored ``User``
+    row in DocumentCloud's database. That row is normally created on first
+    interactive login, or via the asynchronous cache-invalidation webhook.
+    Either of these can lose a race against an immediate API call, like when
+    we're trying to fetch information about a user's add-ons in Klaxon.
+
+    When the user is missing locally we fetch their data from Squarelet
+    synchronously, create the row inline, and retry, so the very first
+    authenticated request is self-healing and the timing race is eliminated.
+
+    Provisioning is gated by ``SQUARELET_DISABLE_CREATE`` (via
+    ``squarelet_auth.settings.DISABLE_CREATE``), matching the webhook's
+    ``pull_data`` task: where creating users from Squarelet is disabled, an
+    unknown user still 401s rather than being provisioned here.
+    """
+
+    def get_user(self, validated_token):
+        """Return the token's user, provisioning them from Squarelet if missing.
+
+        Raises ``AuthenticationFailed`` if the token is otherwise invalid,
+        provisioning is disabled, or the user cannot be loaded from Squarelet.
+        """
+        try:
+            return super().get_user(validated_token)
+        except exceptions.AuthenticationFailed as exc:
+            # Only provision when the token is valid but the user simply does
+            # not exist locally yet. Genuinely invalid tokens (and any other
+            # failures) must still surface as a 401. simplejwt wraps its detail
+            # in a dict (``{"detail": ..., "code": ...}``) while plain DRF uses
+            # an ``ErrorDetail`` string, so handle both shapes.
+            if isinstance(exc.detail, dict):
+                code = exc.detail.get("code")
+            else:
+                code = getattr(exc.detail, "code", None)
+            if code != "user_not_found":
+                raise
+
+            # Respect the same gate as the webhook's pull_data task: when
+            # creating users from Squarelet is disabled, don't provision them
+            # here either -- let the request 401.
+            if squarelet_settings.DISABLE_CREATE:
+                raise
+
+            uuid = validated_token[api_settings.USER_ID_CLAIM]
+            logger.info("[JWT] Lazily provisioning user from Squarelet: %s", uuid)
+            try:
+                resp = squarelet_get(f"/api/users/{uuid}/")
+                resp.raise_for_status()
+                squarelet_update_or_create(uuid, resp.json())
+            except (requests.exceptions.RequestException, ValueError) as error:
+                # ValueError: the response body was not valid JSON (not a
+                # RequestException on older requests releases)
+                logger.exception("[JWT] Failed to fetch user from Squarelet: %s", uuid)
+                # Re-raise the original auth failure so the request 401s
+                raise exc from error
+
+        # Retry now that the user should exist locally
+        return super().get_user(validated_token)
diff --git a/documentcloud/core/tests.py b/documentcloud/core/tests.py
index e0b25cae..d38420b9 100644
--- a/documentcloud/core/tests.py
+++ b/documentcloud/core/tests.py
@@ -4,6 +4,7 @@
 from django.db import transaction
 from django.test import TestCase
 from django.urls import reverse
+from rest_framework.exceptions import AuthenticationFailed
 
 # Standard Library
 import hashlib
@@ -14,8 +15,11 @@
 
 # Third Party
 import pytest
+import requests
+from rest_framework_simplejwt.settings import api_settings
 
 # DocumentCloud
+from documentcloud.core.authentication import SquareletJWTAuthentication
 from documentcloud.users.tests.factories import UserFactory
 
 
@@ -93,3 +97,107 @@ def test_invalid_signature(self):
             f"{user.mailkey}@uploads.documentcloud.org", sign=False
         )
         assert response.status_code == 403
+
+
+@pytest.mark.django_db()
+class TestSquareletJWTAuthentication:
+    """Tests for lazy user provisioning during JWT authentication"""
+
+    def token(self, user_uuid):
+        """Build a minimal validated token carrying the user's uuid claim"""
+        return {api_settings.USER_ID_CLAIM: str(user_uuid)}
+
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_existing_user(self, mock_get, mock_update):
+        """A user that already exists locally is returned without a callback"""
+        user = UserFactory()
+        auth = SquareletJWTAuthentication()
+
+        result = auth.get_user(self.token(user.uuid))
+
+        assert result == user
+        mock_get.assert_not_called()
+        mock_update.assert_not_called()
+
+    @mock.patch(
+        "documentcloud.core.authentication.squarelet_settings.DISABLE_CREATE", False
+    )
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_lazy_provision_missing_user(self, mock_get, mock_update):
+        """A missing user is fetched from Squarelet, created, and returned"""
+        missing_uuid = uuid.uuid4()
+        data = {"preferred_username": "newuser", "organizations": []}
+        mock_get.return_value.json.return_value = data
+        # Simulate squarelet_update_or_create creating the local mirror row
+        mock_update.side_effect = lambda _uuid, _data: UserFactory(uuid=missing_uuid)
+        auth = SquareletJWTAuthentication()
+
+        result = auth.get_user(self.token(missing_uuid))
+
+        assert result.uuid == missing_uuid
+        mock_get.assert_called_once_with(f"/api/users/{missing_uuid}/")
+        # The uuid comes off the JWT claim as a string, matching how the
+        # webhook's pull_data task calls squarelet_update_or_create
+        mock_update.assert_called_once_with(str(missing_uuid), data)
+
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_invalid_token_not_provisioned(self, mock_get, mock_update):
+        """A token without a user claim must 401 without contacting Squarelet"""
+        auth = SquareletJWTAuthentication()
+
+        with pytest.raises(AuthenticationFailed):
+            auth.get_user({})
+
+        mock_get.assert_not_called()
+        mock_update.assert_not_called()
+
+    @mock.patch(
+        "documentcloud.core.authentication.squarelet_settings.DISABLE_CREATE", False
+    )
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_squarelet_fetch_fails(self, mock_get, mock_update):
+        """If the Squarelet fetch fails, the request still 401s"""
+        missing_uuid = uuid.uuid4()
+        mock_get.side_effect = requests.exceptions.RequestException
+        auth = SquareletJWTAuthentication()
+
+        with pytest.raises(AuthenticationFailed):
+            auth.get_user(self.token(missing_uuid))
+
+        mock_update.assert_not_called()
+
+    @mock.patch(
+        "documentcloud.core.authentication.squarelet_settings.DISABLE_CREATE", False
+    )
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_squarelet_invalid_json(self, mock_get, mock_update):
+        """If Squarelet returns a body that is not JSON, the request still 401s"""
+        missing_uuid = uuid.uuid4()
+        mock_get.return_value.json.side_effect = ValueError
+        auth = SquareletJWTAuthentication()
+
+        with pytest.raises(AuthenticationFailed):
+            auth.get_user(self.token(missing_uuid))
+
+        mock_update.assert_not_called()
+
+    @mock.patch(
+        "documentcloud.core.authentication.squarelet_settings.DISABLE_CREATE", True
+    )
+    @mock.patch("documentcloud.core.authentication.squarelet_update_or_create")
+    @mock.patch("documentcloud.core.authentication.squarelet_get")
+    def test_disable_create_skips_provisioning(self, mock_get, mock_update):
+        """When SQUARELET_DISABLE_CREATE is set, missing users still 401"""
+        missing_uuid = uuid.uuid4()
+        auth = SquareletJWTAuthentication()
+
+        with pytest.raises(AuthenticationFailed):
+            auth.get_user(self.token(missing_uuid))
+
+        mock_get.assert_not_called()
+        mock_update.assert_not_called()