Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions contentcuration/contentcuration/tests/test_storage_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

from .base import StudioTestCase
from contentcuration.models import generate_object_storage_name
from contentcuration.utils.gcs_storage import GoogleCloudStorage
from contentcuration.utils.storage_common import _get_gcs_presigned_put_url
from contentcuration.utils.storage_common import create_resumable_upload_session
from contentcuration.utils.storage_common import determine_content_type
from contentcuration.utils.storage_common import get_presigned_upload_url
from contentcuration.utils.storage_common import UnknownStorageBackendError
Expand Down Expand Up @@ -157,6 +159,21 @@ def test_generate_signed_url_called_with_required_arguments(self):
content_type=mimetype,
)

def test_create_resumable_session_pins_md5_size_and_returns_url(self):
    """
    The session helper must set the blob's MD5, content type and declared
    size, then return whatever URL the blob's session call produces.
    """
    expected_url = "https://session.url"
    gcs_storage = GoogleCloudStorage(self.client, "bucket")
    mocked_blob = self.client.get_bucket.return_value.blob.return_value
    mocked_blob.create_resumable_upload_session.return_value = expected_url

    session_url = create_resumable_upload_session(
        "storage/a/b/abc.jpg", "md5==\n", 2048, storage=gcs_storage
    )

    assert session_url == expected_url
    # The trailing newline on the digest must have been stripped.
    assert mocked_blob.md5_hash == "md5=="
    assert mocked_blob.content_type == "image/jpeg"
    assert mocked_blob.metadata == {"declared-size": "2048"}
    mocked_blob.create_resumable_upload_session.assert_called_once()


class S3StoragePresignedURLUnitTestCase(StudioTestCase):
"""
Expand Down
99 changes: 99 additions & 0 deletions contentcuration/contentcuration/tests/viewsets/test_file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import codecs
import uuid
from unittest import mock

from django.urls import reverse
from le_utils.constants import content_kinds
Expand All @@ -12,6 +14,8 @@
from contentcuration.tests.viewsets.base import generate_delete_event
from contentcuration.tests.viewsets.base import generate_update_event
from contentcuration.tests.viewsets.base import SyncTestMixin
from contentcuration.viewsets.file import FileUploadURLSerializer
from contentcuration.viewsets.file import MAX_NON_RESUMABLE_UPLOAD_SIZE
from contentcuration.viewsets.sync.constants import CONTENTNODE
from contentcuration.viewsets.sync.constants import FILE

Expand Down Expand Up @@ -546,6 +550,9 @@ def test_mismatched_preset_upload(self):

def test_insufficient_storage(self):
self.file["size"] = 100000000000000
self.file[
"resumable"
] = True # resumable bypasses the >500MB guard so this still exercises the quota (412) path

self.client.force_authenticate(user=self.user)
response = self.client.post(
Expand Down Expand Up @@ -590,6 +597,26 @@ def test_duration_zero(self):

self.assertEqual(response.status_code, 400)

def test_fractional_size_rejected(self):
    """A non-integer ``size`` must fail serializer validation on that field."""
    payload = dict(self.file, size=1000.5)
    serializer = FileUploadURLSerializer(data=payload)
    assert not serializer.is_valid()
    assert "size" in serializer.errors

def test_large_non_resumable_rejected(self):
    """A request one byte over the cap without ``resumable`` gets a 400."""
    oversized = MAX_NON_RESUMABLE_UPLOAD_SIZE + 1
    self.file["size"] = oversized
    self.client.force_authenticate(user=self.user)
    endpoint = reverse("file-upload-url")
    response = self.client.post(endpoint, self.file, format="json")
    assert response.status_code == 400

def test_large_resumable_allowed(self):
    """A request one byte over the cap with ``resumable`` set gets a 200."""
    # Raise the user's quota so the storage check cannot reject the request.
    self.user.disk_space = 10 * 1024 * 1024 * 1024
    self.user.save()

    self.file.update(
        {"size": MAX_NON_RESUMABLE_UPLOAD_SIZE + 1, "resumable": True}
    )

    self.client.force_authenticate(user=self.user)
    endpoint = reverse("file-upload-url")
    response = self.client.post(endpoint, self.file, format="json")
    assert response.status_code == 200


class ContentIDTestCase(SyncTestMixin, StudioAPITestCase):
def setUp(self):
Expand Down Expand Up @@ -763,3 +790,75 @@ def test_content_id__thumbnails_dont_update_content_id(self):
self.assertEqual(
copied_node_content_id_before_upload, copied_node_content_id_after_upload
)


class ResumableUploadURLTestCase(StudioAPITestCase):
    """
    Tests for the ``file-upload-url`` endpoint when the request sets
    ``resumable`` to true: a new session, an already-stored object, and the
    fallback taken when the storage backend has no resumable support.
    """

    def setUp(self):
        super().setUp()
        self.user = testdata.user()
        # Give user enough quota to handle resumable uploads
        self.user.disk_space = 10 * 1024 * 1024 * 1024
        self.user.save()
        self.file = {
            "size": 1000,
            "checksum": uuid.uuid4().hex,
            "name": "le_studio",
            "file_format": file_formats.MP3,
            "preset": format_presets.AUDIO,
            "duration": 10.123,
            "resumable": True,
        }

    def _checksum_b64(self):
        """Return the request checksum re-encoded from hex to stripped base64."""
        return (
            codecs.encode(codecs.decode(self.file["checksum"], "hex"), "base64")
            .decode()
            .strip()
        )

    def _request_upload_url(self):
        """Authenticate as the test user and POST ``self.file`` to the endpoint."""
        self.client.force_authenticate(user=self.user)
        return self.client.post(reverse("file-upload-url"), self.file, format="json")

    @mock.patch("contentcuration.viewsets.file.default_storage")
    @mock.patch("contentcuration.viewsets.file.create_resumable_upload_session")
    @mock.patch("contentcuration.viewsets.file.get_stored_object_md5_b64")
    def test_resumable_returns_session_when_not_stored(
        self, mock_md5, mock_session, mock_storage
    ):
        mock_storage.supports_resumable = True
        mock_md5.return_value = None
        mock_session.return_value = "https://session.url"
        resp = self._request_upload_url()
        assert resp.status_code == 200
        data = resp.json()
        assert data["resumable"] is True
        assert data["uploadURL"] == "https://session.url"
        assert data["alreadyUploaded"] is False
        assert "file" in data
        assert data["file"]["id"]
        mock_session.assert_called_once()

    @mock.patch("contentcuration.viewsets.file.default_storage")
    @mock.patch("contentcuration.viewsets.file.create_resumable_upload_session")
    @mock.patch("contentcuration.viewsets.file.get_stored_object_md5_b64")
    def test_resumable_skips_when_already_stored(
        self, mock_md5, mock_session, mock_storage
    ):
        mock_storage.supports_resumable = True
        mock_md5.return_value = self._checksum_b64()
        resp = self._request_upload_url()
        # Check the status first so an error response fails here rather than
        # as a confusing KeyError on the payload assertions below.
        assert resp.status_code == 200
        data = resp.json()
        assert data["resumable"] is True and data["alreadyUploaded"] is True
        assert data["uploadURL"] is None
        assert "file" in data
        assert data["file"]["id"]
        mock_session.assert_not_called()

    def test_resumable_falls_back_to_single_put_on_s3(self):
        # default_storage is S3 in the test env → no resumable support
        resp = self._request_upload_url()
        # Check the status first so an error response fails here rather than
        # as a confusing KeyError on the payload assertions below.
        assert resp.status_code == 200
        data = resp.json()
        assert data["resumable"] is False
        assert "uploadURL" in data
        assert "file" in data
        assert data["file"]["id"]
4 changes: 4 additions & 0 deletions contentcuration/contentcuration/utils/gcs_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def _create_default_client(


class GoogleCloudStorage(Storage):
supports_resumable = True

def __init__(self, client, bucket_name):
self.client = client
self.bucket = self.client.get_bucket(bucket_name)
Expand Down Expand Up @@ -217,6 +219,8 @@ def _is_file_empty(fobj):


class CompositeGCS(Storage):
supports_resumable = True

def __init__(self):
self.backends = []
self.backends.append(
Expand Down
24 changes: 24 additions & 0 deletions contentcuration/contentcuration/utils/storage_common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import codecs
import mimetypes
import os
from datetime import timedelta
Expand Down Expand Up @@ -35,6 +36,29 @@ def determine_content_type(filename):
return typ


def hex_to_base64(hexdigest):
    """
    Re-encode a hex digest (such as an MD5 checksum) as a base64 string.

    The ``base64`` codec appends a trailing newline to non-empty output and
    it is kept here, so callers needing the bare digest must strip it.
    """
    raw_digest = codecs.decode(hexdigest, "hex")
    encoded = codecs.encode(raw_digest, "base64")
    return encoded.decode()


def create_resumable_upload_session(filepath, md5_b64, size, storage=default_storage):
    """
    Open a resumable upload session for ``filepath`` and return its URL.

    Before the session is opened the blob is configured with the expected
    MD5 digest, a content type derived from the file name, and the declared
    size recorded under the ``declared-size`` metadata key.

    :param filepath: object name of the blob inside the bucket
    :param md5_b64: base64-encoded MD5 digest; surrounding whitespace
        (e.g. the newline added by the base64 codec) is stripped
    :param size: declared size of the upload in bytes
    :param storage: storage backend exposing ``get_client()``
    :return: the value returned by the blob's
        ``create_resumable_upload_session`` call (the session URL)
    """
    client = storage.get_client()
    bucket = client.get_bucket(settings.AWS_S3_BUCKET_NAME)
    blob = bucket.blob(filepath)
    blob.md5_hash = md5_b64.strip()
    blob.content_type = determine_content_type(filepath)
    blob.metadata = {"declared-size": str(size)}
    # Pass the declared size to the session as well as recording it in
    # metadata: per the google-cloud-storage documentation, ``size`` is the
    # maximum number of bytes that can be uploaded through this session.
    return blob.create_resumable_upload_session(size=size, client=client)


def get_stored_object_md5_b64(filepath, storage=default_storage):
    """
    Look up the ``md5_hash`` of the blob stored at ``filepath``.

    Returns ``None`` when the storage backend does not advertise
    ``supports_resumable`` or when no blob exists at that path.
    """
    if getattr(storage, "supports_resumable", False):
        bucket = storage.get_client().get_bucket(settings.AWS_S3_BUCKET_NAME)
        stored_blob = bucket.get_blob(filepath)
        if stored_blob is not None:
            return stored_blob.md5_hash
    return None


def get_presigned_upload_url(
filepath,
md5sum_b64,
Expand Down
50 changes: 34 additions & 16 deletions contentcuration/contentcuration/viewsets/file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import codecs
import math

from django.core.exceptions import PermissionDenied
from django.core.files.storage import default_storage
from django.http import HttpResponseBadRequest
from le_utils.constants import file_formats
from le_utils.constants import format_presets
Expand All @@ -18,7 +18,10 @@
from contentcuration.models import generate_storage_url
from contentcuration.utils.cache import ResourceSizeCache
from contentcuration.utils.sentry import report_exception
from contentcuration.utils.storage_common import create_resumable_upload_session
from contentcuration.utils.storage_common import get_presigned_upload_url
from contentcuration.utils.storage_common import get_stored_object_md5_b64
from contentcuration.utils.storage_common import hex_to_base64
from contentcuration.utils.user import calculate_user_storage
from contentcuration.viewsets.base import BulkDeleteMixin
from contentcuration.viewsets.base import BulkListSerializer
Expand All @@ -34,6 +37,8 @@

PRESET_LOOKUP = {p.id: p for p in format_presets.PRESETLIST}

# Largest upload size, in bytes, that is accepted when the request does not
# set ``resumable``; larger requests fail upload-URL validation.
MAX_NON_RESUMABLE_UPLOAD_SIZE = 500 * 1024 * 1024


class StrictFloatField(serializers.FloatField):
def to_internal_value(self, data):
Expand All @@ -48,7 +53,7 @@ class FileUploadURLSerializer(serializers.Serializer):
"""
Serializer to validate inputs for the upload_url endpoint.
Required:
- size: a float value
- size: an integer value (bytes)
- checksum: a 32-digit hex string
- name: a string (note: mapped from request.data['name'])
- file_format: a valid file format choice from file_formats.choices
Expand All @@ -57,12 +62,13 @@ class FileUploadURLSerializer(serializers.Serializer):
- duration: a number that will be floored to an integer and must be > 0
"""

size = serializers.FloatField(required=True)
size = serializers.IntegerField(required=True)
checksum = serializers.RegexField(regex=r"^[0-9a-f]{32}$", required=True)
name = serializers.CharField(required=True)
file_format = serializers.ChoiceField(choices=file_formats.choices, required=True)
preset = serializers.ChoiceField(choices=format_presets.choices, required=True)
duration = StrictFloatField(required=False, allow_null=True)
resumable = serializers.BooleanField(required=False, default=False)

def validate_duration(self, value):
if value is None:
Expand All @@ -89,6 +95,10 @@ def validate(self, attrs):
raise serializers.ValidationError(
f"File format {attrs['file_format']} is not an allowed format for this preset {attrs['preset']}"
)
if not attrs["resumable"] and attrs["size"] > MAX_NON_RESUMABLE_UPLOAD_SIZE:
raise serializers.ValidationError(
"Files larger than 500 MB must use a resumable upload."
)
return attrs


Expand Down Expand Up @@ -235,26 +245,37 @@ def upload_url(self, request):
file_format = validated_data["file_format"]
preset = validated_data["preset"]
duration = validated_data.get("duration")
resumable = validated_data["resumable"]

try:
request.user.check_space(float(size), checksum)
request.user.check_space(size, checksum)
except PermissionDenied:
return HttpResponseBadRequest(
reason="Not enough space. Check your storage under Settings page.",
status=412,
)

might_skip = File.objects.filter(checksum=checksum).exists()

filepath = generate_object_storage_name(
checksum, filename, default_ext=file_format
)
checksum_base64 = codecs.encode(
codecs.decode(checksum, "hex"), "base64"
).decode()
retval = get_presigned_upload_url(
filepath, checksum_base64, 600, content_length=size
)
checksum_base64 = hex_to_base64(checksum)

if resumable and getattr(default_storage, "supports_resumable", False):
# Resumable response omits mimetype/might_skip.
stored = get_stored_object_md5_b64(filepath) == checksum_base64.strip()
retval = {
"resumable": True,
"uploadURL": None
if stored
else create_resumable_upload_session(filepath, checksum_base64, size),
"alreadyUploaded": stored,
}
else:
retval = get_presigned_upload_url(
filepath, checksum_base64, 600, content_length=size
)
retval["resumable"] = False
retval["might_skip"] = File.objects.filter(checksum=checksum).exists()

file = File(
file_size=size,
Expand All @@ -270,8 +291,5 @@ def upload_url(self, request):
# Avoid using our file_on_disk attribute for checks
file.save(set_by_file_on_disk=False)

retval.update(
{"might_skip": might_skip, "file": self.serialize_object(id=file.id)}
)

retval["file"] = self.serialize_object(id=file.id)
return Response(retval)
Loading