From f5e6373b4a79a5b196903b756cb26f4d2ab1eed1 Mon Sep 17 00:00:00 2001
From: Ethan Green
Date: Mon, 30 Mar 2026 21:16:18 +0300
Subject: [PATCH] Optimize package index cache serialization

Eliminate N+1 queries in both chunked and non-chunked index
serializations by prefetching package versions and deps, annotating
ratings up front, and consuming prefetch data during serialization
---
Reviewer notes (this section is not part of the commit message):

 * _get_sorted_active_versions() now filters on is_active itself, the
   same way PackageListingSerializer.get_versions() does, so
   listing_to_json() no longer depends on the caller having applied the
   is_active=True Prefetch from get_package_listing_chunk().
 * The ambiguous lambda parameter "l" in get_package_listing_chunk() was
   renamed to "listing".
 * Both changes are line-count neutral, so the hunk headers and the
   diffstat below are unchanged. The post-image blob id on the cache.py
   "index" line predates these two edits.
 * distutils is deprecated by PEP 632 and removed in Python 3.12; the
   StrictVersion imports added here need a replacement before upgrading.
 * The Prefetch/annotate setup is duplicated between viewsets.py and
   cache.py; consider sharing a single helper in a follow-up.

 .../repository/api/v1/serializers.py          | 13 ++-
 .../repository/api/v1/viewsets.py             | 30 ++++++-
 .../thunderstore/repository/models/cache.py   | 85 +++++++++++++------
 3 files changed, 95 insertions(+), 33 deletions(-)

diff --git a/django/thunderstore/repository/api/v1/serializers.py b/django/thunderstore/repository/api/v1/serializers.py
index c02ea71d8..51249db6f 100644
--- a/django/thunderstore/repository/api/v1/serializers.py
+++ b/django/thunderstore/repository/api/v1/serializers.py
@@ -1,3 +1,5 @@
+from distutils.version import StrictVersion
+
 from rest_framework.fields import Field
 from rest_framework.serializers import ModelSerializer, SerializerMethodField
 
@@ -64,16 +66,23 @@ class PackageListingSerializer(ModelSerializer):
     date_created = RelatedObjectField(relation_name="package")
     date_updated = RelatedObjectField(relation_name="package")
     uuid4 = RelatedObjectField(relation_name="package")
-    rating_score = RelatedObjectField(relation_name="package")
+    rating_score = SerializerMethodField()
     is_pinned = RelatedObjectField(relation_name="package")
     is_deprecated = RelatedObjectField(relation_name="package")
     categories = SerializerMethodField()
     versions = SerializerMethodField()
 
     def get_versions(self, instance):
-        versions = instance.package.available_versions
+        versions = sorted(
+            [v for v in instance.package.versions.all() if v.is_active],
+            key=lambda v: StrictVersion(v.version_number),
+            reverse=True,
+        )
         return PackageVersionSerializer(versions, many=True, context=self.context).data
 
+    def get_rating_score(self, instance):
+        return instance.rating_score
+
     def get_owner(self, instance):
         return instance.package.owner.name
 
diff --git a/django/thunderstore/repository/api/v1/viewsets.py b/django/thunderstore/repository/api/v1/viewsets.py
index 02ab8d386..246e40c5a 100644
--- a/django/thunderstore/repository/api/v1/viewsets.py
+++ b/django/thunderstore/repository/api/v1/viewsets.py
@@ -2,6 +2,7 @@
 from io import BytesIO
 from typing import Any, Optional
 
+from django.db.models import Count, Prefetch
 from django.http import HttpResponse
 from django.utils.cache import get_conditional_response
 from django.utils.http import http_date
@@ -23,7 +24,7 @@
     order_package_listing_queryset,
 )
 from thunderstore.repository.mixins import CommunityMixin
-from thunderstore.repository.models import Package
+from thunderstore.repository.models import Package, PackageVersion
 from thunderstore.repository.models.cache import APIV1PackageCache
 from thunderstore.utils.batch import batch
 
@@ -31,6 +32,29 @@
 SERIALIZER_BATCH_SIZE = 200
 
 
+def _get_prefetched_listing_queryset(ids):
+    versions_prefetch = Prefetch(
+        "package__versions",
+        queryset=PackageVersion.objects.filter(is_active=True)
+        .select_related("package", "package__owner")
+        .prefetch_related(
+            "dependencies",
+            "dependencies__package",
+            "dependencies__package__owner",
+        ),
+    )
+    return (
+        order_package_listing_queryset(PackageListing.objects.filter(id__in=ids))
+        .select_related("community", "package", "package__owner")
+        .prefetch_related(
+            "categories",
+            "community__sites__site",
+            versions_prefetch,
+        )
+        .annotate(_rating_score=Count("package__package_ratings"))
+    )
+
+
 def serialize_package_list_for_community(community: Community) -> bytes:
     listing_ids = get_package_listing_queryset(
         community_identifier=community.identifier
@@ -41,9 +65,7 @@ def serialize_package_list_for_community(community: Community) -> bytes:
     result.write(b"[")
 
     for index, ids in enumerate(batch(batch_size, listing_ids)):
-        queryset = order_package_listing_queryset(
-            PackageListing.objects.filter(id__in=ids)
-        )
+        queryset = _get_prefetched_listing_queryset(ids)
         serializer = PACKAGE_SERIALIZER(
             queryset,
             many=True,
diff --git a/django/thunderstore/repository/models/cache.py b/django/thunderstore/repository/models/cache.py
index 6a0f2947a..e628a9e90 100644
--- a/django/thunderstore/repository/models/cache.py
+++ b/django/thunderstore/repository/models/cache.py
@@ -2,10 +2,14 @@
 import io
 import json
 from datetime import timedelta
+from distutils.version import StrictVersion
 from typing import Any, Iterable, List, Optional
 
+from django.conf import settings
 from django.core.files.base import ContentFile
 from django.db import models
+from django.db.models import Count, Prefetch
+from django.urls import reverse
 from django.utils import timezone
 
 from thunderstore.community.models import Community, PackageListing
@@ -249,45 +253,73 @@ def get_package_listing_ids(community: Community) -> Iterable[List[int]]:
 
 def get_package_listing_chunk(
     listing_ids: List[int],
-) -> models.QuerySet["PackageListing"]:
-    # Keep the ordering as it was when the whole id list was read.
-    ordering = models.Case(
-        *[models.When(id=id, then=pos) for pos, id in enumerate(listing_ids)]
+) -> List[PackageListing]:
+    from thunderstore.repository.models import PackageVersion
+
+    versions_prefetch = Prefetch(
+        "package__versions",
+        queryset=PackageVersion.objects.filter(is_active=True)
+        .select_related("package", "package__owner")
+        .prefetch_related(
+            "dependencies",
+            "dependencies__package",
+            "dependencies__package__owner",
+        ),
     )
-    listing_ref = PackageListing.objects.filter(pk=models.OuterRef("pk"))
 
-    return (
+    listings = (
         PackageListing.objects.filter(id__in=listing_ids)
         .select_related("community", "package", "package__owner")
-        .prefetch_related("categories", "community__sites", "package__versions")
-        .annotate(
-            _rating_score=models.Subquery(
-                listing_ref.annotate(
-                    ratings=models.Count("package__package_ratings"),
-                ).values("ratings"),
-            ),
+        .prefetch_related(
+            "categories",
+            "community__sites__site",
+            versions_prefetch,
         )
-        .order_by(ordering)
+        .annotate(_rating_score=Count("package__package_ratings"))
     )
 
+    order_map = {lid: pos for pos, lid in enumerate(listing_ids)}
+    return sorted(listings, key=lambda listing: order_map[listing.id])
+
+
+def _get_sorted_active_versions(package):
+    versions = [v for v in package.versions.all() if v.is_active]
+    versions.sort(key=lambda v: StrictVersion(v.version_number), reverse=True)
+    return versions
+
+
+def _version_download_url(version) -> str:
+    path = reverse(
+        "old_urls:packages.download",
+        kwargs={
+            "owner": version.package.owner.name,
+            "name": version.package.name,
+            "version": version.version_number,
+        },
+    )
+    return f"{settings.PROTOCOL}{settings.PRIMARY_HOST}{path}"
+
 
 def listing_to_json(listing: PackageListing) -> bytes:
+    package = listing.package
+    owner = package.owner
+    versions = _get_sorted_active_versions(package)
+
     return json.dumps(
         {
-            "name": listing.package.name,
-            "full_name": listing.package.full_package_name,
-            "owner": listing.package.owner.name,
+            "name": package.name,
+            "full_name": package.full_package_name,
+            "owner": owner.name,
             "package_url": listing.get_full_url(),
-            "donation_link": listing.package.owner.donation_link,
-            "date_created": listing.package.date_created.isoformat(),
-            "date_updated": listing.package.date_updated.isoformat(),
-            "uuid4": str(listing.package.uuid4),
+            "donation_link": owner.donation_link,
+            "date_created": package.date_created.isoformat(),
+            "date_updated": package.date_updated.isoformat(),
+            "uuid4": str(package.uuid4),
             "rating_score": listing.rating_score,
-            "is_pinned": listing.package.is_pinned,
-            "is_deprecated": listing.package.is_deprecated,
+            "is_pinned": package.is_pinned,
+            "is_deprecated": package.is_deprecated,
             "has_nsfw_content": listing.has_nsfw_content,
             "categories": [c.name for c in listing.categories.all()],
-            # TODO: this generates awfully lot of database hits
             "versions": [
                 {
                     "name": version.name,
@@ -298,16 +330,15 @@ def listing_to_json(listing: PackageListing) -> bytes:
                     "dependencies": [
                         d.full_version_name for d in version.dependencies.all()
                     ],
-                    "download_url": version.full_download_url,
+                    "download_url": _version_download_url(version),
                     "downloads": version.downloads,
                     "date_created": version.date_created.isoformat(),
                     "website_url": version.website_url,
-                    # TODO: what is this needed for, inactive ones have been filtered out anyway?
                     "is_active": version.is_active,
                     "uuid4": str(version.uuid4),
                     "file_size": version.file_size,
                 }
-                for version in listing.package.available_versions
+                for version in versions
             ],
         },
     ).encode()