From 6ccfdd5246fb033de2e4d91b2292d95b195c83c4 Mon Sep 17 00:00:00 2001 From: Ismail Muhammed Ismail Date: Mon, 23 Feb 2026 20:47:11 +0200 Subject: [PATCH 1/2] feat: Implement VCS PURL aliasing and resolution for package merges. Signed-off-by: Ismail Muhammed Ismail --- minecode/model_utils.py | 39 ++++++++++++++++++++++++--- packagedb/migrations/0095_vcsalias.py | 26 ++++++++++++++++++ packagedb/models.py | 33 +++++++++++++++++++++++ 3 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 packagedb/migrations/0095_vcsalias.py diff --git a/minecode/model_utils.py b/minecode/model_utils.py index bea192cd..dcd53a95 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -16,9 +16,10 @@ from packagedb.models import PackageSet from packagedb.models import Party from packagedb.models import Resource +from packagedb.models import VcsAlias + from packagedb.serializers import DependentPackageSerializer from packagedb.serializers import PartySerializer - TRACE = False logger = logging.getLogger(__name__) @@ -67,6 +68,22 @@ def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0, logger.debug(f" + Inserted ScannableURI\t: {uri}") +def _create_vcs_aliases(old_url, new_url): + try: + from purl2vcs.find_source_repo import convert_repo_urls_to_purls + from packagedb.models import VcsAlias + old_purls = list(convert_repo_urls_to_purls([old_url])) + new_purls = list(convert_repo_urls_to_purls([new_url])) + + for old_purl in old_purls: + for new_purl in new_purls: + VcsAlias.objects.get_or_create( + old_vcs_purl=str(old_purl), + new_vcs_purl=str(new_purl) + ) + except Exception as e: + logger.error(f"Failed to create VcsAlias: {e}") + def merge_packages(existing_package, new_package_data, replace=False): """ Merge the data from the `new_package_data` mapping into the @@ -82,7 +99,6 @@ def merge_packages(existing_package, new_package_data, replace=False): field value is left unchanged in this case. """ existing_mapping = existing_package.to_dict() - # We remove `purl` from `existing_mapping` because we use the other purl # fields (type, namespace, name, version, etc.) to generate the purl. existing_mapping.pop("purl") @@ -209,6 +225,10 @@ def merge_packages(existing_package, new_package_data, replace=False): new_value = new_mapping.extra_data.get("package_content") if not new_value: continue + elif existing_field == "vcs_url" or existing_field == "homepage_url": + if existing_value and new_value and existing_value != new_value: + _create_vcs_aliases(existing_value, new_value) + # Continue normally to update the field elif existing_field in fields_to_skip: # Continue to next field continue @@ -243,7 +263,6 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena merged = False package = None map_error = "" - mining_level = visit_level if override: # this will force the data override @@ -395,6 +414,20 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena created_package, created = Package.objects.get_or_create(**package_data) if created: created_package.append_to_history(f"New Package created from URI: {package_uri}") + + older_packages = Package.objects.filter( + type=scanned_package.type or "", + namespace=scanned_package.namespace or "", + name=scanned_package.name or "", + ).exclude(version=scanned_package.version) + + if older_packages.exists(): + older_package = older_packages.order_by('-pk').first() + if older_package.vcs_url and created_package.vcs_url and older_package.vcs_url != created_package.vcs_url: + _create_vcs_aliases(older_package.vcs_url, created_package.vcs_url) + if older_package.homepage_url and created_package.homepage_url and older_package.homepage_url != created_package.homepage_url: + # Some packages have their homepage url set to their vcs url, so we should create an alias for that too + _create_vcs_aliases(older_package.homepage_url, created_package.homepage_url) # This is used in the case of Maven packages created from the priority queue for h in history: diff --git a/packagedb/migrations/0095_vcsalias.py b/packagedb/migrations/0095_vcsalias.py new file mode 100644 index 00000000..b3d831e3 --- /dev/null +++ b/packagedb/migrations/0095_vcsalias.py @@ -0,0 +1,26 @@ +# Generated by Django 5.1.13 on 2026-02-23 17:03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('packagedb', '0094_package_packagedb_p_package_d39839_idx'), + ] + + operations = [ + migrations.CreateModel( + name='VcsAlias', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('old_vcs_purl', models.CharField(db_index=True, max_length=2048)), + ('new_vcs_purl', models.CharField(db_index=True, max_length=2048)), + ('created_date', models.DateTimeField(auto_now_add=True)), + ], + options={ + 'indexes': [models.Index(fields=['old_vcs_purl'], name='packagedb_v_old_vcs_88807e_idx'), models.Index(fields=['new_vcs_purl'], name='packagedb_v_new_vcs_0f8a3b_idx')], + 'unique_together': {('old_vcs_purl', 'new_vcs_purl')}, + }, + ), + ] diff --git a/packagedb/models.py b/packagedb/models.py index a774592e..e576581a 100644 --- a/packagedb/models.py +++ b/packagedb/models.py @@ -1429,6 +1429,39 @@ def create_auth_token(sender, instance=None, created=False, **kwargs): Token.objects.get_or_create(user_id=instance.pk) +class VcsAlias(models.Model): + old_vcs_purl = models.CharField(max_length=2048, db_index=True) + new_vcs_purl = models.CharField(max_length=2048, db_index=True) + created_date = models.DateTimeField(auto_now_add=True) + + class Meta: + unique_together = ["old_vcs_purl", "new_vcs_purl"] + indexes = [ + models.Index(fields=["old_vcs_purl"]), + models.Index(fields=["new_vcs_purl"]), + ] + + @classmethod + def resolve_purl(cls, vcs_purl_str): + """ + Given a VCS PURL string, follows the VcsAlias chain to find and return + the latest active PURL. Returns the original string if no alias exists. + """ + current_purl = vcs_purl_str + visited = set() + + while current_purl not in visited: + visited.add(current_purl) + alias = cls.objects.filter(old_vcs_purl=current_purl).first() + + if not alias: + break + + current_purl = alias.new_vcs_purl + + return current_purl + + class PackageActivity(FederatedCodePackageActivityMixin): """Record of package activity from a FederatedCode.""" From 2cec09f0bd892dcb6927ad404c44f8c4beeac8a0 Mon Sep 17 00:00:00 2001 From: Ismail Muhammed Ismail Date: Mon, 23 Feb 2026 20:52:03 +0200 Subject: [PATCH 2/2] refactor: Move `VcsAlias` import into the `_create_vcs_aliases` function and reformat related conditional statements and function calls. Signed-off-by: Ismail Muhammed Ismail --- minecode/model_utils.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index dcd53a95..fc8d5662 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -16,10 +16,10 @@ from packagedb.models import PackageSet from packagedb.models import Party from packagedb.models import Resource -from packagedb.models import VcsAlias from packagedb.serializers import DependentPackageSerializer from packagedb.serializers import PartySerializer + TRACE = False logger = logging.getLogger(__name__) @@ -72,18 +72,19 @@ def _create_vcs_aliases(old_url, new_url): try: from purl2vcs.find_source_repo import convert_repo_urls_to_purls from packagedb.models import VcsAlias + old_purls = list(convert_repo_urls_to_purls([old_url])) new_purls = list(convert_repo_urls_to_purls([new_url])) for old_purl in old_purls: for new_purl in new_purls: VcsAlias.objects.get_or_create( - old_vcs_purl=str(old_purl), - new_vcs_purl=str(new_purl) + old_vcs_purl=str(old_purl), new_vcs_purl=str(new_purl) ) except Exception as e: logger.error(f"Failed to create VcsAlias: {e}") + def merge_packages(existing_package, new_package_data, replace=False): """ Merge the data from the `new_package_data` mapping into the @@ -414,7 +415,7 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena created_package, created = Package.objects.get_or_create(**package_data) if created: created_package.append_to_history(f"New Package created from URI: {package_uri}") - + older_packages = Package.objects.filter( type=scanned_package.type or "", namespace=scanned_package.namespace or "", @@ -422,10 +423,18 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena ).exclude(version=scanned_package.version) if older_packages.exists(): - older_package = older_packages.order_by('-pk').first() - if older_package.vcs_url and created_package.vcs_url and older_package.vcs_url != created_package.vcs_url: + older_package = older_packages.order_by("-pk").first() + if ( + older_package.vcs_url + and created_package.vcs_url + and older_package.vcs_url != created_package.vcs_url + ): _create_vcs_aliases(older_package.vcs_url, created_package.vcs_url) - if older_package.homepage_url and created_package.homepage_url and older_package.homepage_url != created_package.homepage_url: + if ( + older_package.homepage_url + and created_package.homepage_url + and older_package.homepage_url != created_package.homepage_url + ): # Some packages have their homepage url set to their vcs url, so we should create an alias for that too _create_vcs_aliases(older_package.homepage_url, created_package.homepage_url)