diff --git a/changes/11494.fix.md b/changes/11494.fix.md new file mode 100644 index 00000000000..7155c029049 --- /dev/null +++ b/changes/11494.fix.md @@ -0,0 +1 @@ +Report `current_revision_id` correctly on deployment responses during rolling updates. diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index e381b1635fa..9508752efc6 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -2191,7 +2191,7 @@ def _deployment_data_to_dto(data: ModelDeploymentData) -> DeploymentNode: created_user_id=data.created_user_id, options=deployment_options_to_info(data.options), scaling_state=data.scaling_state, - current_revision_id=data.revision.id if data.revision is not None else None, + current_revision_id=data.current_revision_id, deploying_revision_id=data.deploying_revision_id, policy=policy_info, ) diff --git a/src/ai/backend/manager/api/rest/service/handler.py b/src/ai/backend/manager/api/rest/service/handler.py index de1bf1bc89b..95b881ff049 100644 --- a/src/ai/backend/manager/api/rest/service/handler.py +++ b/src/ai/backend/manager/api/rest/service/handler.py @@ -68,6 +68,7 @@ DeploymentNetworkSpec, ExecutionSpec, ImageIdentifierDraft, + ModelRevisionSpec, ModelRevisionSpecDraft, MountMetadata, ReplicaSpec, @@ -167,6 +168,14 @@ def _serve_info_from_dto(dto: ServiceInfo, runtime_variant_name: RuntimeVariant) ) +def _resolve_target_revision_spec(info: DeploymentInfo) -> ModelRevisionSpec | None: + """Resolve the target revision spec by id (current first, then deploying).""" + target_id = info.current_revision_id or info.deploying_revision_id + if target_id is None: + return None + return next((r for r in info.model_revisions if r.revision_id == target_id), None) + + def _serve_info_from_deployment_info( deployment_info: DeploymentInfo, runtime_variant_name: RuntimeVariant, @@ -177,7 +186,7 @@ def _serve_info_from_deployment_info( active revision's ``runtime_variant_id`` (internal data types are id-only; the legacy REST response still exposes the name string). """ - model_revision = deployment_info.model_revisions[0] if deployment_info.model_revisions else None + model_revision = _resolve_target_revision_spec(deployment_info) return ServeInfoModel( endpoint_id=deployment_info.id, @@ -378,9 +387,7 @@ async def create(self, body: BodyParam[NewServiceRequestModel], req: RequestCtx) await self._deployment.create_legacy_deployment.wait_for_complete(deployment_action) ) deployment_info = deployment_result.data - model_revision = ( - deployment_info.model_revisions[0] if deployment_info.model_revisions else None - ) + model_revision = _resolve_target_revision_spec(deployment_info) if model_revision is None: raise RuntimeVariantNotFound() runtime_variant_name = await self._resolve_runtime_variant_name( diff --git a/src/ai/backend/manager/data/deployment/types.py b/src/ai/backend/manager/data/deployment/types.py index cdd520a9484..ac072072032 100644 --- a/src/ai/backend/manager/data/deployment/types.py +++ b/src/ai/backend/manager/data/deployment/types.py @@ -959,6 +959,7 @@ class ModelDeploymentData: metadata: ModelDeploymentMetadataInfo network_access: DeploymentNetworkSpec revision: ModelRevisionData | None + current_revision_id: DeploymentRevisionID | None deploying_revision_id: DeploymentRevisionID | None revision_history_ids: list[DeploymentRevisionID] scaling_rule_ids: list[UUID] diff --git a/src/ai/backend/manager/models/endpoint/row.py b/src/ai/backend/manager/models/endpoint/row.py index 1c4e6f2ce0b..53422f308bc 100644 --- a/src/ai/backend/manager/models/endpoint/row.py +++ b/src/ai/backend/manager/models/endpoint/row.py @@ -291,6 +291,7 @@ class EndpointRow(Base): # type: ignore[misc] "DeploymentRevisionRow", back_populates="endpoint_row", primaryjoin=_get_endpoint_revisions_join_condition, + order_by="DeploymentRevisionRow.revision_number.desc()", ) auto_scaling_policy: Mapped[DeploymentAutoScalingPolicyRow | None] = relationship( diff --git a/src/ai/backend/manager/services/deployment/service.py b/src/ai/backend/manager/services/deployment/service.py index f8aa330af75..77ae25b2cdb 100644 --- a/src/ai/backend/manager/services/deployment/service.py +++ b/src/ai/backend/manager/services/deployment/service.py @@ -233,10 +233,23 @@ def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentDat Note: Some fields are set to defaults as DeploymentInfo doesn't have all the data. """ - # Map revision if available revision: ModelRevisionData | None = None - if info.model_revisions: - rev = info.model_revisions[0] + rev: ModelRevisionSpec | None = None + if info.current_revision_id is not None: + rev = next( + (r for r in info.model_revisions if r.revision_id == info.current_revision_id), + None, + ) + if rev is None: + log.error( + "Deployment {} has current_revision_id {} but no matching " + "ModelRevisionSpec was found in DeploymentInfo.model_revisions; " + "current_revision will be reported as null. This usually means " + "EndpointRow.revisions was not eagerly loaded by the caller.", + info.id, + info.current_revision_id, + ) + if rev is not None: if rev.revision_id is None: raise ValueError(f"ModelRevisionSpec has no revision_id for deployment {info.id}") revision = ModelRevisionData( @@ -283,6 +296,7 @@ def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentDat network_access=info.network, revision_history_ids=[info.current_revision_id] if info.current_revision_id else [], revision=revision, + current_revision_id=info.current_revision_id, deploying_revision_id=info.deploying_revision_id, scaling_rule_ids=[], # Not available in DeploymentInfo replica_state=ReplicaStateData( diff --git a/tests/unit/manager/services/deployment/test_deployment_service.py b/tests/unit/manager/services/deployment/test_deployment_service.py index 02531f3ddd7..6fc9d4c58a2 100644 --- a/tests/unit/manager/services/deployment/test_deployment_service.py +++ b/tests/unit/manager/services/deployment/test_deployment_service.py @@ -7,6 +7,7 @@ from __future__ import annotations import uuid +from collections.abc import Callable from datetime import UTC, datetime from typing import cast from unittest.mock import AsyncMock, MagicMock @@ -21,6 +22,7 @@ ) from ai.backend.common.dto.manager.v2.deployment.types import IntOrPercent from ai.backend.common.identifier.deployment import DeploymentID +from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID from ai.backend.common.identifier.image import ImageID from ai.backend.common.identifier.runtime_variant import RuntimeVariantID from ai.backend.common.identifier.vfolder import VFolderUUID @@ -50,7 +52,9 @@ ExecutionSpec, ModelMountConfigData, ModelRevisionData, + ModelRevisionSpec, ModelRuntimeConfigData, + MountMetadata, ReplicaSpec, ResourceConfigData, ResourceSpec, @@ -76,7 +80,10 @@ AddModelRevisionAction, ) from ai.backend.manager.services.deployment.processors import DeploymentProcessors -from ai.backend.manager.services.deployment.service import DeploymentService +from ai.backend.manager.services.deployment.service import ( + DeploymentService, + _convert_deployment_info_to_data, +) from ai.backend.manager.sokovan.deployment import DeploymentController @@ -634,3 +641,72 @@ async def test_persists_coordinator_jwt_instead_of_random( creator = cast(RBACEntityCreator[object], repo_call.args[0]) spec = cast(EndpointTokenCreatorSpec, creator.spec) assert spec.token == sample_coordinator_jwt + + +class TestConvertDeploymentInfoToData: + """Regression test for ``_convert_deployment_info_to_data`` (BA-5963).""" + + @pytest.fixture + def make_revision_spec(self) -> Callable[[], ModelRevisionSpec]: + def make() -> ModelRevisionSpec: + return ModelRevisionSpec( + revision_id=DeploymentRevisionID(uuid.uuid4()), + image_id=ImageID(uuid.uuid4()), + resource_spec=ResourceSpec( + cluster_mode=ClusterMode.SINGLE_NODE, + cluster_size=1, + resource_slots={"cpu": "1"}, + ), + mounts=MountMetadata( + model_vfolder_id=VFolderUUID(uuid.uuid4()), + model_definition_path="model-definition.yaml", + model_mount_destination="/models", + extra_mounts=[], + ), + execution=ExecutionSpec( + runtime_variant_id=RuntimeVariantID(uuid.uuid4()), + ), + ) + + return make + + def test_current_revision_resolved_by_id_match_not_list_order( + self, + make_revision_spec: Callable[[], ModelRevisionSpec], + ) -> None: + """Pin: revision lookup must use explicit ``current_revision_id``, not list[0].""" + deploying_spec = make_revision_spec() + current_spec = make_revision_spec() + + deployment_info = DeploymentInfo( + id=DeploymentID(uuid.uuid4()), + metadata=DeploymentMetadata( + name="ba5963-test", + domain="default", + project=uuid.uuid4(), + resource_group="default", + created_user=uuid.uuid4(), + session_owner=uuid.uuid4(), + created_at=datetime(2024, 1, 1, tzinfo=UTC), + revision_history_limit=10, + ), + state=DeploymentState( + lifecycle=EndpointLifecycle.DEPLOYING, + scaling_state=ScalingState.STABLE, + retry_count=0, + ), + replica_spec=ReplicaSpec(replica_count=1), + network=DeploymentNetworkSpec(open_to_public=False), + model_revisions=[deploying_spec, current_spec], + options=DeploymentOptions(), + current_revision_id=current_spec.revision_id, + deploying_revision_id=deploying_spec.revision_id, + ) + + deployment_data = _convert_deployment_info_to_data(deployment_info) + + assert deployment_data.current_revision_id == current_spec.revision_id + assert deployment_data.deploying_revision_id == deploying_spec.revision_id + assert deployment_data.current_revision_id != deployment_data.deploying_revision_id + assert deployment_data.revision is not None + assert deployment_data.revision.id == current_spec.revision_id