diff --git a/changes/11507.fix.md b/changes/11507.fix.md new file mode 100644 index 00000000000..7fc5e3479b3 --- /dev/null +++ b/changes/11507.fix.md @@ -0,0 +1 @@ +Allow deployment names to be reused within a project so a hidden record from another user no longer blocks creation. diff --git a/src/ai/backend/common/exception.py b/src/ai/backend/common/exception.py index f065d65847a..510861a539e 100644 --- a/src/ai/backend/common/exception.py +++ b/src/ai/backend/common/exception.py @@ -818,18 +818,6 @@ def error_code(self) -> ErrorCode: ) -class DeploymentNameAlreadyExists(BackendAIError, web.HTTPConflict): - error_type = "https://api.backend.ai/probs/deployment-name-already-exists" - error_title = "Deployment name already exists." - - def error_code(self) -> ErrorCode: - return ErrorCode( - domain=ErrorDomain.MODEL_DEPLOYMENT, - operation=ErrorOperation.CREATE, - error_detail=ErrorDetail.ALREADY_EXISTS, - ) - - class PassthroughError(BackendAIError): """ Wraps and forwards errors from requests with original status code and message. diff --git a/src/ai/backend/manager/models/alembic/versions/c7d58e2a4f93_drop_endpoint_name_unique_index.py b/src/ai/backend/manager/models/alembic/versions/c7d58e2a4f93_drop_endpoint_name_unique_index.py new file mode 100644 index 00000000000..39fda6863c3 --- /dev/null +++ b/src/ai/backend/manager/models/alembic/versions/c7d58e2a4f93_drop_endpoint_name_unique_index.py @@ -0,0 +1,47 @@ +"""Drop endpoint name unique index + +Revision ID: c7d58e2a4f93 +Revises: 3632aad9d5d9 +Create Date: 2026-05-07 19:40:00.000000 + +Drop the partial unique index on (name, domain, project). The previous +scope did not match `my_deployments` (which is scoped by `created_user`), +so a deployment by another user in the same project blocked creates while +staying invisible to the caller. + +Downgrade fails if active rows now hold duplicate (name, domain, project); +operator must dedupe first. + +""" + +from alembic import op +from sqlalchemy.exc import IntegrityError + +# revision identifiers, used by Alembic. +revision = "c7d58e2a4f93" +down_revision = "3632aad9d5d9" +# Part of: 26.5.0 +branch_labels = None +depends_on = None + + +INDEX_NAME = "ix_endpoints_unique_name_when_active" +PREDICATE = "lifecycle_stage NOT IN ('destroying', 'destroyed')" + + +def upgrade() -> None: + op.execute(f"DROP INDEX IF EXISTS {INDEX_NAME}") + + +def downgrade() -> None: + try: + op.execute( + f"CREATE UNIQUE INDEX IF NOT EXISTS {INDEX_NAME} " + f"ON endpoints (name, domain, project) " + f"WHERE {PREDICATE}" + ) + except IntegrityError as exc: + raise RuntimeError( + f"Duplicate (name, domain, project) among active `endpoints` rows; " + f"delete duplicates before retrying downgrade to recreate `{INDEX_NAME}`." + ) from exc diff --git a/src/ai/backend/manager/models/endpoint/row.py b/src/ai/backend/manager/models/endpoint/row.py index 95d1f31f6f1..1c4e6f2ce0b 100644 --- a/src/ai/backend/manager/models/endpoint/row.py +++ b/src/ai/backend/manager/models/endpoint/row.py @@ -166,17 +166,6 @@ class EndpointRow(Base): # type: ignore[misc] __tablename__ = "endpoints" __table_args__ = ( - sa.Index( - "ix_endpoints_unique_name_when_active", - "name", - "domain", - "project", - unique=True, - postgresql_where=sa.column("lifecycle_stage").notin_([ - EndpointLifecycle.DESTROYING.value, - EndpointLifecycle.DESTROYED.value, - ]), - ), sa.Index( "ix_endpoints_lifecycle_sub_step", "lifecycle_stage", diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index 609528cbef1..d9b558e33f7 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -25,7 +25,6 @@ PresetTarget, PresetValueType, ) -from ai.backend.common.exception import DeploymentNameAlreadyExists from ai.backend.common.identifier.deployment import DeploymentID from ai.backend.common.identifier.deployment_preset import DeploymentPresetID from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID @@ -301,9 +300,6 @@ async def create_endpoint( spec = cast(DeploymentCreatorSpec, creator.spec) async with self._begin_session_read_committed() as db_sess: await self._check_group_exists(db_sess, spec.metadata.domain, spec.metadata.project_id) - await self._check_endpoint_name_exists( - db_sess, spec.metadata.domain, spec.metadata.project_id, spec.metadata.name - ) # Create endpoint with RBAC scope association rbac_result = await execute_rbac_entity_creator(db_sess, creator) @@ -367,36 +363,6 @@ async def _check_group_exists( if result.first() is None: raise ProjectNotFound(f"Project {group_id} not found in domain {domain_name}") - async def _check_endpoint_name_exists( - self, - db_sess: SASession, - domain_name: str, - project_id: uuid.UUID, - name: str, - ) -> None: - """Check if endpoint name already exists in the project. - - Raises: - DeploymentNameAlreadyExists: If an endpoint with the same name exists. - """ - query = ( - sa.select(EndpointRow.id) - .where( - sa.and_( - EndpointRow.domain == domain_name, - EndpointRow.project == project_id, - EndpointRow.name == name, - EndpointRow.lifecycle_stage != EndpointLifecycle.DESTROYED, - ) - ) - .limit(1) - ) - result = await db_sess.execute(query) - if result.first() is not None: - raise DeploymentNameAlreadyExists( - f"Deployment with name '{name}' already exists in this project" - ) - async def get_image_id(self, image: ImageIdentifier) -> ImageID: """Get image ID from ImageIdentifier. diff --git a/tests/unit/manager/repositories/deployment/test_deployment_repository.py b/tests/unit/manager/repositories/deployment/test_deployment_repository.py index 2f773106a8d..b5e16639720 100644 --- a/tests/unit/manager/repositories/deployment/test_deployment_repository.py +++ b/tests/unit/manager/repositories/deployment/test_deployment_repository.py @@ -18,7 +18,6 @@ from ai.backend.common.data.model_deployment.types import DeploymentStrategy from ai.backend.common.data.permission.types import RBACElementType from ai.backend.common.dto.manager.v2.deployment.types import IntOrPercent -from ai.backend.common.exception import DeploymentNameAlreadyExists from ai.backend.common.identifier.deployment import DeploymentID from ai.backend.common.identifier.image import ImageID from ai.backend.common.identifier.runtime_variant import RuntimeVariantID @@ -3652,37 +3651,6 @@ def _create_endpoint_creator( ), ) - async def test_create_endpoint_raises_when_duplicate_name( - self, - deployment_repository: DeploymentRepository, - test_domain: DomainRow, - test_group: GroupRow, - test_scaling_group: ScalingGroupRow, - test_image_id: uuid.UUID, - ) -> None: - """Test that create_endpoint raises DeploymentNameAlreadyExists for duplicate name.""" - # Create first endpoint with specific name - first_creator = self._create_endpoint_creator( - name="duplicate-test-endpoint", - domain=test_domain, - group=test_group, - scaling_group=test_scaling_group, - image_id=test_image_id, - ) - await deployment_repository.create_endpoint(first_creator) - - # Attempt to create second endpoint with same name should fail - second_creator = self._create_endpoint_creator( - name="duplicate-test-endpoint", - domain=test_domain, - group=test_group, - scaling_group=test_scaling_group, - image_id=test_image_id, - ) - - with pytest.raises(DeploymentNameAlreadyExists): - await deployment_repository.create_endpoint(second_creator) - async def test_create_endpoint_succeeds_with_different_name( self, deployment_repository: DeploymentRepository,