Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""remove cleanup_after_slug_deletion trigger

Removes the cleanup trigger added in revision 8413f10ef77f

Revision ID: 42049656cdb8
Revises: d437be68a4fb
Create Date: 2025-10-23 09:55:19.905709

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "42049656cdb8"
down_revision = "d437be68a4fb"
branch_labels = None
depends_on = None


def upgrade() -> None:
op.execute("DROP TRIGGER IF EXISTS cleanup_after_slug_deletion ON common.entity_slugs")
op.execute("DROP FUNCTION cleanup_after_slug_deletion")


def downgrade() -> None:
op.execute("""CREATE OR REPLACE FUNCTION cleanup_after_slug_deletion()
RETURNS TRIGGER AS
$$
BEGIN
IF OLD.project_id IS NOT NULL AND OLD.data_connector_id IS NULL THEN
DELETE FROM projects.projects WHERE projects.id = OLD.project_id;
ELSIF old.data_connector_id IS NOT NULL THEN
DELETE FROM storage.data_connectors WHERE data_connectors.id = OLD.data_connector_id;
END IF;
RETURN OLD;
END;
$$
LANGUAGE plpgsql;""")
op.execute("""CREATE OR REPLACE TRIGGER cleanup_after_slug_deletion
AFTER DELETE ON common.entity_slugs
FOR EACH ROW
EXECUTE FUNCTION cleanup_after_slug_deletion();""")
33 changes: 27 additions & 6 deletions components/renku_data_services/namespace/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datetime import UTC, datetime
from typing import Any, overload

from sqlalchemy import Select, delete, func, select, text
from sqlalchemy import Select, delete, distinct, func, select, text
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession, AsyncSessionTransaction
from sqlalchemy.orm import joinedload, selectinload
Expand All @@ -30,6 +30,7 @@
ProjectPath,
Slug,
)
from renku_data_services.data_connectors import orm as dc_schemas
from renku_data_services.data_connectors.models import DataConnector
from renku_data_services.namespace import models
from renku_data_services.namespace import orm as schemas
Expand Down Expand Up @@ -263,13 +264,33 @@ async def delete_group(
message=f"You cannot delete a group by using an old group slug {slug.value}.",
detail=f"The latest slug is {group.namespace.slug}, please use this for deletions.",
)
# NOTE: We have a stored procedure that gets triggered when a project slug is removed to remove the project.
# This is required because the slug has a foreign key pointing to the project, so when a project is removed
# the slug is removed but the converse is not true. The stored procedure in migration 89aa4573cfa9 has the
# trigger and procedure that does the cleanup when a slug is removed.

dcs = await session.execute(
select(distinct(schemas.EntitySlugORM.data_connector_id))
.join(schemas.NamespaceORM, schemas.NamespaceORM.id == schemas.EntitySlugORM.namespace_id)
.where(schemas.NamespaceORM.group_id == group.id)
.where(schemas.EntitySlugORM.data_connector_id.is_not(None))
)
dcs = [e for e in dcs.scalars().all() if e]

projs = await session.execute(
select(distinct(schemas.EntitySlugORM.project_id))
.join(schemas.NamespaceORM, schemas.NamespaceORM.id == schemas.EntitySlugORM.namespace_id)
.where(schemas.NamespaceORM.group_id == group.id)
.where(schemas.EntitySlugORM.project_id.is_not(None))
)
projs = [e for e in projs.scalars().all() if e]
Comment on lines +276 to +282
Copy link
Member

@leafty leafty Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, this is too many projects at the moment, see this comment: #1004 (comment)

If there was any data connector in a project that was moved out, then there is a remaining entry in the EntitySlugORM table which will match that moved project and the project will get wrongly deleted.

We can look at merging this PR now or decide to fix the slug table issue first. Not sure what is best.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, great you spotted this! Unfortunate, but I probably would fix the other issue first or maybe disable the "move a project" feature for the next release so things get not messed up even more.


stmt = delete(schemas.GroupORM).where(schemas.GroupORM.id == group.id)
await session.execute(stmt)
return models.DeletedGroup(id=group.id)

if projs != []:
await session.execute(delete(ProjectORM).where(ProjectORM.id.in_(projs)))

if dcs != []:
await session.execute(delete(dc_schemas.DataConnectorORM).where(dc_schemas.DataConnectorORM.id.in_(dcs)))

return models.DeletedGroup(id=group.id, data_connectors=dcs, projects=projs)

@with_db_transaction
async def delete_group_member(
Expand Down
2 changes: 2 additions & 0 deletions components/renku_data_services/namespace/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class DeletedGroup:
"""A group that was deleted from the DB."""

id: ULID
data_connectors: list[ULID]
projects: list[ULID]


@dataclass
Expand Down
15 changes: 13 additions & 2 deletions components/renku_data_services/project/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import Concatenate, ParamSpec, TypeVar

from cryptography.hazmat.primitives.asymmetric import rsa
from sqlalchemy import ColumnElement, Select, delete, func, or_, select, update
from sqlalchemy import ColumnElement, Select, delete, distinct, func, or_, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import undefer
from sqlalchemy.sql.functions import coalesce
Expand All @@ -24,6 +24,7 @@
from renku_data_services.base_api.pagination import PaginationRequest
from renku_data_services.base_models import RESET
from renku_data_services.base_models.core import Slug
from renku_data_services.data_connectors import orm as dc_schemas
from renku_data_services.namespace import orm as ns_schemas
from renku_data_services.namespace.db import GroupRepository
from renku_data_services.project import apispec as project_apispec
Expand Down Expand Up @@ -427,13 +428,23 @@ async def delete_project(
if project is None:
return None

dcs = await session.execute(
select(distinct(ns_schemas.EntitySlugORM.data_connector_id))
.where(ns_schemas.EntitySlugORM.project_id == project_id)
.where(ns_schemas.EntitySlugORM.data_connector_id.is_not(None))
)
dcs = [e for e in dcs.scalars().all() if e]

await session.execute(delete(schemas.ProjectORM).where(schemas.ProjectORM.id == project_id))

await session.execute(
delete(storage_schemas.CloudStorageORM).where(storage_schemas.CloudStorageORM.project_id == str(project_id))
)

return models.DeletedProject(id=project.id)
if dcs != []:
await session.execute(delete(dc_schemas.DataConnectorORM).where(dc_schemas.DataConnectorORM.id.in_(dcs)))

return models.DeletedProject(id=project.id, data_connectors=dcs)

async def get_project_permissions(self, user: base_models.APIUser, project_id: ULID) -> models.ProjectPermissions:
"""Get the permissions of the user on a given project."""
Expand Down
1 change: 1 addition & 0 deletions components/renku_data_services/project/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class DeletedProject:
"""Indicates that a project was deleted."""

id: ULID
data_connectors: list[ULID]


@dataclass
Expand Down
7 changes: 7 additions & 0 deletions components/renku_data_services/search/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ async def func_wrapper(self: _WithSearchUpdateRepo, *args: _P.args, **kwargs: _P

case DeletedProject() as p:
record = DeleteDoc.project(p.id)
dcs = [DeleteDoc.data_connector(id) for id in p.data_connectors]
await self.search_updates_repo.upsert(record)
for d in dcs:
await self.search_updates_repo.upsert(d)

case UserInfo() as u:
await self.search_updates_repo.upsert(u)
Expand All @@ -80,7 +83,11 @@ async def func_wrapper(self: _WithSearchUpdateRepo, *args: _P.args, **kwargs: _P

case DeletedGroup() as g:
record = DeleteDoc.group(g.id)
dcs = [DeleteDoc.data_connector(id) for id in g.data_connectors]
prs = [DeleteDoc.project(id) for id in g.projects]
await self.search_updates_repo.upsert(record)
for d in dcs + prs:
await self.search_updates_repo.upsert(d)

case DataConnector() as dc:
await self.search_updates_repo.upsert(dc)
Expand Down
104 changes: 0 additions & 104 deletions test/bases/renku_data_services/data_api/test_namespaces.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import contextlib

import pytest
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError

from renku_data_services.authz.models import Visibility
Expand All @@ -24,7 +23,6 @@
)
from renku_data_services.errors.errors import ConflictError, MissingResourceError, ValidationError
from renku_data_services.namespace.models import UnsavedGroup
from renku_data_services.namespace.orm import EntitySlugORM
from renku_data_services.project.models import Project, ProjectPatch, UnsavedProject
from renku_data_services.users.models import UserInfo

Expand Down Expand Up @@ -404,108 +402,6 @@ async def test_listing_project_namespaces(sanic_client, user_headers) -> None:
assert response.json[1]["path"] == "test1/proj2"


@pytest.mark.asyncio
async def test_stored_procedure_cleanup_after_project_slug_deletion(
create_project,
user_headers,
app_manager: DependencyManager,
sanic_client,
create_data_connector,
) -> None:
# We use stored procedures to remove a project when its slug is removed
proj = await create_project(name="test1")
proj_id = proj.get("id")
assert proj_id is not None
namespace = proj.get("namespace")
assert namespace is not None
proj_slug = proj.get("slug")
assert proj_slug is not None
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
assert response.status_code == 200
dc = await create_data_connector(name="test-dc", namespace=f"{namespace}/{proj_slug}")
dc_id = dc.get("id")
assert dc_id is not None
assert dc is not None
async with app_manager.config.db.async_session_maker() as session, session.begin():
# We do not have APIs exposed that will remove the slug so this is the only way to trigger this
stmt = (
select(EntitySlugORM)
.where(EntitySlugORM.project_id == proj_id)
.where(EntitySlugORM.namespace_id.is_not(None))
.where(EntitySlugORM.data_connector_id.is_(None))
)
res = await session.scalar(stmt)
assert res is not None
await session.delete(res)
await session.flush()
# The project namespace is not there
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}/{proj_slug}", headers=user_headers)
assert response.status_code == 404
# The user or group namespace is untouched
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
assert response.status_code == 200
# The project and data connector are both gone
_, response = await sanic_client.get(f"/api/data/projects/{proj_id}", headers=user_headers)
assert response.status_code == 404
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc_id}", headers=user_headers)
assert response.status_code == 404


@pytest.mark.asyncio
async def test_stored_procedure_cleanup_after_data_connector_slug_deletion(
create_project,
user_headers,
app_manager: DependencyManager,
sanic_client,
create_data_connector,
) -> None:
# We use stored procedures to remove a data connector when its slug is removed
proj = await create_project(name="test1")
proj_id = proj.get("id")
assert proj_id is not None
namespace = proj.get("namespace")
assert namespace is not None
proj_slug = proj.get("slug")
assert proj_slug is not None
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
assert response.status_code == 200
dc1 = await create_data_connector(name="test-dc", namespace=f"{namespace}/{proj_slug}")
dc1_id = dc1.get("id")
assert dc1_id is not None
assert dc1 is not None
dc2 = await create_data_connector(name="test-dc", namespace=namespace)
dc2_id = dc2.get("id")
assert dc2_id is not None
assert dc2 is not None
async with app_manager.config.db.async_session_maker() as session, session.begin():
# We do not have APIs exposed that will remove the slug so this is the only way to trigger this
stmt = select(EntitySlugORM).where(EntitySlugORM.data_connector_id == dc1_id)
scalars = await session.scalars(stmt)
res = scalars.one_or_none()
assert res is not None
await session.delete(res)
stmt = select(EntitySlugORM).where(EntitySlugORM.data_connector_id == dc2_id)
scalars = await session.scalars(stmt)
res = scalars.one_or_none()
assert res is not None
await session.delete(res)
await session.flush()
# The project namespace is still there
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}/{proj_slug}", headers=user_headers)
assert response.status_code == 200
# The user or group namespace is untouched
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
assert response.status_code == 200
# The project is still there
_, response = await sanic_client.get(f"/api/data/projects/{proj_id}", headers=user_headers)
assert response.status_code == 200
# The data connectors are gone
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc1_id}", headers=user_headers)
assert response.status_code == 404
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc2_id}", headers=user_headers)
assert response.status_code == 404


async def test_cleanup_with_group_deletion(
create_project,
create_group,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Namespaces test module."""
Loading