Skip to content

Commit 65cb402

Browse files
authored
fix: sync search data when groups or projects are removed (#1075)
* Update search with child-entities if a project or group is removed * Delete containing data connectors with a project When deleting a project, contained data connectors should be deleted as well. This is was handled by a trigger in the db watching the `entity_slugs` table. Removes the trigger that did this before and implements it into the corresponding repositories
1 parent 8505a95 commit 65cb402

File tree

13 files changed

+397
-113
lines changed

13 files changed

+397
-113
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""remove cleanup_after_slug_deletion trigger
2+
3+
Removes the cleanup trigger added in revision 8413f10ef77f
4+
5+
Revision ID: 42049656cdb8
6+
Revises: d437be68a4fb
7+
Create Date: 2025-10-23 09:55:19.905709
8+
9+
"""
10+
11+
from alembic import op
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "42049656cdb8"
15+
down_revision = "d437be68a4fb"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade() -> None:
21+
op.execute("DROP TRIGGER IF EXISTS cleanup_after_slug_deletion ON common.entity_slugs")
22+
op.execute("DROP FUNCTION cleanup_after_slug_deletion")
23+
24+
25+
def downgrade() -> None:
26+
op.execute("""CREATE OR REPLACE FUNCTION cleanup_after_slug_deletion()
27+
RETURNS TRIGGER AS
28+
$$
29+
BEGIN
30+
IF OLD.project_id IS NOT NULL AND OLD.data_connector_id IS NULL THEN
31+
DELETE FROM projects.projects WHERE projects.id = OLD.project_id;
32+
ELSIF old.data_connector_id IS NOT NULL THEN
33+
DELETE FROM storage.data_connectors WHERE data_connectors.id = OLD.data_connector_id;
34+
END IF;
35+
RETURN OLD;
36+
END;
37+
$$
38+
LANGUAGE plpgsql;""")
39+
op.execute("""CREATE OR REPLACE TRIGGER cleanup_after_slug_deletion
40+
AFTER DELETE ON common.entity_slugs
41+
FOR EACH ROW
42+
EXECUTE FUNCTION cleanup_after_slug_deletion();""")

components/renku_data_services/namespace/db.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from datetime import UTC, datetime
1010
from typing import Any, overload
1111

12-
from sqlalchemy import Select, and_, delete, func, select, text
12+
from sqlalchemy import Select, and_, delete, distinct, func, select, text
1313
from sqlalchemy.exc import IntegrityError
1414
from sqlalchemy.ext.asyncio import AsyncSession, AsyncSessionTransaction
1515
from sqlalchemy.orm import joinedload, selectinload
@@ -30,6 +30,7 @@
3030
ProjectPath,
3131
Slug,
3232
)
33+
from renku_data_services.data_connectors import orm as dc_schemas
3334
from renku_data_services.data_connectors.models import DataConnector
3435
from renku_data_services.data_connectors.orm import DataConnectorORM
3536
from renku_data_services.namespace import models
@@ -311,13 +312,33 @@ async def delete_group(
311312
message=f"You cannot delete a group by using an old group slug {slug.value}.",
312313
detail=f"The latest slug is {group.namespace.slug}, please use this for deletions.",
313314
)
314-
# NOTE: We have a stored procedure that gets triggered when a project slug is removed to remove the project.
315-
# This is required because the slug has a foreign key pointing to the project, so when a project is removed
316-
# the slug is removed but the converse is not true. The stored procedure in migration 89aa4573cfa9 has the
317-
# trigger and procedure that does the cleanup when a slug is removed.
315+
316+
dcs = await session.execute(
317+
select(distinct(schemas.EntitySlugORM.data_connector_id))
318+
.join(schemas.NamespaceORM, schemas.NamespaceORM.id == schemas.EntitySlugORM.namespace_id)
319+
.where(schemas.NamespaceORM.group_id == group.id)
320+
.where(schemas.EntitySlugORM.data_connector_id.is_not(None))
321+
)
322+
dcs = [e for e in dcs.scalars().all() if e]
323+
324+
projs = await session.execute(
325+
select(distinct(schemas.EntitySlugORM.project_id))
326+
.join(schemas.NamespaceORM, schemas.NamespaceORM.id == schemas.EntitySlugORM.namespace_id)
327+
.where(schemas.NamespaceORM.group_id == group.id)
328+
.where(schemas.EntitySlugORM.project_id.is_not(None))
329+
)
330+
projs = [e for e in projs.scalars().all() if e]
331+
318332
stmt = delete(schemas.GroupORM).where(schemas.GroupORM.id == group.id)
319333
await session.execute(stmt)
320-
return models.DeletedGroup(id=group.id)
334+
335+
if projs != []:
336+
await session.execute(delete(ProjectORM).where(ProjectORM.id.in_(projs)))
337+
338+
if dcs != []:
339+
await session.execute(delete(dc_schemas.DataConnectorORM).where(dc_schemas.DataConnectorORM.id.in_(dcs)))
340+
341+
return models.DeletedGroup(id=group.id, data_connectors=dcs, projects=projs)
321342

322343
@with_db_transaction
323344
async def delete_group_member(

components/renku_data_services/namespace/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ class DeletedGroup:
4141
"""A group that was deleted from the DB."""
4242

4343
id: ULID
44+
data_connectors: list[ULID]
45+
projects: list[ULID]
4446

4547

4648
@dataclass

components/renku_data_services/project/db.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from typing import Concatenate, ParamSpec, TypeVar
1212

1313
from cryptography.hazmat.primitives.asymmetric import rsa
14-
from sqlalchemy import ColumnElement, Select, delete, func, or_, select, update
14+
from sqlalchemy import ColumnElement, Select, delete, distinct, func, or_, select, update
1515
from sqlalchemy.ext.asyncio import AsyncSession
1616
from sqlalchemy.orm import undefer
1717
from sqlalchemy.sql.functions import coalesce
@@ -24,6 +24,7 @@
2424
from renku_data_services.base_api.pagination import PaginationRequest
2525
from renku_data_services.base_models import RESET, ProjectPath, ProjectSlug
2626
from renku_data_services.base_models.core import Slug
27+
from renku_data_services.data_connectors import orm as dc_schemas
2728
from renku_data_services.namespace import orm as ns_schemas
2829
from renku_data_services.namespace.db import GroupRepository
2930
from renku_data_services.project import apispec as project_apispec
@@ -401,13 +402,23 @@ async def delete_project(
401402
if project is None:
402403
return None
403404

405+
dcs = await session.execute(
406+
select(distinct(ns_schemas.EntitySlugORM.data_connector_id))
407+
.where(ns_schemas.EntitySlugORM.project_id == project_id)
408+
.where(ns_schemas.EntitySlugORM.data_connector_id.is_not(None))
409+
)
410+
dcs = [e for e in dcs.scalars().all() if e]
411+
404412
await session.execute(delete(schemas.ProjectORM).where(schemas.ProjectORM.id == project_id))
405413

406414
await session.execute(
407415
delete(storage_schemas.CloudStorageORM).where(storage_schemas.CloudStorageORM.project_id == str(project_id))
408416
)
409417

410-
return models.DeletedProject(id=project.id)
418+
if dcs != []:
419+
await session.execute(delete(dc_schemas.DataConnectorORM).where(dc_schemas.DataConnectorORM.id.in_(dcs)))
420+
421+
return models.DeletedProject(id=project.id, data_connectors=dcs)
411422

412423
async def get_project_permissions(self, user: base_models.APIUser, project_id: ULID) -> models.ProjectPermissions:
413424
"""Get the permissions of the user on a given project."""

components/renku_data_services/project/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class DeletedProject:
9090
"""Indicates that a project was deleted."""
9191

9292
id: ULID
93+
data_connectors: list[ULID]
9394

9495

9596
@dataclass

components/renku_data_services/search/decorators.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ async def func_wrapper(self: _WithSearchUpdateRepo, *args: _P.args, **kwargs: _P
6363

6464
case DeletedProject() as p:
6565
record = DeleteDoc.project(p.id)
66+
dcs = [DeleteDoc.data_connector(id) for id in p.data_connectors]
6667
await self.search_updates_repo.upsert(record)
68+
for d in dcs:
69+
await self.search_updates_repo.upsert(d)
6770

6871
case UserInfo() as u:
6972
await self.search_updates_repo.upsert(u)
@@ -80,7 +83,11 @@ async def func_wrapper(self: _WithSearchUpdateRepo, *args: _P.args, **kwargs: _P
8083

8184
case DeletedGroup() as g:
8285
record = DeleteDoc.group(g.id)
86+
dcs = [DeleteDoc.data_connector(id) for id in g.data_connectors]
87+
prs = [DeleteDoc.project(id) for id in g.projects]
8388
await self.search_updates_repo.upsert(record)
89+
for d in dcs + prs:
90+
await self.search_updates_repo.upsert(d)
8491

8592
case DataConnector() as dc:
8693
await self.search_updates_repo.upsert(dc)

test/bases/renku_data_services/data_api/test_namespaces.py

Lines changed: 0 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import contextlib
22

33
import pytest
4-
from sqlalchemy import select
54
from sqlalchemy.exc import IntegrityError
65

76
from renku_data_services.authz.models import Visibility
@@ -24,7 +23,6 @@
2423
)
2524
from renku_data_services.errors.errors import ConflictError, MissingResourceError, ValidationError
2625
from renku_data_services.namespace.models import UnsavedGroup
27-
from renku_data_services.namespace.orm import EntitySlugORM
2826
from renku_data_services.project.models import Project, ProjectPatch, UnsavedProject
2927
from renku_data_services.users.models import UserInfo
3028

@@ -404,108 +402,6 @@ async def test_listing_project_namespaces(sanic_client, user_headers) -> None:
404402
assert response.json[1]["path"] == "test1/proj2"
405403

406404

407-
@pytest.mark.asyncio
408-
async def test_stored_procedure_cleanup_after_project_slug_deletion(
409-
create_project,
410-
user_headers,
411-
app_manager: DependencyManager,
412-
sanic_client,
413-
create_data_connector,
414-
) -> None:
415-
# We use stored procedures to remove a project when its slug is removed
416-
proj = await create_project(name="test1")
417-
proj_id = proj.get("id")
418-
assert proj_id is not None
419-
namespace = proj.get("namespace")
420-
assert namespace is not None
421-
proj_slug = proj.get("slug")
422-
assert proj_slug is not None
423-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
424-
assert response.status_code == 200
425-
dc = await create_data_connector(name="test-dc", namespace=f"{namespace}/{proj_slug}")
426-
dc_id = dc.get("id")
427-
assert dc_id is not None
428-
assert dc is not None
429-
async with app_manager.config.db.async_session_maker() as session, session.begin():
430-
# We do not have APIs exposed that will remove the slug so this is the only way to trigger this
431-
stmt = (
432-
select(EntitySlugORM)
433-
.where(EntitySlugORM.project_id == proj_id)
434-
.where(EntitySlugORM.namespace_id.is_not(None))
435-
.where(EntitySlugORM.data_connector_id.is_(None))
436-
)
437-
res = await session.scalar(stmt)
438-
assert res is not None
439-
await session.delete(res)
440-
await session.flush()
441-
# The project namespace is not there
442-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}/{proj_slug}", headers=user_headers)
443-
assert response.status_code == 404
444-
# The user or group namespace is untouched
445-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
446-
assert response.status_code == 200
447-
# The project and data connector are both gone
448-
_, response = await sanic_client.get(f"/api/data/projects/{proj_id}", headers=user_headers)
449-
assert response.status_code == 404
450-
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc_id}", headers=user_headers)
451-
assert response.status_code == 404
452-
453-
454-
@pytest.mark.asyncio
455-
async def test_stored_procedure_cleanup_after_data_connector_slug_deletion(
456-
create_project,
457-
user_headers,
458-
app_manager: DependencyManager,
459-
sanic_client,
460-
create_data_connector,
461-
) -> None:
462-
# We use stored procedures to remove a data connector when its slug is removed
463-
proj = await create_project(name="test1")
464-
proj_id = proj.get("id")
465-
assert proj_id is not None
466-
namespace = proj.get("namespace")
467-
assert namespace is not None
468-
proj_slug = proj.get("slug")
469-
assert proj_slug is not None
470-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
471-
assert response.status_code == 200
472-
dc1 = await create_data_connector(name="test-dc", namespace=f"{namespace}/{proj_slug}")
473-
dc1_id = dc1.get("id")
474-
assert dc1_id is not None
475-
assert dc1 is not None
476-
dc2 = await create_data_connector(name="test-dc", namespace=namespace)
477-
dc2_id = dc2.get("id")
478-
assert dc2_id is not None
479-
assert dc2 is not None
480-
async with app_manager.config.db.async_session_maker() as session, session.begin():
481-
# We do not have APIs exposed that will remove the slug so this is the only way to trigger this
482-
stmt = select(EntitySlugORM).where(EntitySlugORM.data_connector_id == dc1_id)
483-
scalars = await session.scalars(stmt)
484-
res = scalars.one_or_none()
485-
assert res is not None
486-
await session.delete(res)
487-
stmt = select(EntitySlugORM).where(EntitySlugORM.data_connector_id == dc2_id)
488-
scalars = await session.scalars(stmt)
489-
res = scalars.one_or_none()
490-
assert res is not None
491-
await session.delete(res)
492-
await session.flush()
493-
# The project namespace is still there
494-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}/{proj_slug}", headers=user_headers)
495-
assert response.status_code == 200
496-
# The user or group namespace is untouched
497-
_, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers)
498-
assert response.status_code == 200
499-
# The project is still there
500-
_, response = await sanic_client.get(f"/api/data/projects/{proj_id}", headers=user_headers)
501-
assert response.status_code == 200
502-
# The data connectors are gone
503-
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc1_id}", headers=user_headers)
504-
assert response.status_code == 404
505-
_, response = await sanic_client.get(f"/api/data/data_connectors/{dc2_id}", headers=user_headers)
506-
assert response.status_code == 404
507-
508-
509405
async def test_cleanup_with_group_deletion(
510406
create_project,
511407
create_group,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Namespaces test module."""

0 commit comments

Comments
 (0)