Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Merge Software by ID #55

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified data/wikibase-test-data.db
Binary file not shown.
1 change: 1 addition & 0 deletions fetch_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
create_special_statistics_observation,
update_software_data,
)
from fetch_data.update_data import merge_software_by_id
3 changes: 3 additions & 0 deletions fetch_data/update_data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Update Data"""

from fetch_data.update_data.merge_software import merge_software_by_id
88 changes: 88 additions & 0 deletions fetch_data/update_data/merge_software.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Merge Software"""

from sqlalchemy import Insert, Select, Update, and_, delete, select, update

from data.database_connection import get_async_session
from model.database import (
    WikibaseSoftwareVersionModel,
)
from model.database import WikibaseSoftwareModel
from model.database.wikibase_software.software_tag_xref_model import (
    software_tag_xref_table,
)


async def merge_software_by_id(base_id: int, additional_id: int) -> bool:
    """Merge Software by ID

    Re-point every software version row that references ``additional_id``
    at ``base_id``, copy over the tags the base record does not already
    have, then delete the additional record together with its tag links.

    Args:
        base_id: ID of the software record that is kept.
        additional_id: ID of the duplicate record that is merged into the
            base record and then deleted.

    Returns:
        True when, after the merge, exactly one of the two IDs still
        resolves to a software record.

    Raises:
        ValueError: If both IDs are identical, if either record cannot be
            found, or if the two records have different software types.
    """

    if base_id == additional_id:
        # Merging a record into itself would delete its tag links and then
        # the record itself, so refuse before any query is built or run.
        raise ValueError("Cannot merge software into itself")

    software_query = get_select_software_query([base_id, additional_id])
    update_software_version_query = get_update_software_version_query(
        base_id, additional_id
    )
    update_software_tags_query = get_update_software_tags_query(base_id, additional_id)
    delete_additional_tags_query = software_tag_xref_table.delete().where(
        software_tag_xref_table.c.wikibase_software_id == additional_id
    )
    delete_software_query = delete(WikibaseSoftwareModel).where(
        WikibaseSoftwareModel.id == additional_id
    )

    async with get_async_session() as async_session:
        software_list = (await async_session.scalars(software_query)).all()

        # Explicit checks instead of `assert`, which is removed under `-O`.
        if len(software_list) != 2:
            # A single hit would otherwise pass the type check below and
            # let versions/tags be re-pointed at a missing base record.
            raise ValueError("Both base and additional software must exist")
        if len({s.software_type for s in software_list}) != 1:
            raise ValueError("Cannot merge software of different types")

        await async_session.execute(update_software_version_query)
        await async_session.execute(update_software_tags_query)
        await async_session.execute(delete_additional_tags_query)
        await async_session.flush()

        await async_session.execute(delete_software_query)
        await async_session.commit()

    # Re-read in a fresh session to confirm only one record is left.
    async with get_async_session() as async_session:
        remaining = (await async_session.scalars(software_query)).all()
        return len(remaining) == 1


def get_select_software_query(id_list: list[int]) -> Select[WikibaseSoftwareModel]:
    """Select WikibaseSoftwareModel in ID list

    Build (without executing) a SELECT for every software record whose
    ``id`` appears in ``id_list``.
    """

    id_filter = WikibaseSoftwareModel.id.in_(id_list)
    return select(WikibaseSoftwareModel).where(id_filter)


def get_update_software_tags_query(base_id: int, additional_id: int) -> Insert:
    """Add Additional Software Tags to Base

    Build (without executing) an ``INSERT ... FROM SELECT`` on the
    software/tag cross-reference table. For every tag linked to
    ``additional_id`` that is not yet linked to ``base_id``, a new row
    linking that tag to ``base_id`` is inserted.

    Args:
        base_id: ID of the software record receiving the tags.
        additional_id: ID of the software record whose tags are copied.

    Returns:
        The insert statement; the rows of ``additional_id`` are left
        untouched by it.
    """

    xref = software_tag_xref_table.c

    # Tags the base record already has; these are excluded from the copy
    # so no duplicate (software, tag) pair is inserted.
    base_tag_ids = select(xref.wikibase_software_tag_id).where(
        xref.wikibase_software_id == base_id
    )

    # One (base_id, tag_id) row per tag that only the additional record has.
    missing_tag_rows = select(base_id, xref.wikibase_software_tag_id).where(
        and_(
            xref.wikibase_software_id == additional_id,
            xref.wikibase_software_tag_id.not_in(base_tag_ids),
        )
    )

    return software_tag_xref_table.insert().from_select(
        [
            xref.wikibase_software_id,
            xref.wikibase_software_tag_id,
        ],
        missing_tag_rows,
    )


def get_update_software_version_query(base_id: int, additional_id: int) -> Update:
    """Update Software Version from Additional ID to Base ID

    Build (without executing) an UPDATE that sets ``software_id`` to
    ``base_id`` on every software version row currently referencing
    ``additional_id``.
    """

    references_additional = WikibaseSoftwareVersionModel.software_id == additional_id
    statement = update(WikibaseSoftwareVersionModel).where(references_additional)
    return statement.values(software_id=base_id)
6 changes: 6 additions & 0 deletions model/strawberry/mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
create_software_version_observation,
create_special_statistics_observation,
create_user_observation,
merge_software_by_id,
Copy link
Collaborator

@rti rti Jan 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I see it correctly that the merging is something the user has to do manually?

Could you please briefly describe the workflow this feature would allow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. This is strictly a manual process.
On identification of a duplicate extension - such as Discord Notifications with and without space, or Miraheze Magic and <extensionname-mirahezemagic>, both of which we've actually encountered in this project - this would allow us to merge the two records. We call the first record "base" and the second "additional", and pass in the baseId and additionalId.
All of the software version records collected from wikibases that referred to the additional software would be shifted over to the base software. Any tags collected from Mediawiki for the additional would also be shifted to the base (avoiding duplicates, of course).

)


Expand Down Expand Up @@ -57,3 +58,8 @@ class Mutation:
description="Scrape data from Special:Version page",
resolver=create_software_version_observation,
)

merge_software_by_id = strawberry.mutation(
description="Merge Software",
resolver=merge_software_by_id,
)
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
@freeze_time("2024-03-01")
@pytest.mark.asyncio
@pytest.mark.dependency(
name="update-software-data", depends=["software-version-success"], scope="session"
name="update-software-data",
depends=["software-version-success", "merge-software-by-id"],
scope="session",
)
@pytest.mark.version
async def test_update_software_data(mocker):
Expand Down
25 changes: 25 additions & 0 deletions tests/test_mutation/test_merge_software.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Test Merge Software"""

import pytest

from tests.test_schema import test_schema


# GraphQL mutation document for mergeSoftwareById; the base and additional
# software IDs are supplied as the $baseId and $additionalId variables.
MERGE_SOFTWARE_QUERY = """
mutation MyMutation($baseId: Int!, $additionalId: Int!) {
mergeSoftwareById(baseId: $baseId, additionalId: $additionalId)
}"""


@pytest.mark.asyncio
@pytest.mark.mutation
@pytest.mark.dependency(name="merge-software-by-id")
async def test_merge_software_by_id_mutation():
    """Test Merge Software by ID

    Run the merge mutation for software 1 (base) and 3 (additional) and
    check that it reports no errors and returns a truthy result.
    """

    variables = {"baseId": 1, "additionalId": 3}
    result = await test_schema.execute(
        MERGE_SOFTWARE_QUERY,
        variable_values=variables,
    )

    assert result.errors is None
    assert result.data is not None
    assert result.data.get("mergeSoftwareById")
20 changes: 10 additions & 10 deletions tests/test_query/test_extension_list_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ async def test_extension_list_query():
),
(
1,
"17",
"18",
"Google Analytics Integration",
"Google_Analytics_Integration",
False,
Expand All @@ -105,7 +105,7 @@ async def test_extension_list_query():
),
(
2,
"11",
"12",
"LabeledSectionTransclusion",
"Labeled_Section_Transclusion",
False,
Expand All @@ -129,11 +129,11 @@ async def test_extension_list_query():
None,
None,
None,
[],
["Magic", "extensionname"],
),
(
4,
"18",
"19",
"ProofreadPage",
"Proofread_Page",
False,
Expand All @@ -148,7 +148,7 @@ async def test_extension_list_query():
),
(
5,
"12",
"13",
"Scribunto",
"Scribunto",
False,
Expand All @@ -162,7 +162,7 @@ async def test_extension_list_query():
),
(
6,
"19",
"20",
"UniversalLanguageSelector",
"UniversalLanguageSelector",
False,
Expand All @@ -176,7 +176,7 @@ async def test_extension_list_query():
),
(
7,
"13",
"14",
"WikibaseClient",
"Wikibase_Client",
False,
Expand All @@ -190,7 +190,7 @@ async def test_extension_list_query():
),
(
8,
"14",
"15",
"WikibaseLib",
"WikibaseLib",
True,
Expand All @@ -204,7 +204,7 @@ async def test_extension_list_query():
),
(
9,
"15",
"16",
"WikibaseRepository",
"Wikibase_Repository",
False,
Expand All @@ -218,7 +218,7 @@ async def test_extension_list_query():
),
(
10,
"16",
"17",
"WikibaseView",
"WikibaseView",
False,
Expand Down
Loading