From 42421cd6c1dd97fa35a9f7a72b34cee87c3a6e8a Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Fri, 20 Dec 2024 00:09:40 +0200 Subject: [PATCH] Refactor sync (#3312) --- docs/user/localizing-your-projects.rst | 6 +- pontoon/administration/views.py | 12 +- pontoon/base/__init__.py | 13 - .../management/commands/calculate_stats.py | 25 +- .../0018_populate_entity_context.py | 6 +- pontoon/base/models/changed_entity_locale.py | 10 +- pontoon/base/models/entity.py | 168 +---- pontoon/base/models/locale.py | 10 +- pontoon/base/models/project.py | 123 +--- pontoon/base/models/repository.py | 173 +---- pontoon/base/models/resource.py | 40 -- pontoon/base/models/translated_resource.py | 175 ++--- pontoon/base/models/translation.py | 23 +- pontoon/base/tests/__init__.py | 4 +- pontoon/base/tests/managers/test_entity.py | 144 ---- pontoon/base/tests/models/test_entity.py | 3 +- pontoon/base/tests/models/test_project.py | 79 +-- pontoon/base/tests/models/test_repository.py | 195 +----- pontoon/base/tests/models/test_stats.py | 129 +--- pontoon/base/tests/test_utils.py | 13 - pontoon/base/tests/views/test_download.py | 87 +++ pontoon/base/tests/views/test_upload.py | 7 +- pontoon/base/utils.py | 317 +-------- pontoon/base/views.py | 48 +- pontoon/checks/libraries/compare_locales.py | 2 +- pontoon/pretranslation/tasks.py | 47 +- pontoon/pretranslation/tests/test_tasks.py | 6 +- pontoon/sync/__init__.py | 2 - pontoon/sync/changeset.py | 589 ---------------- pontoon/sync/core.py | 457 ------------ pontoon/sync/core/__init__.py | 114 +++ pontoon/sync/core/checkout.py | 119 ++++ pontoon/sync/core/entities.py | 355 ++++++++++ pontoon/sync/core/paths.py | 69 ++ pontoon/sync/core/stats.py | 215 ++++++ pontoon/sync/core/translations_from_repo.py | 494 +++++++++++++ pontoon/sync/core/translations_to_repo.py | 222 ++++++ pontoon/sync/formats/__init__.py | 3 +- pontoon/sync/formats/base.py | 7 +- pontoon/sync/formats/base_json_file.py | 4 +- pontoon/sync/{ => formats}/exceptions.py | 0 
pontoon/sync/formats/ftl.py | 4 +- pontoon/sync/formats/json_extensions.py | 2 +- pontoon/sync/formats/json_keyvalue.py | 2 +- pontoon/sync/formats/po.py | 7 +- pontoon/sync/formats/silme.py | 4 +- pontoon/sync/formats/utils.py | 54 ++ pontoon/sync/formats/xliff.py | 2 +- pontoon/sync/formats/xml.py | 9 +- .../sync/management/commands/sync_projects.py | 8 +- pontoon/sync/models.py | 69 +- pontoon/sync/repositories/__init__.py | 51 +- pontoon/sync/repositories/git.py | 140 ++-- pontoon/sync/repositories/hg.py | 24 +- pontoon/sync/repositories/svn.py | 19 +- pontoon/sync/repositories/utils.py | 6 +- pontoon/sync/tasks.py | 422 +---------- .../sync/templates/sync/commit_message.jinja | 6 - pontoon/sync/tests/__init__.py | 177 +---- pontoon/sync/tests/formats/__init__.py | 12 +- pontoon/sync/tests/formats/test_ftl.py | 2 +- pontoon/sync/tests/formats/test_po.py | 7 +- pontoon/sync/tests/formats/test_silme.py | 2 +- pontoon/sync/tests/formats/test_xliff.py | 5 +- pontoon/sync/tests/formats/test_xml.py | 2 +- pontoon/sync/tests/test_changeset.py | 654 ------------------ pontoon/sync/tests/test_checkouts.py | 149 ++++ pontoon/sync/tests/test_checks.py | 110 --- ...{test_sync_projects.py => test_command.py} | 44 +- pontoon/sync/tests/test_core.py | 426 ------------ pontoon/sync/tests/test_e2e.py | 374 ++++++++++ pontoon/sync/tests/test_entities.py | 261 +++++++ pontoon/sync/tests/test_paths.py | 160 +++++ pontoon/sync/tests/test_repositories.py | 1 + pontoon/sync/tests/test_tasks.py | 450 ------------ .../sync/tests/test_translations_from_repo.py | 215 ++++++ .../sync/tests/test_translations_to_repo.py | 238 +++++++ pontoon/sync/tests/test_utils.py | 6 - pontoon/sync/tests/test_vcs_config.py | 57 -- pontoon/sync/tests/test_vcs_models.py | 629 ----------------- pontoon/sync/tests/utils.py | 23 + pontoon/sync/utils.py | 291 +++----- pontoon/sync/vcs/config.py | 190 ----- pontoon/sync/vcs/project.py | 540 --------------- pontoon/sync/vcs/resource.py | 150 ---- 
requirements/default.in | 5 +- requirements/default.txt | 38 +- 87 files changed, 3781 insertions(+), 6781 deletions(-) create mode 100644 pontoon/base/tests/views/test_download.py delete mode 100644 pontoon/sync/changeset.py delete mode 100644 pontoon/sync/core.py create mode 100644 pontoon/sync/core/__init__.py create mode 100644 pontoon/sync/core/checkout.py create mode 100644 pontoon/sync/core/entities.py create mode 100644 pontoon/sync/core/paths.py create mode 100644 pontoon/sync/core/stats.py create mode 100644 pontoon/sync/core/translations_from_repo.py create mode 100644 pontoon/sync/core/translations_to_repo.py rename pontoon/sync/{ => formats}/exceptions.py (100%) create mode 100644 pontoon/sync/formats/utils.py delete mode 100644 pontoon/sync/templates/sync/commit_message.jinja delete mode 100644 pontoon/sync/tests/test_changeset.py create mode 100644 pontoon/sync/tests/test_checkouts.py delete mode 100644 pontoon/sync/tests/test_checks.py rename pontoon/sync/tests/{test_sync_projects.py => test_command.py} (75%) delete mode 100644 pontoon/sync/tests/test_core.py create mode 100644 pontoon/sync/tests/test_e2e.py create mode 100644 pontoon/sync/tests/test_entities.py create mode 100644 pontoon/sync/tests/test_paths.py delete mode 100644 pontoon/sync/tests/test_tasks.py create mode 100644 pontoon/sync/tests/test_translations_from_repo.py create mode 100644 pontoon/sync/tests/test_translations_to_repo.py delete mode 100644 pontoon/sync/tests/test_utils.py delete mode 100644 pontoon/sync/tests/test_vcs_config.py delete mode 100644 pontoon/sync/tests/test_vcs_models.py create mode 100644 pontoon/sync/tests/utils.py delete mode 100644 pontoon/sync/vcs/config.py delete mode 100644 pontoon/sync/vcs/project.py delete mode 100644 pontoon/sync/vcs/resource.py diff --git a/docs/user/localizing-your-projects.rst b/docs/user/localizing-your-projects.rst index 835e7d5755..b448e22081 100644 --- a/docs/user/localizing-your-projects.rst +++ 
b/docs/user/localizing-your-projects.rst @@ -68,11 +68,7 @@ following required fields: #. **Locales**: select at least one Localizable locale by clicking on it. #. **Repository URL**: enter your repository's SSH URL of the form ``git@github.com:user/repo.git``. -#. **Download prefix or path to TOML file**: a URL prefix for downloading localized files. For - GitHub repositories, select any localized file on GitHub, click ``Raw`` and - replace locale code and the following bits in the URL with ``{locale_code}``. - If you use one, you need to select the `project configuration file`_ instead - of a localized file. +#. **Download prefix or path to TOML file**: a URL prefix for downloading localized files. #. Click **SAVE PROJECT** at the bottom of the page. #. After the page reloads, click **SYNC** and wait for Pontoon to import strings. You can monitor the progress in the Sync log (``/sync/log/``). diff --git a/pontoon/administration/views.py b/pontoon/administration/views.py index 5f8d1b21e2..23ea5e8b8e 100644 --- a/pontoon/administration/views.py +++ b/pontoon/administration/views.py @@ -30,9 +30,9 @@ Translation, ) from pontoon.base.utils import require_AJAX -from pontoon.pretranslation.tasks import pretranslate +from pontoon.pretranslation.tasks import pretranslate_task from pontoon.sync.models import SyncLog -from pontoon.sync.tasks import sync_project +from pontoon.sync.tasks import sync_project_task log = logging.getLogger(__name__) @@ -431,7 +431,7 @@ def _create_or_update_translated_resources( resource = _get_resource_for_database_project(project) for locale in locales: - tr, created = TranslatedResource.objects.get_or_create( + tr, _ = TranslatedResource.objects.get_or_create( locale_id=locale.pk, resource=resource, ) @@ -542,9 +542,9 @@ def manually_sync_project(request, slug): "Forbidden: You don't have permission for syncing projects" ) - sync_log = SyncLog.objects.create(start_time=timezone.now()) project = Project.objects.get(slug=slug) - 
sync_project.delay(project.pk, sync_log.pk) + sync_log = SyncLog.objects.create(start_time=timezone.now()) + sync_project_task.delay(project.pk, sync_log.pk) return HttpResponse("ok") @@ -558,6 +558,6 @@ def manually_pretranslate_project(request, slug): ) project = Project.objects.get(slug=slug) - pretranslate.delay(project.pk) + pretranslate_task.delay(project.pk) return HttpResponse("ok") diff --git a/pontoon/base/__init__.py b/pontoon/base/__init__.py index 0ecf0af035..e69de29bb2 100644 --- a/pontoon/base/__init__.py +++ b/pontoon/base/__init__.py @@ -1,13 +0,0 @@ -MOZILLA_REPOS = ( - "ssh://hg.mozilla.org/users/m_owca.info/firefox-beta/", - "ssh://hg.mozilla.org/users/m_owca.info/firefox-for-android-beta/", - "ssh://hg.mozilla.org/users/m_owca.info/thunderbird-beta/", - "ssh://hg.mozilla.org/users/m_owca.info/lightning-beta/", - "ssh://hg.mozilla.org/users/m_owca.info/seamonkey-beta/", - "ssh://hg.mozilla.org/users/m_owca.info/firefox-central/", - "ssh://hg.mozilla.org/users/m_owca.info/firefox-for-android-central/", - "ssh://hg.mozilla.org/users/m_owca.info/thunderbird-central/", - "ssh://hg.mozilla.org/users/m_owca.info/lightning-central/", - "ssh://hg.mozilla.org/users/m_owca.info/seamonkey-central/", - "git@gitlab.com:seamonkey-project/seamonkey-central-l10n.git", -) diff --git a/pontoon/base/management/commands/calculate_stats.py b/pontoon/base/management/commands/calculate_stats.py index ecf402d5ba..04e4e933f4 100644 --- a/pontoon/base/management/commands/calculate_stats.py +++ b/pontoon/base/management/commands/calculate_stats.py @@ -3,10 +3,8 @@ from django.core.management.base import BaseCommand from django.db.models import Count -from pontoon.base.models import ( - Project, - TranslatedResource, -) +from pontoon.base.models import Project +from pontoon.sync.core.stats import update_locale_stats, update_stats log = logging.getLogger(__name__) @@ -34,20 +32,9 @@ def handle(self, *args, **options): "disabled", "resource_count" ) - for index, project in 
enumerate(projects): - log.info( - 'Calculating stats for project "{project}" ({index}/{total})'.format( - index=index + 1, - total=len(projects), - project=project.name, - ) - ) - - translated_resources = TranslatedResource.objects.filter( - resource__project=project - ) - - for translated_resource in translated_resources: - translated_resource.calculate_stats() + log.info(f"Calculating stats for {len(projects)} projects...") + for project in projects: + update_stats(project, update_locales=False) + update_locale_stats() log.info("Calculating stats complete for all projects.") diff --git a/pontoon/base/migrations/0018_populate_entity_context.py b/pontoon/base/migrations/0018_populate_entity_context.py index 9585a56a02..dd87dad344 100644 --- a/pontoon/base/migrations/0018_populate_entity_context.py +++ b/pontoon/base/migrations/0018_populate_entity_context.py @@ -3,22 +3,20 @@ from django.db import migrations from django.db.models import F, Func, TextField, Value -from pontoon.sync import KEY_SEPARATOR - def add_entity_context(apps, schema_editor): Entity = apps.get_model("base", "Entity") split_key_po = Func( F("key"), - Value(KEY_SEPARATOR), + Value("\x04"), Value(1), function="split_part", output_field=TextField(), ) split_key_xliff = Func( F("key"), - Value(KEY_SEPARATOR), + Value("\x04"), Value(2), function="split_part", output_field=TextField(), diff --git a/pontoon/base/models/changed_entity_locale.py b/pontoon/base/models/changed_entity_locale.py index 8ec4cbaf11..ee3d5cecd0 100644 --- a/pontoon/base/models/changed_entity_locale.py +++ b/pontoon/base/models/changed_entity_locale.py @@ -1,15 +1,21 @@ +from typing import TYPE_CHECKING + from django.db import models from django.utils import timezone +if TYPE_CHECKING: + from pontoon.base.models import Entity, Locale + + class ChangedEntityLocale(models.Model): """ ManyToMany model for storing what locales have changed translations for a specific entity since the last sync. 
""" - entity = models.ForeignKey("Entity", models.CASCADE) - locale = models.ForeignKey("Locale", models.CASCADE) + entity: models.ForeignKey["Entity"] = models.ForeignKey("Entity", models.CASCADE) + locale: models.ForeignKey["Locale"] = models.ForeignKey("Locale", models.CASCADE) when = models.DateTimeField(default=timezone.now) class Meta: diff --git a/pontoon/base/models/entity.py b/pontoon/base/models/entity.py index 2c5f44f333..31852d61af 100644 --- a/pontoon/base/models/entity.py +++ b/pontoon/base/models/entity.py @@ -1,3 +1,4 @@ +from collections.abc import Iterable from functools import reduce from operator import ior from re import escape, findall, match @@ -14,7 +15,6 @@ from pontoon.base.models.project import Project from pontoon.base.models.project_locale import ProjectLocale from pontoon.base.models.resource import Resource -from pontoon.sync import KEY_SEPARATOR def get_word_count(string): @@ -443,50 +443,6 @@ def prefetch_entities_data(self, locale, preferred_source_locale): return entities - def reset_active_translations(self, locale): - """ - Reset active translation for given set of entities and locale. - """ - from pontoon.base.models.translation import Translation - - translations = Translation.objects.filter( - entity__in=self, - locale=locale, - ) - - # First, deactivate all translations - translations.update(active=False) - - # Mark all approved, pretranslated and fuzzy translations as active. - translations.filter( - Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True) - ).update(active=True) - - # Mark most recent unreviewed suggestions without active siblings - # for any given combination of (locale, entity, plural_form) as active. 
- unreviewed_pks = set() - unreviewed = translations.filter( - approved=False, - pretranslated=False, - fuzzy=False, - rejected=False, - ).values_list("entity", "plural_form") - - for entity, plural_form in unreviewed: - siblings = ( - Translation.objects.filter( - entity=entity, - locale=locale, - plural_form=plural_form, - ) - .exclude(rejected=True) - .order_by("-active", "-date") - ) - if siblings and not siblings[0].active: - unreviewed_pks.add(siblings[0].pk) - - translations.filter(pk__in=unreviewed_pks).update(active=True) - def get_or_create(self, defaults=None, **kwargs): kwargs["word_count"] = get_word_count(kwargs["string"]) return super().get_or_create(defaults=defaults, **kwargs) @@ -532,18 +488,6 @@ class Meta: models.Index(fields=["resource", "obsolete", "string_plural"]), ] - @property - def cleaned_key(self): - """ - Get cleaned key, without the source string and Translate Toolkit - separator. - """ - key = self.key.split(KEY_SEPARATOR)[0] - if key == self.string: - key = "" - - return key - def __str__(self): return self.string @@ -559,68 +503,27 @@ def get_stats(self, locale): :return: a dictionary with stats for an Entity, all keys are suffixed with `_diff` to make them easier to pass into adjust_all_stats. 
""" - translations = list( - self.translation_set.filter(locale=locale).prefetch_related( - "errors", - "warnings", - ) - ) - - approved_strings_count = len( - [ - t - for t in translations - if t.approved and not (t.errors.exists() or t.warnings.exists()) - ] - ) - - pretranslated_strings_count = len( - [ - t - for t in translations - if t.pretranslated and not (t.errors.exists() or t.warnings.exists()) - ] - ) - - if self.string_plural: - approved = int(approved_strings_count == locale.nplurals) - pretranslated = int(pretranslated_strings_count == locale.nplurals) - - else: - approved = int(approved_strings_count > 0) - pretranslated = int(pretranslated_strings_count > 0) - - if not (approved or pretranslated): - has_errors = bool( - [ - t - for t in translations - if (t.approved or t.pretranslated or t.fuzzy) and t.errors.exists() - ] - ) - has_warnings = bool( - [ - t - for t in translations - if (t.approved or t.pretranslated or t.fuzzy) - and t.warnings.exists() - ] - ) - - errors = int(has_errors) - warnings = int(has_warnings) - - else: - errors = 0 - warnings = 0 - - unreviewed_count = len( - [ - t - for t in translations - if not (t.approved or t.pretranslated or t.fuzzy or t.rejected) - ] - ) + approved = 0 + pretranslated = 0 + errors = 0 + warnings = 0 + unreviewed = 0 + + for t in self.translation_set.filter(locale=locale).prefetch_related( + "errors", "warnings" + ): + if t.errors.exists(): + if t.approved or t.pretranslated or t.fuzzy: + errors += 1 + elif t.warnings.exists(): + if t.approved or t.pretranslated or t.fuzzy: + warnings += 1 + elif t.approved: + approved += 1 + elif t.pretranslated: + pretranslated += 1 + if not (t.approved or t.pretranslated or t.fuzzy or t.rejected): + unreviewed += 1 return { "total_strings_diff": 0, @@ -628,21 +531,7 @@ def get_stats(self, locale): "pretranslated_strings_diff": pretranslated, "strings_with_errors_diff": errors, "strings_with_warnings_diff": warnings, - "unreviewed_strings_diff": unreviewed_count, - 
} - - @classmethod - def get_stats_diff(cls, stats_before, stats_after): - """ - Return stat difference between the two states of the entity. - - :arg dict stats_before: dict returned by get_stats() for the initial state. - :arg dict stats_after: dict returned by get_stats() for the current state. - :return: dictionary with differences between provided stats. - """ - return { - stat_name: stats_after[stat_name] - stats_before[stat_name] - for stat_name in stats_before + "unreviewed_strings_diff": unreviewed, } def has_changed(self, locale): @@ -942,7 +831,9 @@ def map_entities( ): entities_array = [] - entities = entities.prefetch_entities_data(locale, preferred_source_locale) + entities: Iterable[Entity] = entities.prefetch_entities_data( + locale, preferred_source_locale + ) # If requested entity not in the current page if requested_entity and requested_entity not in [e.pk for e in entities]: @@ -981,13 +872,18 @@ def map_entities( if original_plural != "": original_plural = entity.alternative_originals[-1].string + key_separator = "\x04" + cleaned_key = entity.key.split(key_separator)[0] + if cleaned_key == entity.string: + cleaned_key = "" + entities_array.append( { "pk": entity.pk, "original": original, "original_plural": original_plural, "machinery_original": entity.string, - "key": entity.cleaned_key, + "key": cleaned_key, "context": entity.context, "path": entity.resource.path, "project": entity.resource.project.serialize(), diff --git a/pontoon/base/models/locale.py b/pontoon/base/models/locale.py index ac12a8568d..cb5d8914ac 100644 --- a/pontoon/base/models/locale.py +++ b/pontoon/base/models/locale.py @@ -390,6 +390,7 @@ def stats(self): { "title": "all-resources", "resource__path": [], + # FIXME rename as total_strings "resource__total_strings": self.total_strings, "pretranslated_strings": self.pretranslated_strings, "strings_with_errors": self.strings_with_errors, @@ -409,6 +410,7 @@ def get_details(parts): "title", "resource__path", 
"resource__deadline", + # FIXME rename as total_strings "resource__total_strings", "pretranslated_strings", "strings_with_errors", @@ -421,7 +423,12 @@ def get_details(parts): resource__project=project, resource__entities__obsolete=False, locale=self ).distinct() details = list( - get_details(translatedresources.annotate(title=F("resource__path"))) + get_details( + translatedresources.annotate( + resource__total_strings=F("total_strings"), + title=F("resource__path"), + ) + ) ) all_resources = ProjectLocale.objects.get(project=project, locale=self) @@ -430,6 +437,7 @@ def get_details(parts): "title": "all-resources", "resource__path": [], "resource__deadline": [], + # FIXME rename as total_strings "resource__total_strings": all_resources.total_strings, "pretranslated_strings": all_resources.pretranslated_strings, "strings_with_errors": all_resources.strings_with_errors, diff --git a/pontoon/base/models/project.py b/pontoon/base/models/project.py index de5ccc2b8d..ca90e6c262 100644 --- a/pontoon/base/models/project.py +++ b/pontoon/base/models/project.py @@ -1,20 +1,21 @@ -from collections import defaultdict -from os.path import basename, join, normpath -from urllib.parse import urlparse +from os.path import join +from typing import TYPE_CHECKING from django.conf import settings from django.contrib.auth.models import User from django.db import models from django.db.models import Prefetch +from django.db.models.manager import BaseManager from django.utils import timezone -from django.utils.functional import cached_property -from pontoon.base import utils from pontoon.base.models.aggregated_stats import AggregatedStats -from pontoon.base.models.changed_entity_locale import ChangedEntityLocale from pontoon.base.models.locale import Locale +if TYPE_CHECKING: + from pontoon.base.models import Resource + + class Priority(models.IntegerChoices): LOWEST = 1, "Lowest" LOW = 2, "Low" @@ -103,6 +104,8 @@ class Project(AggregatedStats): slug = models.SlugField(unique=True) 
locales = models.ManyToManyField(Locale, through="ProjectLocale") + resources: BaseManager["Resource"] + class DataSource(models.TextChoices): REPOSITORY = "repository", "Repository" DATABASE = "database", "Database" @@ -263,119 +266,11 @@ def save(self, *args, **kwargs): for locale in self.locales.all(): locale.aggregate_stats() - def changed_resources(self, now): - """ - Returns a map of resource paths and their locales - that where changed from the last sync. - """ - resources = defaultdict(set) - changes = ChangedEntityLocale.objects.filter( - entity__resource__project=self, when__lte=now - ).prefetch_related("locale", "entity__resource") - - for change in changes: - resources[change.entity.resource.path].add(change.locale) - - return resources - - @cached_property - def unsynced_locales(self): - """ - Project Locales that haven't been synchronized yet. - """ - return list( - set(self.locales.all()) - - set(Locale.objects.filter(translatedresources__resource__project=self)) - ) - - @property - def needs_sync(self): - """ - True if the project has changed since the last sync such that - another sync is required. - """ - changes = ChangedEntityLocale.objects.filter(entity__resource__project=self) - return changes.exists() or self.unsynced_locales - @property def checkout_path(self): """Path where this project's VCS checkouts are located.""" return join(settings.MEDIA_ROOT, "projects", self.slug) - # For compatibility with the old sync, these properties refer to the - # first repository by ID. 
- def _repo_compat_attr(self, attribute): - repo = self.repositories.first() - return getattr(repo, attribute) if repo is not None else None - - @property - def repository_type(self): - return self._repo_compat_attr("type") - - @property - def repository_url(self): - return self._repo_compat_attr("url") - - @property - def repository_path(self): - return self._repo_compat_attr("checkout_path") - - def repository_for_path(self, path): - """ - Return the repository instance whose checkout contains the given - path. If no matching repo is found, raise a ValueError. - """ - repo = utils.first( - self.repositories.all(), lambda r: path.startswith(r.checkout_path) - ) - - if repo is None: - raise ValueError(f"Could not find repo matching path {path}.") - else: - return repo - - @property - def has_multi_locale_repositories(self): - for repo in self.repositories.all(): - if repo.multi_locale: - return True - - return False - - @property - def has_single_repo(self): - return self.repositories.count() == 1 - - @cached_property - def source_repository(self): - """ - Returns an instance of repository which contains the path to source files. - """ - if not self.has_single_repo: - from pontoon.sync.vcs.project import VCSProject - - source_directories = VCSProject.SOURCE_DIR_SCORES.keys() - - for repo in self.repositories.all(): - last_directory = basename(normpath(urlparse(repo.url).path)) - if repo.source_repo or last_directory in source_directories: - return repo - - return self.repositories.first() - - def translation_repositories(self): - """ - Returns a list of project repositories containing translations. 
- """ - from pontoon.base.models.repository import Repository - - pks = [ - repo.pk - for repo in self.repositories.all() - if repo.is_translation_repository - ] - return Repository.objects.filter(pk__in=pks) - def get_latest_activity(self, locale=None): from pontoon.base.models.project_locale import ProjectLocale diff --git a/pontoon/base/models/repository.py b/pontoon/base/models/repository.py index 5c0a693098..2597c498e1 100644 --- a/pontoon/base/models/repository.py +++ b/pontoon/base/models/repository.py @@ -1,8 +1,7 @@ import logging import re -from os import sep -from os.path import join +from os.path import join, normpath from urllib.parse import urlparse from jsonfield import JSONField @@ -63,12 +62,10 @@ class Type(models.TextChoices): """, ) + last_synced_revisions = JSONField(blank=True, default=dict) """ - Mapping of locale codes to VCS revisions of each repo at the last - sync. If this isn't a multi-locale repo, the mapping has a single - key named "single_locale" with the revision. + Mapping with a single key named "single_locale" with the VCS revision of its last sync. """ - last_synced_revisions = JSONField(blank=True, default=dict) source_repo = models.BooleanField( default=False, @@ -86,50 +83,24 @@ def __repr__(self): repo_kind = "SourceRepository" return f"<{repo_kind}[{self.pk}:{self.type}:{self.url}]" - @property - def multi_locale(self): - """ - Checks if url contains locale code variable. System will replace - this variable by the locale codes of all enabled locales for the - project during pulls and commits. - """ - return "{locale_code}" in self.url - - @property - def is_source_repository(self): - """ - Returns true if repository contains source strings. - """ - return self == self.project.source_repository - - @property - def is_translation_repository(self): - """ - Returns true if repository contains translations. 
- """ - return self.project.has_single_repo or not self.is_source_repository - @property def checkout_path(self): """ Path where the checkout for this repo is located. Does not include a trailing path separator. """ - path_components = [self.project.checkout_path] # Include path components from the URL in case it has locale # information, like https://hg.mozilla.org/gaia-l10n/fr/. # No worry about overlap between repos, any overlap of locale # directories is an error already. - path_components += urlparse(self.url).path.split("/") - if self.multi_locale: - path_components = [c for c in path_components if c != "{locale_code}"] - - if self.source_repo: - path_components.append("templates") + path_components = [ + self.project.checkout_path, + *urlparse(self.url).path.split("/"), + ] - # Remove trailing separator for consistency. - return join(*path_components).rstrip(sep) + # Normalize path for consistency. + return normpath(join(*path_components)) @cached_property def api_config(self): @@ -164,127 +135,19 @@ def api_config(self): return None - def locale_checkout_path(self, locale): - """ - Path where the checkout for the given locale for this repo is - located. If this repo is not a multi-locale repo, a ValueError - is raised. - """ - if not self.multi_locale: - raise ValueError( - "Cannot get locale_checkout_path for non-multi-locale repos." - ) - - return join(self.checkout_path, locale.code) - - def locale_url(self, locale): - """ - URL for the repo for the given locale. If this repo is not a - multi-locale repo, a ValueError is raised. - """ - if not self.multi_locale: - raise ValueError("Cannot get locale_url for non-multi-locale repos.") - - return self.url.format(locale_code=locale.code) - - def url_for_path(self, path): - """ - Determine the locale-specific repo URL for the given path. - - If this is not a multi-locale repo, raise a ValueError. If no - repo is found for the given path, also raise a ValueError. 
- """ - for locale in self.project.locales.all(): - if path.startswith(self.locale_checkout_path(locale)): - return self.locale_url(locale) - - raise ValueError(f"No repo found for path: {path}") - - def pull(self, locales=None): - """ - Pull changes from VCS. Returns the revision(s) of the repo after - pulling. - """ - from pontoon.sync.repositories import ( - PullFromRepositoryException, - get_revision, - update_from_vcs, + @property + def last_synced_revision(self) -> str | None: + return ( + self.last_synced_revisions.get("single_locale", None) + if self.last_synced_revisions + else None ) - if not self.multi_locale: - update_from_vcs(self.type, self.url, self.checkout_path, self.branch) - return {"single_locale": get_revision(self.type, self.checkout_path)} - else: - current_revisions = {} - locales = locales or self.project.locales.all() - - for locale in locales: - repo_type = self.type - url = self.locale_url(locale) - checkout_path = self.locale_checkout_path(locale) - repo_branch = self.branch - - try: - update_from_vcs(repo_type, url, checkout_path, repo_branch) - current_revisions[locale.code] = get_revision( - repo_type, checkout_path - ) - except PullFromRepositoryException as e: - log.error(f"{repo_type.upper()} Pull Error for {url}: {e}") - - return current_revisions - - def commit(self, message, author, path): - """Commit changes to VCS.""" - # For multi-locale repos, figure out which sub-repo corresponds - # to the given path. - url = self.url - if self.multi_locale: - url = self.url_for_path(path) - - from pontoon.sync.repositories import commit_to_vcs - - return commit_to_vcs(self.type, path, message, author, self.branch, url) - - def set_last_synced_revisions(self, locales=None): - """ - Set last_synced_revisions to a dictionary of revisions - that are currently downloaded on the disk. 
- """ - from pontoon.sync.repositories import get_revision - - current_revisions = {} - - if self.multi_locale: - for locale in self.project.locales.all(): - if locales is not None and locale not in locales: - revision = self.last_synced_revisions.get(locale.code) - else: - revision = get_revision( - self.type, self.locale_checkout_path(locale) - ) - - if revision: - current_revisions[locale.code] = revision - - else: - current_revisions["single_locale"] = get_revision( - self.type, self.checkout_path - ) - - self.last_synced_revisions = current_revisions + @last_synced_revision.setter + def last_synced_revision(self, revision: str) -> None: + self.last_synced_revisions = {"single_locale": revision} self.save(update_fields=["last_synced_revisions"]) - def get_last_synced_revisions(self, locale=None): - """ - Get revision from the last_synced_revisions dictionary if exists. - """ - if self.last_synced_revisions: - key = locale or "single_locale" - return self.last_synced_revisions.get(key) - else: - return None - class Meta: unique_together = ("project", "url") ordering = ["id"] diff --git a/pontoon/base/models/resource.py b/pontoon/base/models/resource.py index 810264c9e4..a580aceea2 100644 --- a/pontoon/base/models/resource.py +++ b/pontoon/base/models/resource.py @@ -1,14 +1,7 @@ -from os.path import splitext - from django.db import models from django.utils import timezone -class ResourceQuerySet(models.QuerySet): - def asymmetric(self): - return self.filter(format__in=Resource.ASYMMETRIC_FORMATS) - - class Resource(models.Model): project = models.ForeignKey("Project", models.CASCADE, related_name="resources") path = models.TextField() # Path to localization file @@ -46,19 +39,6 @@ class Format(models.TextChoices): deadline = models.DateField(blank=True, null=True) - SOURCE_EXTENSIONS = ["pot"] # Extensions of source-only formats. 
- ALLOWED_EXTENSIONS = Format.values + SOURCE_EXTENSIONS - - ASYMMETRIC_FORMATS = { - Format.DTD, - Format.FTL, - Format.INC, - Format.INI, - Format.JSON, - Format.PROPERTIES, - Format.XML, - } - # Formats that allow empty translations EMPTY_TRANSLATION_FORMATS = { Format.DTD, @@ -67,16 +47,9 @@ class Format(models.TextChoices): Format.PROPERTIES, } - objects = ResourceQuerySet.as_manager() - class Meta: unique_together = (("project", "path"),) - @property - def is_asymmetric(self): - """Return True if this resource is in an asymmetric format.""" - return self.format in self.ASYMMETRIC_FORMATS - @property def allows_empty_translations(self): """Return True if this resource allows empty translations.""" @@ -91,16 +64,3 @@ def __str__(self): project=self.project.name, resource=self.path, ) - - @classmethod - def get_path_format(self, path): - filename, extension = splitext(path) - path_format = extension[1:].lower() - - # Special case: pot files are considered the po format - if path_format == "pot": - return "po" - elif path_format == "xlf": - return "xliff" - else: - return path_format diff --git a/pontoon/base/models/translated_resource.py b/pontoon/base/models/translated_resource.py index 26a759b9f4..3f6fa84009 100644 --- a/pontoon/base/models/translated_resource.py +++ b/pontoon/base/models/translated_resource.py @@ -1,3 +1,7 @@ +import logging + +from typing import Any + from django.db import models from django.db.models import Q, Sum @@ -11,10 +15,13 @@ from pontoon.base.models.translation import Translation +log = logging.getLogger(__name__) + + class TranslatedResourceQuerySet(models.QuerySet): def aggregated_stats(self): return self.aggregate( - total=Sum("resource__total_strings"), + total=Sum("total_strings"), approved=Sum("approved_strings"), pretranslated=Sum("pretranslated_strings"), errors=Sum("strings_with_errors"), @@ -73,44 +80,67 @@ def update_stats(self): """ Update stats on a list of TranslatedResource. 
""" - self = self.prefetch_related("resource__project", "locale") - locales = Locale.objects.filter( - translatedresources__in=self, - ).distinct() + def _log(n: int, thing: str): + things = thing if n == 1 else f"{thing}s" + log.debug(f"update_stats: {n} {things}") - projects = Project.objects.filter( - resources__translatedresources__in=self, - ).distinct() - - projectlocales = ProjectLocale.objects.filter( - project__resources__translatedresources__in=self, - locale__translatedresources__in=self, - ).distinct() + fields = [ + "total_strings", + "approved_strings", + "pretranslated_strings", + "strings_with_errors", + "strings_with_warnings", + "unreviewed_strings", + ] + self = self.prefetch_related("resource__project", "locale") for translated_resource in self: translated_resource.calculate_stats(save=False) + TranslatedResource.objects.bulk_update(self, fields=fields) + _log(len(self), "translated resource") - TranslatedResource.objects.bulk_update( - list(self), - fields=[ - "total_strings", - "approved_strings", - "pretranslated_strings", - "strings_with_errors", - "strings_with_warnings", - "unreviewed_strings", - ], - ) - - for project in projects: - project.aggregate_stats() + projectlocale_count = 0 + for projectlocale in ProjectLocale.objects.filter( + project__resources__translatedresources__in=self, + locale__translatedresources__in=self, + ).distinct(): + projectlocale.aggregate_stats() + projectlocale_count += 1 + _log(projectlocale_count, "projectlocale") + project_count = 0 + for project in Project.objects.filter( + resources__translatedresources__in=self, + ).distinct(): + stats: dict[str, Any] = ProjectLocale.objects.filter( + project=project + ).aggregated_stats() + project.total_strings = stats["total_strings"] or 0 + project.approved_strings = stats["approved_strings"] or 0 + project.pretranslated_strings = stats["pretranslated_strings"] or 0 + project.strings_with_errors = stats["strings_with_errors"] or 0 + project.strings_with_warnings = 
stats["strings_with_warnings"] or 0 + project.unreviewed_strings = stats["unreviewed_strings"] or 0 + project.save(update_fields=fields) + project_count += 1 + _log(project_count, "project") + + locales = Locale.objects.filter(translatedresources__in=self).distinct() for locale in locales: - locale.aggregate_stats() - - for projectlocale in projectlocales: - projectlocale.aggregate_stats() + stats: dict[str, Any] = ProjectLocale.objects.filter( + locale=locale, + project__system_project=False, + project__visibility=Project.Visibility.PUBLIC, + ).aggregated_stats() + locale.total_strings = stats["total_strings"] or 0 + locale.approved_strings = stats["approved_strings"] or 0 + locale.pretranslated_strings = stats["pretranslated_strings"] or 0 + locale.strings_with_errors = stats["strings_with_errors"] or 0 + locale.strings_with_warnings = stats["strings_with_warnings"] or 0 + locale.unreviewed_strings = stats["unreviewed_strings"] or 0 + Locale.objects.bulk_update(locales, fields=fields) + _log(len(locales), "locale") class TranslatedResource(AggregatedStats): @@ -162,20 +192,23 @@ def adjust_all_stats(self, *args, **kwargs): if project_locale: project_locale.adjust_stats(*args, **kwargs) + def count_total_strings(self): + entities = Entity.objects.filter(resource=self.resource, obsolete=False) + total = entities.count() + plural_count = entities.exclude(string_plural="").count() + if plural_count: + total += (self.locale.nplurals - 1) * plural_count + return total + def calculate_stats(self, save=True): """Update stats, including denormalized ones.""" - resource = self.resource - locale = self.locale - entity_ids = Translation.objects.filter(locale=locale).values("entity") - translated_entities = Entity.objects.filter( - pk__in=entity_ids, resource=resource, obsolete=False - ) + total = self.count_total_strings() - # Singular translations = Translation.objects.filter( - entity__in=translated_entities.filter(string_plural=""), - locale=locale, + 
entity__resource=self.resource, + entity__obsolete=False, + locale=self.locale, ) approved = translations.filter( @@ -219,66 +252,8 @@ def calculate_stats(self, save=True): fuzzy=False, ).count() - # Plural - nplurals = locale.nplurals or 1 - for e in translated_entities.exclude(string_plural="").values_list("pk"): - translations = Translation.objects.filter( - entity_id=e, - locale=locale, - ) - - plural_approved_count = translations.filter( - approved=True, - errors__isnull=True, - warnings__isnull=True, - ).count() - - plural_pretranslated_count = translations.filter( - pretranslated=True, - errors__isnull=True, - warnings__isnull=True, - ).count() - - if plural_approved_count == nplurals: - approved += 1 - elif plural_pretranslated_count == nplurals: - pretranslated += 1 - else: - plural_errors_count = ( - translations.filter( - Q( - Q(Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True)) - & Q(errors__isnull=False) - ), - ) - .distinct() - .count() - ) - - plural_warnings_count = ( - translations.filter( - Q( - Q(Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True)) - & Q(warnings__isnull=False) - ), - ) - .distinct() - .count() - ) - - if plural_errors_count: - errors += 1 - elif plural_warnings_count: - warnings += 1 - - plural_unreviewed_count = translations.filter( - approved=False, pretranslated=False, fuzzy=False, rejected=False - ).count() - if plural_unreviewed_count: - unreviewed += plural_unreviewed_count - if not save: - self.total_strings = resource.total_strings + self.total_strings = total self.approved_strings = approved self.pretranslated_strings = pretranslated self.strings_with_errors = errors @@ -288,7 +263,7 @@ def calculate_stats(self, save=True): return False # Calculate diffs to reduce DB queries - total_strings_diff = resource.total_strings - self.total_strings + total_strings_diff = total - self.total_strings approved_strings_diff = approved - self.approved_strings pretranslated_strings_diff = pretranslated - 
self.pretranslated_strings strings_with_errors_diff = errors - self.strings_with_errors diff --git a/pontoon/base/models/translation.py b/pontoon/base/models/translation.py index a35a4c0e81..50c23464de 100644 --- a/pontoon/base/models/translation.py +++ b/pontoon/base/models/translation.py @@ -264,13 +264,11 @@ def tm_target(self): def __str__(self): return self.string - def save(self, update_stats=True, failed_checks=None, *args, **kwargs): + def save(self, failed_checks=None, *args, **kwargs): from pontoon.base.models.translated_resource import TranslatedResource from pontoon.base.models.translation_memory import TranslationMemoryEntry - # We parametrize update of stats to make testing easier. - if update_stats: - stats_before = self.entity.get_stats(self.locale) + stats_before = self.entity.get_stats(self.locale) super().save(*args, **kwargs) @@ -332,7 +330,7 @@ def save(self, update_stats=True, failed_checks=None, *args, **kwargs): self.entity.reset_term_translation(self.locale) # We use get_or_create() instead of just get() to make it easier to test. - translatedresource, _ = TranslatedResource.objects.get_or_create( + translatedresource, created = TranslatedResource.objects.get_or_create( resource=self.entity.resource, locale=self.locale ) @@ -343,12 +341,15 @@ def save(self, update_stats=True, failed_checks=None, *args, **kwargs): if failed_checks is not None: save_failed_checks(self, failed_checks) - # We parametrize update of stats to make testing easier. - if update_stats: - # Update stats AFTER changing approval status. - stats_after = self.entity.get_stats(self.locale) - stats_diff = Entity.get_stats_diff(stats_before, stats_after) - translatedresource.adjust_all_stats(**stats_diff) + # Update stats AFTER changing approval status. 
+ stats_after = self.entity.get_stats(self.locale) + stats_diff = { + stat_name: stats_after[stat_name] - stats_before[stat_name] + for stat_name in stats_before + } + if created: + stats_diff["total_strings_diff"] = translatedresource.count_total_strings() + translatedresource.adjust_all_stats(**stats_diff) def update_latest_translation(self): """ diff --git a/pontoon/base/tests/__init__.py b/pontoon/base/tests/__init__.py index 20893533d0..9880af4c6f 100644 --- a/pontoon/base/tests/__init__.py +++ b/pontoon/base/tests/__init__.py @@ -90,7 +90,9 @@ def locales(self, create, extracted, **kwargs): if extracted: for locale in extracted: - ProjectLocaleFactory.create(project=self, locale=locale) + ProjectLocaleFactory.create( + project=self, locale=locale, total_strings=self.total_strings + ) @factory.post_generation def repositories(self, create, extracted, **kwargs): diff --git a/pontoon/base/tests/managers/test_entity.py b/pontoon/base/tests/managers/test_entity.py index 4d6dcca908..00a410a610 100644 --- a/pontoon/base/tests/managers/test_entity.py +++ b/pontoon/base/tests/managers/test_entity.py @@ -1561,150 +1561,6 @@ def test_lookup_collation(resource_a, locale_a): } -@pytest.mark.django_db -def test_mgr_entity_reset_active_translations(resource_a, locale_a): - locale_a.cldr_plurals = "1,5" - locale_a.save() - - entities = [ - EntityFactory.create( - resource=resource_a, - string="testentity%s" % i, - ) - for i in range(0, 5) - ] + [ - EntityFactory( - resource=resource_a, - string="testentity4", - string_plural="testentity4plural", - ) - ] - entities_qs = Entity.objects.filter(pk__in=[e.pk for e in entities]) - - # Translations for Entity 0: - # No translations - pass - - # Translations for Entity 1: - # 2 unreviewed translations - TranslationFactory.create( - locale=locale_a, - entity=entities[1], - string=entities[1].string + " translation1", - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[1], - string=entities[1].string + " 
translation2", - ) - - # Translations for Entity 2: - # Approved and unreviewed translation - TranslationFactory.create( - locale=locale_a, - entity=entities[2], - string=entities[2].string + " translation1", - approved=True, - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[2], - string=entities[2].string + " translation2", - ) - - # Translations for Entity 3: - # Fuzzy and unreviewed translation - TranslationFactory.create( - locale=locale_a, - entity=entities[3], - string=entities[3].string + " translation1", - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[3], - string=entities[3].string + " translation2", - fuzzy=True, - ) - - # Translations for Entity 4: - # Pretranslated and unreviewed translation - TranslationFactory.create( - locale=locale_a, - entity=entities[4], - string=entities[4].string + " translation1", - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[4], - string=entities[4].string + " translation2", - pretranslated=True, - ) - - # Translations for Entity 5 - pluralized: - # Approved and unreviewed translation for first form, - # a single unreviewed translation for second form - TranslationFactory.create( - locale=locale_a, - entity=entities[5], - plural_form=0, - string=entities[5].string + " translation1", - approved=True, - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[5], - plural_form=0, - string=entities[5].string + " translation2", - ) - TranslationFactory.create( - locale=locale_a, - entity=entities[5], - plural_form=1, - string=entities[5].string_plural + " translation1plural", - ) - - entities_qs.reset_active_translations(locale=locale_a) - - # Active translations for Entity 0: - # no active translations - assert entities[0].translation_set.filter(active=True).count() == 0 - - # Active translations for Entity 1: - # latest translation is active - assert ( - entities[1].translation_set.get(active=True).string - == entities[1].string + " translation2" - ) - 
- # Active translations for Entity 2: - # approved translation is active - assert ( - entities[2].translation_set.get(active=True).string - == entities[2].string + " translation1" - ) - - # Active translations for Entity 3: - # fuzzy translation is active - assert ( - entities[3].translation_set.get(active=True).string - == entities[3].string + " translation2" - ) - - # Active translations for Entity 4: - # pretranslated translation is active - assert ( - entities[4].translation_set.get(active=True).string - == entities[4].string + " translation2" - ) - - # Active translations for Entity 5 - pluralized: - # Approved translation for first form, - # a single unreviewed translation for second form - active = entities[5].translation_set.filter(active=True) - assert active[0].string == entities[5].string + " translation1" - assert active[1].string == entities[5].string_plural + " translation1plural" - - @pytest.mark.parametrize( "input, expected_count", [ diff --git a/pontoon/base/tests/models/test_entity.py b/pontoon/base/tests/models/test_entity.py index 5c9d016241..dff85342db 100644 --- a/pontoon/base/tests/models/test_entity.py +++ b/pontoon/base/tests/models/test_entity.py @@ -1,7 +1,6 @@ import pytest from pontoon.base.models import ChangedEntityLocale, Entity, Project -from pontoon.sync import KEY_SEPARATOR from pontoon.test.factories import ( EntityFactory, ResourceFactory, @@ -40,7 +39,7 @@ def entity_test_models(translation_a, locale_b): entity_b = EntityFactory( resource=resourceX, string="entity_b", - key="Key%sentity_b" % KEY_SEPARATOR, + key="Key\x04entity_b", order=0, ) translation_a_pl = TranslationFactory( diff --git a/pontoon/base/tests/models/test_project.py b/pontoon/base/tests/models/test_project.py index 0b9167a18a..42e89358fd 100644 --- a/pontoon/base/tests/models/test_project.py +++ b/pontoon/base/tests/models/test_project.py @@ -1,5 +1,4 @@ import functools -import os from unittest.mock import patch @@ -7,82 +6,8 @@ from 
django.contrib.auth.models import AnonymousUser -from pontoon.base.models import Project, ProjectLocale, Repository -from pontoon.test.factories import ( - ChangedEntityLocaleFactory, - EntityFactory, - LocaleFactory, - ProjectFactory, - ProjectLocaleFactory, - RepositoryFactory, - ResourceFactory, -) - - -@pytest.mark.django_db -def test_project_type_no_repos(project_a): - """If a project has no repos, repository_type should be None.""" - assert project_a.repository_type is None - - -@pytest.mark.django_db -def test_project_type_multi_repos(project_a, repo_git, repo_hg): - """ - If a project has repos, return the type of the repo created - first. - """ - assert project_a.repositories.first().type == Repository.Type.GIT - assert project_a.repository_type == Repository.Type.GIT - - -@pytest.mark.django_db -def test_project_repo_path_none(project_a): - """ - If the project has no matching repositories, raise a ValueError. - """ - with pytest.raises(ValueError): - project_a.repository_for_path("doesnt/exist") - - -@pytest.mark.django_db -def test_project_repo_for_path(project_a): - """ - Return the first repo found with a checkout path that contains - the given path. - """ - repos = [ - RepositoryFactory.create( - type="file", - project=project_a, - url="testrepo%s" % i, - ) - for i in range(0, 3) - ] - path = os.path.join(repos[1].checkout_path, "foo", "bar") - assert project_a.repository_for_path(path) == repos[1] - - -@pytest.mark.django_db -def test_project_needs_sync(project_a, project_b, locale_a): - """ - Project.needs_sync should be True if ChangedEntityLocale objects - exist for its entities or if Project has unsynced locales. 
- """ - resource = ResourceFactory.create(project=project_a, path="resourceX.po") - entity = EntityFactory.create(resource=resource, string="entityX") - - assert not project_a.needs_sync - ChangedEntityLocaleFactory.create(entity=entity, locale=locale_a) - assert project_a.needs_sync - - assert not project_b.needs_sync - assert project_b.unsynced_locales == [] - del project_b.unsynced_locales - ProjectLocaleFactory.create( - project=project_b, - locale=locale_a, - ) - assert project_b.needs_sync == [locale_a] +from pontoon.base.models import Project, ProjectLocale +from pontoon.test.factories import LocaleFactory, ProjectFactory, ProjectLocaleFactory @pytest.mark.django_db diff --git a/pontoon/base/tests/models/test_repository.py b/pontoon/base/tests/models/test_repository.py index e6abd6240e..5a575f09e3 100644 --- a/pontoon/base/tests/models/test_repository.py +++ b/pontoon/base/tests/models/test_repository.py @@ -1,14 +1,12 @@ import os -from unittest.mock import Mock, call, patch from urllib.parse import urlparse import pytest from django.core.exceptions import ValidationError -from pontoon.base.models import repository_url_validator -from pontoon.test.factories import ProjectLocaleFactory +from pontoon.base.models.repository import repository_url_validator @pytest.mark.django_db @@ -26,207 +24,20 @@ def test_repo_checkout_path(repo_git, settings): assert repo_git.project.checkout_path.startswith("/media/root") -@pytest.mark.django_db -def test_repo_checkout_path_multi_locale(settings, repo_git): - """ - The checkout_path for multi-locale repos should not include the - locale_code variable. 
- """ - repo_git.url = "https://example.com/path/to/{locale_code}/" - repo_git.save() - settings.MEDIA_ROOT = "/media/root" - assert repo_git.checkout_path == ( - "/media/root/projects/%s/path/to" % repo_git.project.slug - ) - - @pytest.mark.django_db def test_repo_checkout_path_source_repo(settings, repo_git): """ - The checkout_path for a source repo should end with a templates + The checkout_path for a source repo should not end with a templates directory. """ repo_git.source_repo = True repo_git.url = "https://example.com/path/to/locale/" repo_git.save() assert repo_git.checkout_path == ( - "%s/projects/%s/path/to/locale/templates" - % (settings.MEDIA_ROOT, repo_git.project.slug) - ) - - -@pytest.mark.django_db -def test_repo_locale_checkout_path(settings, repo_git, locale_a): - """Append the locale code the the project's checkout_path.""" - repo_git.url = "https://example.com/path/{locale_code}/" - repo_git.save() - assert repo_git.locale_checkout_path(locale_a) == ( - "%s/projects/%s/path/%s" - % ( - settings.MEDIA_ROOT, - repo_git.project.slug, - locale_a.code, - ) - ) - - -@pytest.mark.django_db -def test_repo_path_non_multi_locale(repo_git, locale_a): - """If the repo isn't multi-locale, throw a ValueError.""" - assert repo_git.multi_locale is False - - with pytest.raises(ValueError): - repo_git.locale_checkout_path(locale_a) - - -@pytest.mark.django_db -def test_repo_locale_url(repo_git, locale_a): - """Fill in the {locale_code} variable in the URL.""" - - repo_git.url = "https://example.com/path/to/{locale_code}/" - repo_git.save() - assert ( - repo_git.locale_url(locale_a) - == "https://example.com/path/to/%s/" % locale_a.code + "%s/projects/%s/path/to/locale" % (settings.MEDIA_ROOT, repo_git.project.slug) ) -@pytest.mark.django_db -def test_repo_locale_url_non_multi_locale(repo_git, locale_a): - """If the repo isn't multi-locale, throw a ValueError.""" - with pytest.raises(ValueError): - repo_git.locale_url(locale_a) - - -@pytest.mark.django_db -def 
test_repo_url_for_path(project_locale_a, repo_git, locale_b): - """ - Return the first locale_checkout_path for locales active for the - repo's project that matches the given path. - """ - ProjectLocaleFactory.create( - project=repo_git.project, - locale=locale_b, - ) - repo_git.url = "https://example.com/path/to/{locale_code}/" - repo_git.save() - assert ( - repo_git.url_for_path( - os.path.join( - repo_git.locale_checkout_path(project_locale_a.locale), "foo/bar.po" - ) - ) - == "https://example.com/path/to/%s/" % project_locale_a.locale.code - ) - - -@pytest.mark.django_db -def test_repo_url_for_path_no_match(repo_git, locale_a, settings): - repo_git.url = "https://example.com/path/to/{locale_code}/" - repo_git.save() - settings.MEDIA_ROOT = "/media/root" - - with pytest.raises(ValueError): - repo_git.url_for_path("/media/root/path/to/match/foo/bar.po") - - -@pytest.mark.django_db -def test_repo_pull(repo_git): - with ( - patch("pontoon.sync.repositories.update_from_vcs") as m_update_from_vcs, - patch("pontoon.sync.repositories.get_revision") as m_get_revision, - ): - repo_git.url = "https://example.com" - m_get_revision.return_value = "asdf" - assert repo_git.pull() == {"single_locale": "asdf"} - assert m_update_from_vcs.call_args[0] == ( - "git", - "https://example.com", - repo_git.checkout_path, - "", - ) - - -@pytest.mark.django_db -def test_repo_pull_multi_locale(project_locale_a, repo_git, locale_b): - """ - If the repo is multi-locale, pull all of the repos for the - active locales. 
- """ - locale_a = project_locale_a.locale - ProjectLocaleFactory.create( - project=repo_git.project, - locale=locale_b, - ) - - with patch("pontoon.sync.repositories.update_from_vcs") as m_update_from_vcs: - with patch("pontoon.sync.repositories.get_revision") as m_get_revision: - repo_git.url = "https://example.com/{locale_code}/" - repo_git.locale_url = lambda locale: "https://example.com/%s" % locale.code - repo_git.locale_checkout_path = lambda locale: "/media/%s" % locale.code - - # Return path as the revision so different locales return - # different values. - m_get_revision.side_effect = lambda type, path: path - assert repo_git.pull() == { - locale_a.code: "/media/%s" % locale_a.code, - locale_b.code: "/media/%s" % locale_b.code, - } - assert m_update_from_vcs.call_args_list == [ - call( - "git", - "https://example.com/%s" % locale_b.code, - "/media/%s" % locale_b.code, - "", - ), - call( - "git", - "https://example.com/%s" % locale_a.code, - "/media/%s" % locale_a.code, - "", - ), - ] - - -@pytest.mark.django_db -def test_repo_commit(repo_git): - repo_git.url = "https://example.com" - - with patch("pontoon.sync.repositories.commit_to_vcs") as m: - repo_git.commit("message", "author", "path") - assert m.call_args[0] == ( - "git", - "path", - "message", - "author", - "", - "https://example.com", - ) - - -@pytest.mark.django_db -def test_repo_commit_multi_locale(repo_git): - """ - If the repo is multi-locale, use the url from url_for_path for - committing. - """ - repo_git.url = "https://example.com/{locale_code}/" - - repo_git.url_for_path = Mock(return_value="https://example.com/for_path") - - with patch("pontoon.sync.repositories.commit_to_vcs") as m: - repo_git.commit("message", "author", "path") - assert m.call_args[0] == ( - "git", - "path", - "message", - "author", - "", - "https://example.com/for_path", - ) - assert repo_git.url_for_path.call_args[0] == ("path",) - - def test_repository_url_validator(): """ The validity of the Repository URL. 
diff --git a/pontoon/base/tests/models/test_stats.py b/pontoon/base/tests/models/test_stats.py index 8a5561e434..fd2fffdc9b 100644 --- a/pontoon/base/tests/models/test_stats.py +++ b/pontoon/base/tests/models/test_stats.py @@ -5,77 +5,19 @@ import pytest from pontoon.base.models import TranslatedResource -from pontoon.checks.models import ( - Error, - FailedCheck, - Warning, -) - - -@pytest.fixture -def translation_with_error(translation_a): - Error.objects.create( - translation=translation_a, library=FailedCheck.Library.PONTOON, message="error" - ) - return translation_a - - -@pytest.fixture -def translation_with_warning(translation_a): - Warning.objects.create( - translation=translation_a, - library=FailedCheck.Library.PONTOON, - message="warning", - ) - return translation_a - - -def recalculate_stats(translation): - """ - Make the full recalculate stats on a TranslatedResource. - """ - translation.save(update_stats=False) - translated_resource = TranslatedResource.objects.get( - resource=translation.entity.resource, - locale=translation.locale, - ) - translated_resource.calculate_stats() - - -def diff_stats(t): - """ - Update only necessary stats by calculating difference between stats. - """ - t.save() - - -@pytest.fixture( - params=( - recalculate_stats, - diff_stats, - ) -) -def stats_update(db, request): - """ - Wrapper fixture which allows to test both implementations of stats calculations. 
- """ - return request.param -@pytest.fixture -def get_stats(): - def f(translation): - return TranslatedResource.objects.filter( - resource=translation.entity.resource, - locale=translation.locale, - ).aggregated_stats() - - return f +def get_stats(translation): + return TranslatedResource.objects.filter( + resource=translation.entity.resource, + locale=translation.locale, + ).aggregated_stats() -def test_translation_approved(stats_update, get_stats, translation_a): +@pytest.mark.django_db +def test_translation_approved(translation_a): translation_a.approved = True - stats_update(translation_a) + translation_a.save() assert get_stats(translation_a) == { "total": 1, @@ -87,7 +29,7 @@ def test_translation_approved(stats_update, get_stats, translation_a): } translation_a.approved = False - stats_update(translation_a) + translation_a.save() assert get_stats(translation_a) == { "total": 1, @@ -99,9 +41,10 @@ def test_translation_approved(stats_update, get_stats, translation_a): } -def test_translation_pretranslated(stats_update, get_stats, translation_a): +@pytest.mark.django_db +def test_translation_pretranslated(translation_a): translation_a.pretranslated = True - stats_update(translation_a) + translation_a.save() assert get_stats(translation_a) == { "total": 1, @@ -114,7 +57,7 @@ def test_translation_pretranslated(stats_update, get_stats, translation_a): translation_a.pretranslated = False translation_a.rejected = True - stats_update(translation_a) + translation_a.save() assert get_stats(translation_a) == { "total": 1, @@ -126,11 +69,12 @@ def test_translation_pretranslated(stats_update, get_stats, translation_a): } -def test_translation_with_error(stats_update, get_stats, translation_with_error): - translation_with_error.approved = True - stats_update(translation_with_error) +@pytest.mark.django_db +def test_translation_with_error(translation_a): + translation_a.approved = True + translation_a.save(failed_checks={"pErrors": ["error"]}) - assert 
get_stats(translation_with_error) == { + assert get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, @@ -139,11 +83,11 @@ def test_translation_with_error(stats_update, get_stats, translation_with_error) "errors": 1, } - translation_with_error.approved = False - translation_with_error.pretranslated = True - stats_update(translation_with_error) + translation_a.approved = False + translation_a.pretranslated = True + translation_a.save() - assert get_stats(translation_with_error) == { + assert get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, @@ -152,10 +96,10 @@ def test_translation_with_error(stats_update, get_stats, translation_with_error) "errors": 1, } - translation_with_error.pretranslated = False - stats_update(translation_with_error) + translation_a.pretranslated = False + translation_a.save() - assert get_stats(translation_with_error) == { + assert get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, @@ -165,11 +109,12 @@ def test_translation_with_error(stats_update, get_stats, translation_with_error) } -def test_translation_with_warning(stats_update, get_stats, translation_with_warning): - translation_with_warning.approved = True - stats_update(translation_with_warning) +@pytest.mark.django_db +def test_translation_with_warning(translation_a): + translation_a.approved = True + translation_a.save(failed_checks={"pWarnings": ["warning"]}) - assert get_stats(translation_with_warning) == { + assert get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, @@ -178,11 +123,11 @@ def test_translation_with_warning(stats_update, get_stats, translation_with_warn "errors": 0, } - translation_with_warning.approved = False - translation_with_warning.pretranslated = True - stats_update(translation_with_warning) + translation_a.approved = False + translation_a.pretranslated = True + translation_a.save() - assert get_stats(translation_with_warning) == { + assert 
get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, @@ -191,10 +136,10 @@ def test_translation_with_warning(stats_update, get_stats, translation_with_warn "errors": 0, } - translation_with_warning.pretranslated = False - stats_update(translation_with_warning) + translation_a.pretranslated = False + translation_a.save() - assert get_stats(translation_with_warning) == { + assert get_stats(translation_a) == { "total": 1, "approved": 0, "pretranslated": 0, diff --git a/pontoon/base/tests/test_utils.py b/pontoon/base/tests/test_utils.py index e7dc75de85..39f98c622f 100644 --- a/pontoon/base/tests/test_utils.py +++ b/pontoon/base/tests/test_utils.py @@ -8,7 +8,6 @@ from pontoon.base.models import Project from pontoon.base.utils import ( aware_datetime, - extension_in, get_m2m_changes, get_object_or_none, get_search_phrases, @@ -578,18 +577,6 @@ def test_get_m2m_mixed(user_a, user_b, user_c): assert [user_b] == changes[1] -def test_util_base_extension_in(): - assert extension_in("filename.txt", ["bat", "txt"]) - assert extension_in("filename.biff", ["biff"]) - assert extension_in("filename.tar.gz", ["gz"]) - - assert not extension_in("filename.txt", ["png", "jpg"]) - assert not extension_in(".dotfile", ["bat", "txt"]) - - # Unintuitive, but that's how splitext works. 
- assert not extension_in("filename.tar.gz", ["tar.gz"]) - - @pytest.mark.django_db def test_util_base_get_object_or_none(project_a): assert get_object_or_none(Project, slug="does-not-exist") is None diff --git a/pontoon/base/tests/views/test_download.py b/pontoon/base/tests/views/test_download.py new file mode 100644 index 0000000000..59ce618780 --- /dev/null +++ b/pontoon/base/tests/views/test_download.py @@ -0,0 +1,87 @@ +from os import makedirs +from tempfile import TemporaryDirectory +from unittest.mock import patch + +import pytest + +from django.conf import settings +from django.test import RequestFactory + +from pontoon.base.models.project import Project +from pontoon.base.tests import ( + LocaleFactory, + ProjectFactory, + RepositoryFactory, + ResourceFactory, + TranslatedResourceFactory, + UserFactory, +) +from pontoon.base.views import download_translations +from pontoon.sync.tests.test_checkouts import MockVersionControl +from pontoon.sync.tests.utils import build_file_tree + + +@pytest.mark.django_db +@pytest.mark.parametrize("two_repos", [True, False]) +@pytest.mark.parametrize( + "repo_url,expected_location", + [ + ( + "https://github.com:gh-org/gh-repo.git", + "https://raw.githubusercontent.com/gh-org/gh-repo/HEAD/de-Test/a.ftl", + ), + ( + "git@gitlab.com:gl-org/gl-repo.git", + "https://gitlab.com/gl-org/gl-repo/-/raw/HEAD/de-Test/a.ftl?inline=false", + ), + ("http://example.com/tgt-repo", "https://example.com/tgt-repo"), + ], +) +def test_download(two_repos, repo_url, expected_location): + mock_vcs = MockVersionControl(changes=None) + with ( + TemporaryDirectory() as root, + patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs), + ): + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="de-Test") + if two_repos: + repo_src = RepositoryFactory( + url="http://example.com/src-repo", source_repo=True + ) + repo_tgt = RepositoryFactory(url=repo_url) + project = ProjectFactory.create( + name="test-dl", + locales=[locale], + 
repositories=[repo_src, repo_tgt], + visibility=Project.Visibility.PUBLIC, + ) + src_root = repo_src.checkout_path + tgt_root = repo_tgt.checkout_path + makedirs(src_root) + build_file_tree(src_root, {"en-US": {"a.ftl": ""}}) + makedirs(tgt_root) + build_file_tree(tgt_root, {"de-Test": {"a.ftl": ""}}) + else: + repo = RepositoryFactory(url=repo_url) + project = ProjectFactory.create( + name="test-dl", + locales=[locale], + repositories=[repo], + visibility=Project.Visibility.PUBLIC, + ) + repo_root = repo.checkout_path + makedirs(repo_root) + build_file_tree( + repo_root, {"en-US": {"a.ftl": ""}, "de-Test": {"a.ftl": ""}} + ) + res = ResourceFactory.create(project=project, path="a.ftl", format="ftl") + TranslatedResourceFactory.create(locale=locale, resource=res) + + request = RequestFactory().get( + "/translations/?code=de-Test&slug=test-dl&part=a.ftl" + ) + request.user = UserFactory() + response = download_translations(request) + assert response.status_code == 302 + assert response.get("Location") == expected_location diff --git a/pontoon/base/tests/views/test_upload.py b/pontoon/base/tests/views/test_upload.py index 1f050fea0e..fd17cfda45 100644 --- a/pontoon/base/tests/views/test_upload.py +++ b/pontoon/base/tests/views/test_upload.py @@ -99,7 +99,6 @@ def test_upload_invalid_parameters( @pytest.mark.django_db def test_upload_missing_file( - client, translator_a, project_locale_a, ): @@ -109,10 +108,7 @@ def test_upload_missing_file( code=project_locale_a.locale.code, part="resource_a.po", ) - assert response.status_code == 303 - - redir = client.get(response["Location"]) - assert redir.status_code == 404 + assert response.status_code == 404 @pytest.mark.django_db @@ -170,5 +166,6 @@ def test_upload_file( assert translation.entity.key == "test_key" assert translation.entity.resource.path == "resource_a.po" assert translation.approved + assert translation.user assert not translation.warnings.exists() assert not translation.errors.exists() diff --git 
a/pontoon/base/utils.py b/pontoon/base/utils.py index 86ca7ebd3b..f2d0b80f49 100644 --- a/pontoon/base/utils.py +++ b/pontoon/base/utils.py @@ -1,30 +1,21 @@ -import codecs import functools -import io -import os import re -import tempfile import time -import zipfile from collections import defaultdict from datetime import datetime, timedelta, timezone -from urllib.parse import urljoin from xml.sax.saxutils import escape, quoteattr -import requests - from guardian.decorators import permission_required as guardian_permission_required from django.core.exceptions import ValidationError from django.core.validators import validate_email -from django.db.models import Prefetch, Q from django.db.models.query import QuerySet from django.http import Http404, HttpResponseBadRequest -from django.shortcuts import get_object_or_404, redirect +from django.shortcuts import redirect from django.urls import reverse from django.utils.text import slugify -from django.utils.timezone import make_aware, now +from django.utils.timezone import make_aware from django.utils.translation import trans_real @@ -61,28 +52,6 @@ def get_project_locale_from_request(request, locales): continue -def first(collection, test, default=None): - """ - Return the first item that, when passed to the given test function, - returns True. If no item passes the test, return the default value. - """ - return next((c for c in collection if test(c)), default) - - -def match_attr(collection, **attributes): - """ - Return the first item that has matching values for the given - attributes, or None if no item is found to match. - """ - return first( - collection, - lambda i: all( - getattr(i, attrib) == value for attrib, value in attributes.items() - ), - default=None, - ) - - def group_dict_by(list_of_dicts, key): """ Group dicts in a list by the given key. 
Return a defaultdict instance with @@ -96,19 +65,6 @@ def group_dict_by(list_of_dicts, key): return group -def extension_in(filename, extensions): - """ - Check if the extension for the given filename is in the list of - allowed extensions. Uses os.path.splitext rules for getting the - extension. - """ - filename, extension = os.path.splitext(filename) - if extension and extension[1:] in extensions: - return True - else: - return False - - def get_object_or_none(model, *args, **kwargs): """ Get an instance of the given model, returning None instead of @@ -165,275 +121,6 @@ def wrap(request, *_args, **_kwargs): return wrapper -def _download_file(prefixes, dirnames, vcs_project, relative_path): - for prefix in prefixes: - for dirname in dirnames: - if vcs_project.configuration: - locale = vcs_project.locales[0] - absolute_path = os.path.join( - vcs_project.source_directory_path, relative_path - ) - absolute_l10n_path = vcs_project.configuration.l10n_path( - locale, absolute_path - ) - relative_l10n_path = os.path.relpath( - absolute_l10n_path, - vcs_project.locale_directory_paths[locale.code], - ) - url = urljoin(prefix, relative_l10n_path) - else: - url = os.path.join(prefix.format(locale_code=dirname), relative_path) - - r = requests.get(url, stream=True) - if not r.ok: - continue - - extension = os.path.splitext(relative_path)[1] - with tempfile.NamedTemporaryFile( - prefix="strings" if extension == ".xml" else "", - suffix=extension, - delete=False, - ) as temp: - for chunk in r.iter_content(chunk_size=1024): - if chunk: - temp.write(chunk) - temp.flush() - - return temp.name - - -def get_download_content(slug, code, part): - """ - Get content of the file to be downloaded. - - :arg str slug: Project slug. - :arg str code: Locale code. - :arg str part: Resource path. - """ - # Avoid circular import; someday we should refactor to avoid. 
- from pontoon.base.models import Entity, Locale, Project, Resource - from pontoon.sync import formats - from pontoon.sync.utils import source_to_locale_path - from pontoon.sync.vcs.project import VCSProject - - project = get_object_or_404(Project, slug=slug) - locale = get_object_or_404(Locale, code=code) - vcs_project = VCSProject(project, locales=[locale]) - - # Download a ZIP of all files if project has > 1 and < 10 resources - resources = Resource.objects.filter( - project=project, translatedresources__locale=locale - ) - isZipable = 1 < len(resources) < 10 - if isZipable: - s = io.BytesIO() - zf = zipfile.ZipFile(s, "w") - - # Download a single file if project has 1 or >= 10 resources - else: - resources = [get_object_or_404(Resource, project__slug=slug, path=part)] - - locale_prefixes = project.repositories - - if not project.configuration_file: - locale_prefixes = locale_prefixes.filter( - permalink_prefix__contains="{locale_code}" - ) - - locale_prefixes = locale_prefixes.values_list( - "permalink_prefix", flat=True - ).distinct() - - source_prefixes = project.repositories.values_list( - "permalink_prefix", flat=True - ).distinct() - - for resource in resources: - # Get locale file - dirnames = {locale.code, locale.code.replace("-", "_")} - locale_path = _download_file( - locale_prefixes, dirnames, vcs_project, resource.path - ) - if not locale_path and not resource.is_asymmetric: - return None, None - - # Get source file if needed - source_path = None - if resource.is_asymmetric or resource.format == "xliff": - dirnames = VCSProject.SOURCE_DIR_NAMES - source_path = _download_file( - source_prefixes, dirnames, vcs_project, resource.path - ) - if not source_path: - return None, None - - # If locale file doesn't exist, create it - if not locale_path: - extension = os.path.splitext(resource.path)[1] - with tempfile.NamedTemporaryFile( - prefix="strings" if extension == ".xml" else "", - suffix=extension, - delete=False, - ) as temp: - temp.flush() - 
locale_path = temp.name - - # Update file from database - resource_file = formats.parse(locale_path, source_path) - entities_dict = {} - entities_qs = Entity.objects.filter( - changedentitylocale__locale=locale, - resource__project=project, - resource__path=resource.path, - obsolete=False, - ) - - for e in entities_qs: - entities_dict[e.key] = e.translation_set.filter( - Q(approved=True) | Q(pretranslated=True) - ).filter(locale=locale) - - for vcs_translation in resource_file.translations: - key = vcs_translation.key - if key in entities_dict: - entity = entities_dict[key] - vcs_translation.update_from_db(entity) - - resource_file.save(locale) - - if not locale_path: - return None, None - - if isZipable: - zf.write(locale_path, source_to_locale_path(resource.path)) - else: - with codecs.open(locale_path, "r", "utf-8") as f: - content = f.read() - filename = os.path.basename(source_to_locale_path(resource.path)) - - # Remove temporary files - os.remove(locale_path) - if source_path: - os.remove(source_path) - - if isZipable: - zf.close() - content = s.getvalue() - filename = project.slug + ".zip" - - return content, filename - - -def handle_upload_content(slug, code, part, f, user): - """ - Update translations in the database from uploaded file. - - :arg str slug: Project slug. - :arg str code: Locale code. - :arg str part: Resource path. - :arg UploadedFile f: UploadedFile instance. - :arg User user: User uploading the file. - """ - # Avoid circular import; someday we should refactor to avoid. 
- from pontoon.base.models import ( - Entity, - Locale, - Project, - Resource, - TranslatedResource, - Translation, - ) - from pontoon.sync import formats - from pontoon.sync.changeset import ChangeSet - from pontoon.sync.vcs.project import VCSProject - - project = get_object_or_404(Project, slug=slug) - locale = get_object_or_404(Locale, code=code) - resource = get_object_or_404(Resource, project__slug=slug, path=part) - # Store uploaded file to a temporary file and parse it - extension = os.path.splitext(f.name)[1] - is_messages_json = f.name.endswith("messages.json") - - with tempfile.NamedTemporaryFile( - prefix="strings" if extension == ".xml" else "", - suffix=".messages.json" if is_messages_json else extension, - ) as temp: - for chunk in f.chunks(): - temp.write(chunk) - temp.flush() - resource_file = formats.parse(temp.name) - - # Update database objects from file - changeset = ChangeSet(project, VCSProject(project, locales=[locale]), now()) - entities_qs = ( - Entity.objects.filter( - resource__project=project, resource__path=part, obsolete=False - ) - .prefetch_related( - Prefetch( - "translation_set", - queryset=Translation.objects.filter(locale=locale), - to_attr="db_translations", - ) - ) - .prefetch_related( - Prefetch( - "translation_set", - queryset=Translation.objects.filter( - locale=locale, approved_date__lte=now() - ), - to_attr="db_translations_approved_before_sync", - ) - ) - ) - entities_dict = {entity.key: entity for entity in entities_qs} - - for vcs_translation in resource_file.translations: - key = vcs_translation.key - if key in entities_dict: - entity = entities_dict[key] - changeset.update_entity_translations_from_vcs( - entity, - locale.code, - vcs_translation, - user, - entity.db_translations, - entity.db_translations_approved_before_sync, - ) - - changeset.bulk_create_translations() - changeset.bulk_update_translations() - changeset.bulk_log_actions() - - if changeset.changed_translations: - # Update 'active' status of all changed 
translations and their siblings, - # i.e. translations of the same entity to the same locale. - changed_pks = {t.pk for t in changeset.changed_translations} - ( - Entity.objects.filter( - translation__pk__in=changed_pks - ).reset_active_translations(locale=locale) - ) - - # Run checks and create TM entries for translations that pass them - valid_translations = changeset.bulk_check_translations() - changeset.bulk_create_translation_memory_entries(valid_translations) - - # Remove any TM entries of translations that got rejected - changeset.bulk_remove_translation_memory_entries() - - TranslatedResource.objects.get(resource=resource, locale=locale).calculate_stats() - - # Mark translations as changed - changed_translations_pks = [t.pk for t in changeset.changed_translations] - changed_translations = Translation.objects.filter(pk__in=changed_translations_pks) - changed_translations.bulk_mark_changed() - - # Update latest translation - if changeset.translations_to_create: - changeset.translations_to_create[-1].update_latest_translation() - - def aware_datetime(*args, **kwargs): """Return an aware datetime using Django's configured timezone.""" return make_aware(datetime(*args, **kwargs)) diff --git a/pontoon/base/views.py b/pontoon/base/views.py index 8c103e06bb..40a6d20df5 100755 --- a/pontoon/base/views.py +++ b/pontoon/base/views.py @@ -18,6 +18,7 @@ Http404, HttpResponse, HttpResponseForbidden, + HttpResponseRedirect, JsonResponse, StreamingHttpResponse, ) @@ -38,6 +39,7 @@ Locale, Project, ProjectLocale, + Resource, TranslatedResource, Translation, TranslationMemoryEntry, @@ -755,28 +757,26 @@ def perform_checks(request): @transaction.atomic def download_translations(request): - """Download translated resource.""" + """Download translated resource from its backing repository.""" + + from pontoon.sync.utils import translations_target_url + try: slug = request.GET["slug"] code = request.GET["code"] - part = request.GET["part"] + res_path = request.GET["part"] except 
MultiValueDictKeyError: raise Http404 - content, filename = utils.get_download_content(slug, code, part) - - if content is None: - raise Http404 + project = get_object_or_404(Project.objects.visible_for(request.user), slug=slug) + locale = get_object_or_404(Locale, code=code) - response = HttpResponse() - response.content = content - if filename.endswith(".zip"): - response["Content-Type"] = "application/zip" + # FIXME This is a temporary hack, to be replaced by 04/2025 with proper downloads. + url = translations_target_url(project, locale, res_path) + if url and url.startswith("https://"): + return HttpResponseRedirect(url) else: - response["Content-Type"] = "text/plain" - response["Content-Disposition"] = "attachment; filename=" + filename - - return response + raise Http404 @login_required(redirect_field_name="", login_url="/403") @@ -787,33 +787,37 @@ def upload(request): try: slug = request.POST["slug"] code = request.POST["code"] - part = request.POST["part"] + res_path = request.POST["part"] except MultiValueDictKeyError: raise Http404 locale = get_object_or_404(Locale, code=code) project = get_object_or_404(Project.objects.visible_for(request.user), slug=slug) - if not request.user.can_translate( project=project, locale=locale ) or utils.readonly_exists(project, locale): return HttpResponseForbidden("You don't have permission to upload files.") + get_object_or_404(Resource, project=project, path=res_path) form = forms.UploadFileForm(request.POST, request.FILES) - if form.is_valid(): - f = request.FILES["uploadfile"] - utils.handle_upload_content(slug, code, part, f, request.user) - messages.success(request, "Translations updated from uploaded file.") + from pontoon.sync.utils import import_uploaded_file + + upload = request.FILES["uploadfile"] + try: + import_uploaded_file(project, locale, res_path, upload, request.user) + messages.success(request, "Translations updated from uploaded file.") + except Exception as error: + messages.error(request, str(error)) 
else: - for field, errors in form.errors.items(): + for errors in form.errors.values(): for error in errors: messages.error(request, error) response = HttpResponse(content="", status=303) response["Location"] = reverse( "pontoon.translate", - kwargs={"locale": code, "project": slug, "resource": part}, + kwargs={"locale": code, "project": slug, "resource": res_path}, ) return response diff --git a/pontoon/checks/libraries/compare_locales.py b/pontoon/checks/libraries/compare_locales.py index 4285f0ef55..9bb9b9f780 100644 --- a/pontoon/checks/libraries/compare_locales.py +++ b/pontoon/checks/libraries/compare_locales.py @@ -9,7 +9,7 @@ from compare_locales.parser.properties import PropertiesEntityMixin from compare_locales.paths import File -from pontoon.sync.utils import escape_apostrophes +from pontoon.sync.formats.utils import escape_apostrophes CommentEntity = namedtuple("Comment", ("all",)) diff --git a/pontoon/pretranslation/tasks.py b/pontoon/pretranslation/tasks.py index d958cc5455..b2a061a6dd 100644 --- a/pontoon/pretranslation/tasks.py +++ b/pontoon/pretranslation/tasks.py @@ -1,6 +1,9 @@ import logging +from celery import shared_task + from django.conf import settings +from django.core.cache import cache from django.db.models import CharField, Q, Value as V from django.db.models.functions import Concat @@ -20,38 +23,28 @@ get_pretranslations, update_changed_instances, ) -from pontoon.sync.core import serial_task log = logging.getLogger(__name__) -@serial_task(settings.SYNC_TASK_TIMEOUT, base=PontoonTask, lock_key="project={0}") -def pretranslate(self, project_pk, locales=None, entities=None): +def pretranslate(project: Project, paths: set[str] | None): """ Identifies strings without any translations and any suggestions. Engages TheAlgorithm (bug 1552796) to gather pretranslations. Stores pretranslations as suggestions (approved=False) to DB. 
- :arg project_pk: the pk of the project to be pretranslated - :arg Queryset locales: the locales for the project to be pretranslated - :arg Queryset entites: the entities for the project to be pretranslated + :arg project: The project to be pretranslated + :arg paths: Paths of the project resources to be pretranslated, + or None to pretranslate all resources. :returns: None """ - project = Project.objects.get(pk=project_pk) - if not project.pretranslation_enabled: log.info(f"Pretranslation not enabled for project {project.name}") return - if locales: - locales = project.locales.filter(pk__in=locales) - else: - locales = project.locales - - locales = locales.filter( - project_locale__project=project, + locales = project.locales.filter( project_locale__pretranslation_enabled=True, project_locale__readonly=False, ) @@ -64,12 +57,9 @@ def pretranslate(self, project_pk, locales=None, entities=None): log.info(f"Fetching pretranslations for project {project.name} started") - if not entities: - entities = Entity.objects.filter( - resource__project=project, - obsolete=False, - ) - + entities = Entity.objects.filter(resource__project=project, obsolete=False) + if paths: + entities = entities.filter(resource__path__in=paths) entities = entities.prefetch_related("resource") # Fetch all available locale-resource pairs (TranslatedResource objects) @@ -205,3 +195,18 @@ def pretranslate(self, project_pk, locales=None, entities=None): log.info(f"Fetching pretranslations for locale {locale.code} done") log.info(f"Fetching pretranslations for project {project.name} done") + + +@shared_task(base=PontoonTask, name="pretranslate") +def pretranslate_task(project_pk): + project = Project.objects.get(pk=project_pk) + lock_name = f"pretranslate_{project_pk}" + if not cache.add(lock_name, True, timeout=settings.SYNC_TASK_TIMEOUT): + raise RuntimeError( + f"Cannot pretranslate {project.slug} because its previous pretranslation is still running." 
+ ) + try: + pretranslate(project, None) + finally: + # release the lock + cache.delete(lock_name) diff --git a/pontoon/pretranslation/tests/test_tasks.py b/pontoon/pretranslation/tests/test_tasks.py index 01d825bc2b..6f9e9fd35c 100644 --- a/pontoon/pretranslation/tests/test_tasks.py +++ b/pontoon/pretranslation/tests/test_tasks.py @@ -3,7 +3,7 @@ import pytest from pontoon.base.models import ChangedEntityLocale, Translation, User -from pontoon.pretranslation.tasks import pretranslate +from pontoon.pretranslation.tasks import pretranslate_task from pontoon.test.factories import ( EntityFactory, ProjectLocaleFactory, @@ -48,7 +48,7 @@ def test_pretranslate(gt_mock, project_a, locale_a, resource_a, locale_b): gt_user = User.objects.get(email="pontoon-gt@example.com") gt_mock.return_value = [("pretranslation", None, gt_user)] - pretranslate(project_a.pk) + pretranslate_task(project_a.pk) project_a.refresh_from_db() translations = Translation.objects.filter(user=gt_user) @@ -103,7 +103,7 @@ def test_which_strings_to_pretranslate(gt_mock, project_a, locale_a, resource_a) user=gt_user, ) - pretranslate(project_a.pk) + pretranslate_task(project_a.pk) project_a.refresh_from_db() assert len(no_translations.translation_set.filter(string="pretranslation")) == 1 diff --git a/pontoon/sync/__init__.py b/pontoon/sync/__init__.py index 41db483868..e69de29bb2 100644 --- a/pontoon/sync/__init__.py +++ b/pontoon/sync/__init__.py @@ -1,2 +0,0 @@ -# A separator used in Translation keys as proposed by Translate Toolkit. 
-KEY_SEPARATOR = "\x04" diff --git a/pontoon/sync/changeset.py b/pontoon/sync/changeset.py deleted file mode 100644 index 80b871d385..0000000000 --- a/pontoon/sync/changeset.py +++ /dev/null @@ -1,589 +0,0 @@ -import logging - -from collections import defaultdict -from datetime import datetime - -from notifications.signals import notify - -from django.contrib.auth.models import User -from django.db import connection -from django.db.models import Prefetch, Q -from django.template.defaultfilters import pluralize - -from pontoon.actionlog.models import ActionLog -from pontoon.base.models import ( - Entity, - Locale, - Project, - Resource, - Translation, - TranslationMemoryEntry, -) -from pontoon.base.utils import match_attr -from pontoon.checks.utils import bulk_run_checks - -from .vcs.project import VCSProject -from .vcs.resource import VCSEntity -from .vcs.translation import VCSTranslation - - -log = logging.getLogger(__name__) - - -class ChangeSet: - """ - Stores a set of changes to be made to the database and the - translations stored in VCS. Once all the necessary changes have been - stored, execute all the changes at once efficiently. - """ - - def __init__( - self, - db_project: Project, - vcs_project: VCSProject, - now: datetime, - locale: Locale | None = None, - ): - """ - :param now: - Datetime to use for marking when approvals happened. - """ - self.db_project = db_project - self.vcs_project = vcs_project - self.now = now - self.locale = locale - - # Store locales and resources for FK relationships. 
- self.locales = {loc.code: loc for loc in Locale.objects.all()} - self.resources: dict[str, Resource] = { - r.path: r for r in self.db_project.resources.all() - } - - self.executed = False - self.changes = { - "update_vcs": [], - "update_db": [], - "obsolete_db": [], - "create_db": [], - } - - self.entities_to_update = [] - self.translations_to_update = {} - self.translations_to_create = [] - self.rejected_translations = [] - self.actions_to_log = [] - - self.commit_authors_per_locale = defaultdict(list) - self.locales_to_commit: set[Locale] = set() - self.new_entities: list[Entity] = [] - - self.sync_user = User.objects.get(username="pontoon-sync") - - @property - def changed_translations(self): - """A list of Translation objects that have been created or updated.""" - return self.translations_to_create + list(self.translations_to_update.values()) - - def update_vcs_entity( - self, locale: Locale, db_entity: Entity, vcs_entity: VCSEntity - ): - """ - Replace the translations in VCS with the translations from the - database. - Updates only entities that has been changed. 
- """ - if db_entity.has_changed(locale): - self.changes["update_vcs"].append((locale.code, db_entity, vcs_entity)) - self.locales_to_commit.add(locale) - - def create_db_entity(self, vcs_entity: VCSEntity): - """Create a new entity in the database.""" - self.changes["create_db"].append(vcs_entity) - - def update_db_entity( - self, locale: Locale, db_entity: Entity, vcs_entity: VCSEntity - ): - """Update the database with translations from VCS.""" - self.changes["update_db"].append((locale.code, db_entity, vcs_entity)) - - def update_db_source_entity(self, db_entity: Entity, vcs_entity: VCSEntity): - """Update the entities with the latest data from vcs.""" - self.changes["update_db"].append((None, db_entity, vcs_entity)) - - def obsolete_db_entity(self, db_entity: Entity): - """Mark the given entity as obsolete.""" - self.changes["obsolete_db"].append(db_entity.pk) - - def execute(self): - """ - Execute the changes stored in this changeset. Execute can only - be called once per changeset; subsequent calls raise a - RuntimeError, even if the changes failed. - """ - if self.executed: - raise RuntimeError("execute() can only be called once per changeset.") - else: - self.executed = True - - # Perform the changes and fill the lists for bulk creation and - # updating. - self.execute_update_vcs() - self.execute_create_db() - self.execute_update_db() - self.execute_obsolete_db() - - # Apply the built-up changes to the DB - self.bulk_update_entities() - self.bulk_create_translations() - self.bulk_update_translations() - - # Create all log events. 
- self.bulk_log_actions() - - # Clean up any duplicate approvals - if self.locale: - with connection.cursor() as cursor: - cursor.execute( - """ - UPDATE base_translation AS b - SET approved = FALSE, approved_date = NULL - WHERE - id IN - (SELECT trans.id FROM base_translation AS trans - LEFT JOIN base_entity AS ent ON ent.id = trans.entity_id - LEFT JOIN base_resource AS res ON res.id = ent.resource_id - WHERE locale_id = %(locale_id)s - AND res.project_id = %(project_id)s) - AND approved_date != - (SELECT max(approved_date) - FROM base_translation - WHERE entity_id = b.entity_id - AND locale_id = b.locale_id - AND (plural_form = b.plural_form OR plural_form IS NULL)); - """, - {"locale_id": self.locale.id, "project_id": self.db_project.id}, - ) - - if self.changed_translations: - # Update 'active' status of all changed translations and their siblings, - # i.e. translations of the same entity to the same locale. - changed_pks = {t.pk for t in self.changed_translations} - ( - Entity.objects.filter( - translation__pk__in=changed_pks - ).reset_active_translations(locale=self.locale) - ) - - # Run checks and create TM entries for translations that pass them - valid_translations = self.bulk_check_translations() - self.bulk_create_translation_memory_entries(valid_translations) - - # Remove any TM entries of translations that got rejected - self.bulk_remove_translation_memory_entries() - - def execute_update_vcs(self): - resources = self.vcs_project.resources - changed_resources = set() - - for locale_code, db_entity, vcs_entity in self.changes["update_vcs"]: - changed_resources.add(resources[db_entity.resource.path]) - vcs_translation = vcs_entity.translations[locale_code] - db_translations = db_entity.translation_set.filter( - Q(approved=True) | Q(pretranslated=True) - ).filter(locale__code=locale_code) - vcs_translation.update_from_db(db_translations) - - # Track which translators were involved. 
- self.commit_authors_per_locale[locale_code].extend( - [t.user for t in db_translations if t.user] - ) - - for resource in changed_resources: - resource.save(self.locale) - - def get_entity_updates( - self, vcs_entity: VCSEntity, db_entity: Entity | None = None - ): - """ - Return a dict of the properties and values necessary to create - or update a database entity from a VCS entity. - """ - return { - "resource": self.resources[vcs_entity.resource.path], - "string": vcs_entity.string, - "string_plural": vcs_entity.string_plural, - "key": vcs_entity.key, - "context": vcs_entity.context, - "comment": "\n".join(vcs_entity.comments), - "group_comment": "\n".join(vcs_entity.group_comments), - "resource_comment": "\n".join(vcs_entity.resource_comments), - # one timestamp per import, unlike timezone.now() - "date_created": db_entity.date_created if db_entity else self.now, - "order": vcs_entity.order, - "source": vcs_entity.source, - } - - def send_notifications(self, new_entities): - """ - Notify project contributors if new entities have been added. 
- """ - count = len(new_entities) - - if count > 0: - log.info(f"Sending new string notifications for project {self.db_project}.") - - verb = f"updated with {count} new string{pluralize(count)}" - contributors = User.objects.filter( - translation__entity__resource__project=self.db_project, - profile__new_string_notifications=True, - ).distinct() - - for contributor in contributors: - notify.send( - self.db_project, - recipient=contributor, - verb=verb, - category="new_string", - ) - - log.info(f"New string notifications for project {self.db_project} sent.") - - def execute_create_db(self): - new_entities = [] - - for vcs_entity in self.changes["create_db"]: - # We can't use bulk_create since we need a PK - entity, created = Entity.objects.get_or_create( - **self.get_entity_updates(vcs_entity) - ) - - if created: - new_entities.append(entity) - - for locale_code, vcs_translation in vcs_entity.translations.items(): - for plural_form, string in vcs_translation.strings.items(): - self.translations_to_create.append( - Translation( - entity=entity, - locale=self.locales[locale_code], - string=string, - plural_form=plural_form, - approved=not vcs_translation.fuzzy, - approved_date=( - self.now if not vcs_translation.fuzzy else None - ), - fuzzy=vcs_translation.fuzzy, - ) - ) - - self.send_notifications(new_entities) - self.new_entities = new_entities - - def update_entity_translations_from_vcs( - self, - db_entity: Entity, - locale_code: str, - vcs_translation: VCSTranslation, - user=None, - db_translations=None, - db_translations_approved_before_sync=None, - ): - if db_translations is None: - db_translations = db_entity.translation_set.filter( - locale__code=locale_code, - ) - - if db_translations_approved_before_sync is None: - db_translations_approved_before_sync = db_translations.filter( - approved_date__lte=self.now - ) - - approved_translations = [] - fuzzy_translations = [] - - for plural_form, string in vcs_translation.strings.items(): - db_translation = 
match_attr( - db_translations, plural_form=plural_form, string=string - ) - - # Modify existing translation. - if db_translation: - new_action = None - if ( - not db_translation.approved - and not db_translation.pretranslated - and not vcs_translation.fuzzy - ): - new_action = ActionLog( - action_type=ActionLog.ActionType.TRANSLATION_APPROVED, - performed_by=user or self.sync_user, - translation=db_translation, - ) - db_translation.approved = True - db_translation.approved_user = user - db_translation.approved_date = self.now - db_translation.rejected = False - db_translation.fuzzy = vcs_translation.fuzzy - - if db_translation.is_dirty(): - self.translations_to_update[db_translation.pk] = db_translation - if new_action: - self.actions_to_log.append(new_action) - if db_translation.fuzzy: - fuzzy_translations.append(db_translation) - elif not db_translation.pretranslated: - approved_translations.append(db_translation) - - # Create new translation. - else: - self.translations_to_create.append( - Translation( - entity=db_entity, - locale=self.locales[locale_code], - string=string, - plural_form=plural_form, - approved=not vcs_translation.fuzzy, - approved_user=user, - approved_date=self.now if not vcs_translation.fuzzy else None, - user=user, - fuzzy=vcs_translation.fuzzy, - ) - ) - - # Unapprove translations that were approved before the sync job started unless sync - # resolves them as active approved translations. - # Note: If translations get approved after the sync starts, duplicate approvals can arise. - # We take care of that at the and of the sync job in tasks.py. - for translation in db_translations_approved_before_sync: - if translation not in approved_translations: - # Use the translation instance already set for update if it exists. 
- translation = self.translations_to_update.get( - translation.pk, translation - ) - translation.approved = False - translation.approved_user = None - translation.approved_date = None - - # Reject translations unless they became fuzzy during sync. Condition is sufficient - # because they were approved previously. - if not translation.fuzzy: - new_action = ActionLog( - action_type=ActionLog.ActionType.TRANSLATION_REJECTED, - performed_by=user or self.sync_user, - translation=translation, - is_implicit_action=True, - ) - translation.rejected = True - translation.rejected_user = user - translation.rejected_date = self.now - else: - new_action = ActionLog( - action_type=ActionLog.ActionType.TRANSLATION_UNAPPROVED, - performed_by=user or self.sync_user, - translation=translation, - is_implicit_action=True, - ) - - if translation.is_dirty(): - self.rejected_translations.append(translation) - self.translations_to_update[translation.pk] = translation - self.actions_to_log.append(new_action) - - # Unfuzzy existing translations unless sync resolves them as active fuzzy translations. - # Note: Translations cannot get fuzzy after the sync job starts, because they cannot be - # made fuzzy in Pontoon. - for translation in db_translations: - if translation not in fuzzy_translations: - # Use the translation instance already set for update if it exists. 
- translation = self.translations_to_update.get( - translation.pk, translation - ) - translation.fuzzy = False - - if translation.is_dirty(): - self.translations_to_update[translation.pk] = translation - - def prefetch_entity_translations(self): - prefetched_entities = {} - - locale_entities = {} - for locale_code, db_entity, vcs_entity in self.changes["update_db"]: - locale_entities.setdefault(locale_code, []).append(db_entity.pk) - - for locale in locale_entities.keys(): - entities_qs = ( - Entity.objects.filter( - pk__in=locale_entities[locale], - ) - .prefetch_related( - Prefetch( - "translation_set", - queryset=Translation.objects.filter(locale__code=locale), - to_attr="db_translations", - ) - ) - .prefetch_related( - Prefetch( - "translation_set", - queryset=Translation.objects.filter( - locale__code=locale, approved_date__lte=self.now - ), - to_attr="db_translations_approved_before_sync", - ) - ) - ) - prefetched_entities[locale] = {entity.id: entity for entity in entities_qs} - - return prefetched_entities - - def execute_update_db(self): - if self.changes["update_db"]: - entities_with_translations = self.prefetch_entity_translations() - - for locale_code, db_entity, vcs_entity in self.changes["update_db"]: - for field, value in self.get_entity_updates(vcs_entity, db_entity).items(): - setattr(db_entity, field, value) - - if db_entity.is_dirty(check_relationship=True): - self.entities_to_update.append(db_entity) - - if locale_code is not None: - # Update translations for the entity. 
- vcs_translation = vcs_entity.translations[locale_code] - prefetched_entity = entities_with_translations[locale_code][ - db_entity.id - ] - self.update_entity_translations_from_vcs( - db_entity, - locale_code, - vcs_translation, - None, - prefetched_entity.db_translations, - prefetched_entity.db_translations_approved_before_sync, - ) - - def execute_obsolete_db(self): - ( - Entity.objects.filter(pk__in=self.changes["obsolete_db"]).update( - obsolete=True, date_obsoleted=self.now - ) - ) - - def bulk_update_entities(self): - if len(self.entities_to_update) > 0: - Entity.objects.bulk_update( - self.entities_to_update, - fields=[ - "resource", - "string", - "string_plural", - "key", - "context", - "comment", - "group_comment", - "resource_comment", - "order", - "source", - ], - ) - - def bulk_create_translations(self): - Translation.objects.bulk_create(self.translations_to_create) - for translation in self.translations_to_create: - self.actions_to_log.append( - ActionLog( - action_type=ActionLog.ActionType.TRANSLATION_CREATED, - created_at=translation.date, - performed_by=translation.user or self.sync_user, - translation=translation, - ) - ) - - def bulk_update_translations(self): - if len(self.translations_to_update) > 0: - Translation.objects.bulk_update( - list(self.translations_to_update.values()), - fields=[ - "entity", - "locale", - "string", - "plural_form", - "approved", - "approved_user_id", - "approved_date", - "rejected", - "fuzzy", - ], - ) - - def bulk_log_actions(self): - ActionLog.objects.bulk_create(self.actions_to_log) - - def bulk_create_translation_memory_entries(self, valid_translations_pks): - """ - Create Translation Memory entries for: - - new approved translations - - updated translations that are approved and don't have a TM entry yet - - :arg list[int] valid_translations_pks: list of translations (their pks) without errors - """ - - def is_valid(t): - """ - Verify if a translation should land in the Translation Memory - """ - return 
t.approved and t.pk in valid_translations_pks - - translations_to_create_translation_memory_entries_for = [ - t for t in self.translations_to_create if is_valid(t) - ] + list( - Translation.objects.filter( - pk__in=[ - pk for pk, t in self.translations_to_update.items() if is_valid(t) - ], - memory_entries__isnull=True, - ) - ) - - memory_entries = [ - TranslationMemoryEntry( - source=t.tm_source, - target=t.tm_target, - locale_id=t.locale_id, - entity_id=t.entity.pk, - translation_id=t.pk, - project=self.db_project, - ) - for t in translations_to_create_translation_memory_entries_for - ] - - TranslationMemoryEntry.objects.bulk_create(memory_entries) - - def bulk_remove_translation_memory_entries(self): - """ - Remove Translation Memory entries of translations that got rejected - """ - TranslationMemoryEntry.objects.filter( - translation__in=self.rejected_translations - ).delete() - - def bulk_check_translations(self): - """ - Run checks on all changed translations from supported resources - - :return: primary keys of translations without warnings and errors. 
- """ - changed_pks = {t.pk for t in self.changed_translations} - - bulk_run_checks(Translation.objects.for_checks().filter(pk__in=changed_pks)) - - valid_translations = set( - Translation.objects.filter( - pk__in=changed_pks, - errors__isnull=True, - ) - .values_list("pk", flat=True) - .order_by("pk") - ) - - return valid_translations diff --git a/pontoon/sync/core.py b/pontoon/sync/core.py deleted file mode 100644 index 92294a6741..0000000000 --- a/pontoon/sync/core.py +++ /dev/null @@ -1,457 +0,0 @@ -import logging - -from collections import Counter -from datetime import datetime -from functools import wraps -from typing import Any - -import requests - -from celery import shared_task - -from django.conf import settings -from django.contrib.auth.models import User -from django.core.cache import cache -from django.db import transaction -from django.template.loader import render_to_string - -from pontoon.base.models import ( - Entity, - Locale, - Project, - Resource, - TranslatedResource, -) - -from .changeset import ChangeSet -from .vcs.project import VCSProject - - -log = logging.getLogger(__name__) - - -def update_originals(db_project: Project, now, force=False): - vcs_project = VCSProject(db_project, locales=[], force=force) - - with transaction.atomic(): - added_paths, removed_paths, changed_paths = update_resources( - db_project, vcs_project - ) - changeset = ChangeSet(db_project, vcs_project, now) - update_entities(db_project, vcs_project, changeset) - changeset.execute() - - return added_paths, removed_paths, changed_paths, changeset.new_entities - - -def serial_task(timeout, lock_key="", on_error=None, **celery_args): - """ - Decorator ensures that there's only one running task with given task_name. - Decorated tasks are bound tasks, meaning their first argument is always their Task instance - :param timeout: time after which lock is released. - :param lock_key: allows to define different lock for respective parameters of task. 
- :param on_error: callback to be executed if an error is raised. - :param celery_args: argument passed to celery's shared_task decorator. - """ - - def wrapper(func): - @shared_task(bind=True, **celery_args) - @wraps(func) - def wrapped_func(self, *args, **kwargs): - lock_name = f"serial_task.{self.name}[{lock_key.format(*args, **kwargs)}]" - # Acquire the lock - if not cache.add(lock_name, True, timeout=timeout): - error = RuntimeError( - f"Can't execute task '{lock_name}' because the previously called task is still running." - ) - if callable(on_error): - on_error(error, *args, **kwargs) - raise error - try: - return func(self, *args, **kwargs) - finally: - # release the lock - cache.delete(lock_name) - - return wrapped_func - - return wrapper - - -def collect_entities(db_project: Project, vcs_project: VCSProject, changed_resources): - """ - Find all the entities in the database and on the filesystem and - match them together, yielding tuples of the form - (entity_key, database_entity, vcs_entity). - - When a match isn't found, the missing entity will be None. - """ - db_entities = get_db_entities(db_project, changed_resources) - vcs_entities = get_vcs_entities(vcs_project) - entity_keys = set().union(db_entities.keys(), vcs_entities.keys()) - - for key in entity_keys: - yield key, db_entities.get(key, None), vcs_entities.get(key, None) - - -def update_entities(db_project: Project, vcs_project: VCSProject, changeset): - changed_resources = vcs_project.changed_files - for key, db_entity, vcs_entity in collect_entities( - db_project, vcs_project, changed_resources - ): - if vcs_entity is None: - if db_entity is None: - # This should never happen. What? Hard abort. - raise ValueError(f"No entities found for key `{key}`") - else: - # VCS no longer has the entity, obsolete it. - changeset.obsolete_db_entity(db_entity) - elif db_entity is None: - # New VCS entities are added to Pontoon. 
- changeset.create_db_entity(vcs_entity) - else: - changeset.update_db_source_entity(db_entity, vcs_entity) - - -def update_resources(db_project: Project, vcs_project: VCSProject): - """Update the database on what resource files exist in VCS.""" - log.debug(f"Scanning {vcs_project.source_directory_path}") - vcs_changed_files, vcs_removed_files = vcs_project.changed_source_files - - removed_resources = db_project.resources.filter(path__in=vcs_removed_files) - removed_paths = removed_resources.values_list("path", flat=True) - - changed_resources = db_project.resources.filter(path__in=vcs_changed_files) - changed_paths = changed_resources.values_list("path", flat=True) - - added_paths = [] - - log.debug(f"Removed files: {', '.join(removed_paths) or 'None'}") - removed_resources.delete() - - for relative_path, vcs_resource in vcs_project.resources.items(): - resource, created = db_project.resources.get_or_create(path=relative_path) - resource.format = Resource.get_path_format(relative_path) - resource.total_strings = len(vcs_resource.entities) - resource.save() - - if created: - db_project.reset_resource_order() - added_paths.append(relative_path) - - log.debug(f"Added files: {', '.join(added_paths) or 'None'}") - return added_paths, removed_paths, changed_paths - - -def get_changed_resources( - db_project: Project, vcs_project: VCSProject -) -> list[Any] | None: - if db_project.unsynced_locales: - return None - changed_files = vcs_project.changed_files - if changed_files is None: - return None - return ( - list(changed_files.keys()) - + list(vcs_project.added_paths) - + list(vcs_project.changed_paths) - ) - - -def update_translations( - db_project: Project, vcs_project: VCSProject, locale, changeset -): - changed_resources = get_changed_resources(db_project, vcs_project) - all_entities = collect_entities(db_project, vcs_project, changed_resources) - for key, db_entity, vcs_entity in all_entities: - # If we don't have both the db_entity and vcs_entity we can't - # do 
anything with the translations. - if db_entity is None or vcs_entity is None: - continue - - if not vcs_entity.has_translation_for(locale.code): - # VCS lacks an entity for this locale, so we can't - # pull updates nor edit it. Skip it! - continue - - if db_entity.has_changed(locale): - # Pontoon changes overwrite whatever VCS has. - changeset.update_vcs_entity(locale, db_entity, vcs_entity) - else: - # If Pontoon has nothing or has not changed, and the VCS - # still has the entity, update Pontoon with whatever may - # have changed. - changeset.update_db_entity(locale, db_entity, vcs_entity) - - -def update_translated_resources(db_project: Project, vcs_project: VCSProject, locale): - """ - Update the TranslatedResource entries in the database. - Returns true if a new TranslatedResource is added to the locale. - """ - if vcs_project.configuration: - return update_translated_resources_with_config( - db_project, - vcs_project, - locale, - ) - else: - return update_translated_resources_without_config( - db_project, - vcs_project, - locale, - ) - - -def update_translated_resources_with_config( - db_project: Project, vcs_project: VCSProject, locale -): - """ - Create/update the TranslatedResource objects for each Resource instance - that is enabled for the given locale through project configuration. - """ - tr_created = False - - for resource in vcs_project.configuration.locale_resources(locale): - translatedresource, created = TranslatedResource.objects.get_or_create( - resource=resource, locale=locale - ) - - if created: - tr_created = True - translatedresource.calculate_stats() - - return tr_created - - -def update_translated_resources_without_config( - db_project: Project, vcs_project: VCSProject, locale -): - """ - We only want to create/update the TranslatedResource object if the - resource exists in the current locale, UNLESS the file is asymmetric. 
- """ - tr_created = False - - for resource in db_project.resources.all(): - vcs_resource = vcs_project.resources.get(resource.path, None) - - if vcs_resource is not None: - resource_exists = vcs_resource.files.get(locale) is not None - if resource_exists or resource.is_asymmetric: - translatedresource, created = TranslatedResource.objects.get_or_create( - resource=resource, locale=locale - ) - - if created: - tr_created = True - translatedresource.calculate_stats() - - return tr_created - - -def update_translated_resources_no_files( - db_project: Project, locale, changed_resources -): - """ - Create/update TranslatedResource entries if files aren't available. This typically happens when - originals change and translations don't, so we don't pull locale repositories. - """ - for resource in changed_resources: - # We can only update asymmetric (monolingual) TranslatedResources. For bilingual files we - # only create TranslatedResources if the file is present in the repository for the locale, - # which we cannot check without files. 
- if not resource.is_asymmetric: - log.error(f"Unable to calculate stats for asymmetric resource: {resource}") - continue - - translatedresource, _ = TranslatedResource.objects.get_or_create( - resource=resource, locale=locale - ) - translatedresource.calculate_stats() - - -def get_vcs_entities(vcs_project: VCSProject): - return {entity_key(entity): entity for entity in vcs_project.entities} - - -def get_changed_entities(db_project: Project, changed_resources): - entities = ( - Entity.objects.select_related("resource") - .prefetch_related("changed_locales") - .filter(resource__project=db_project, obsolete=False) - ) - - if changed_resources is not None: - entities = entities.filter(resource__path__in=changed_resources) - return entities - - -def get_db_entities(db_project: Project, changed_resources=None) -> dict[str, Entity]: - return { - entity_key(entity): entity - for entity in get_changed_entities(db_project, changed_resources) - } - - -def entity_key(entity: Entity) -> str: - """ - Generate a key for the given entity that is unique within the - project. - """ - key = entity.key or entity.string - return ":".join([entity.resource.path, key]) - - -def has_repo_changed(last_synced_revisions, pulled_revisions) -> bool: - # If any revision is None, we can't be sure if a change - # happened or not, so we default to assuming it did. - unsure_change = None in pulled_revisions.values() - if unsure_change or pulled_revisions != last_synced_revisions: - return True - return False - - -def pull_source_repo_changes(db_project: Project) -> bool: - source_repo = db_project.source_repository - pulled_revisions = source_repo.pull() - return has_repo_changed(source_repo.last_synced_revisions, pulled_revisions) - - -def pull_locale_repo_changes( - db_project: Project, locales -) -> tuple[bool, dict[str, Any]]: - """ - Update the local files with changes from the VCS. Returns True - if any of the updated repos have changed since the last sync. 
- """ - has_changed = False - repo_locales = {} - - # If none of the locales have changed, quit early. - if not locales: - return has_changed, repo_locales - - # Skip already pulled locales. Useful for projects with multiple repositories, - # since we don't store the information what locale belongs to what repository. - pulled_locales = [] - - for repo in db_project.translation_repositories(): - remaining_locales = locales.exclude(code__in=pulled_locales) - if not remaining_locales: - break - - pulled_revisions = repo.pull(remaining_locales) - repo_locales[repo.pk] = Locale.objects.filter(code__in=pulled_revisions.keys()) - pulled_locales += pulled_revisions.keys() - - if has_repo_changed(repo.last_synced_revisions, pulled_revisions): - has_changed = True - - return has_changed, repo_locales - - -def commit_changes( - db_project: Project, vcs_project: VCSProject, changeset: ChangeSet, locale: Locale -) -> None: - """Commit the changes we've made back to the VCS.""" - authors = changeset.commit_authors_per_locale.get(locale.code, []) - - # Use the top translator for this batch as commit author, or - # the fake Pontoon user if there are no authors. - commit_author = ( - Counter(authors).most_common(1)[0][0] - if authors - else User(first_name=settings.VCS_SYNC_NAME, email=settings.VCS_SYNC_EMAIL) - ) - - commit_message = render_to_string( - "sync/commit_message.jinja", - {"locale": locale, "project": db_project, "authors": set(authors)}, - ) - - locale_path = vcs_project.locale_directory_paths[locale.code] - repo = db_project.repository_for_path(locale_path) - repo.commit(commit_message, commit_author, locale_path) - - -def get_changed_locales(db_project: Project, locales, now: datetime): - """ - Narrow down locales to the ones that have changed since the last sync by fetching latest - repository commit hashes via API. For projects with many repositories, this is much faster - than running VCS pull/clone for each repository. 
- """ - repos = db_project.translation_repositories() - - # Requirement: all translation repositories must have API configured - # and must be multi-locale repositories. - for repo in repos: - if not repo.api_config or not repo.multi_locale: - return locales - - log.info(f"Fetching latest commit hashes for project {db_project.slug} started.") - - # If locale has changed in the DB, we need to sync it. - changed_locale_pks = list( - locales.filter( - changedentitylocale__entity__resource__project=db_project, - changedentitylocale__when__lte=now, - ).values_list("pk", flat=True) - ) - - unchanged_locale_pks = [] - error_locale_pks = set() - - for repo in repos: - for locale in locales: - # If we already processed the locale, we can move on. - if locale.pk in changed_locale_pks + unchanged_locale_pks: - continue - - try: - locale_api_endpoint = repo.api_config["endpoint"].format( - locale_code=locale.code - ) - response = requests.get(locale_api_endpoint) - - # Raise exception on 4XX client error or 5XX server error response - response.raise_for_status() - - # If locale has not synced yet, we need to sync it. - last_synced_commit_id = repo.get_last_synced_revisions(locale.code) - if not last_synced_commit_id: - changed_locale_pks.append(locale.pk) - continue - - # If locale has changed in the VCS, we need to sync it. - latest_commit_id = repo.api_config["get_key"](response.json()) - if not latest_commit_id.startswith(last_synced_commit_id): - changed_locale_pks.append(locale.pk) - - # If locale hasn't changed in the VCS, we don't need to sync it. - else: - unchanged_locale_pks.append(locale.pk) - - # Errors and exceptions can mean locale is in a different repository or indicate - # an actual network problem. - except requests.exceptions.RequestException: - error_locale_pks.add(locale.pk) - - # Check if any locale for which the exception was raised hasn't been processed yet. - # For those locales we can't be sure if a change happened, so we assume it did. 
- for loc in error_locale_pks: - if loc not in changed_locale_pks + unchanged_locale_pks: - log.error( - "Unable to fetch latest commit hash for " - f"locale {Locale.objects.get(pk=loc)} in project {db_project.slug}" - ) - changed_locale_pks.append(locale.pk) - - changed_locales = db_project.locales.filter(pk__in=changed_locale_pks) - - log.info( - f"Fetching latest commit hashes for project {db_project.slug} complete. " - f"Changed locales: {', '.join(changed_locales.values_list('code', flat=True))}." - ) - - return changed_locales diff --git a/pontoon/sync/core/__init__.py b/pontoon/sync/core/__init__.py new file mode 100644 index 0000000000..0e2f870d63 --- /dev/null +++ b/pontoon/sync/core/__init__.py @@ -0,0 +1,114 @@ +import logging + +from notifications.signals import notify + +from django.utils import timezone + +from pontoon.base.models import ChangedEntityLocale, Locale, Project, User +from pontoon.pretranslation.tasks import pretranslate +from pontoon.sync.core.checkout import checkout_repos +from pontoon.sync.core.entities import sync_entities_from_repo +from pontoon.sync.core.paths import find_paths +from pontoon.sync.core.stats import update_stats +from pontoon.sync.core.translations_from_repo import sync_translations_from_repo +from pontoon.sync.core.translations_to_repo import sync_translations_to_repo +from pontoon.sync.models import ProjectSyncLog, RepositorySyncLog, SyncLog + + +log = logging.getLogger(__name__) + + +def sync_project( + project: Project, + sync_log: SyncLog, + *, + pull: bool = True, + commit: bool = True, + force: bool = False, +): + # Mark "now" at the start of sync to avoid messing with + # translations submitted during sync. 
+ now = timezone.now() + + log_prefix = f"[{project.slug}]" + log.info(f"{log_prefix} Sync start") + project_sync_log = ProjectSyncLog.objects.create( + sync_log=sync_log, project=project, start_time=now + ) + + try: + checkouts = checkout_repos(project, force=force, pull=pull) + paths = find_paths(project, checkouts) + except Exception as e: + log.error(f"{log_prefix} {e}") + project_sync_log.skip() + raise e + + locale_map: dict[str, Locale] = { + lc.code: lc for lc in project.locales.order_by("code") + } + paths.locales = list(locale_map.keys()) + added_entities_count, changed_paths, removed_paths = sync_entities_from_repo( + project, locale_map, checkouts.source, paths, now + ) + + repo_sync_log = RepositorySyncLog.objects.create( + project_sync_log=project_sync_log, + repository=checkouts.target.repo, + start_time=timezone.now(), + ) + + db_changes = ChangedEntityLocale.objects.filter( + entity__resource__project=project, when__lte=now + ).select_related("entity__resource", "locale") + del_trans_count, updated_trans_count = sync_translations_from_repo( + project, locale_map, checkouts, paths, db_changes, now + ) + if added_entities_count > 0: + notify_users(project, added_entities_count) + sync_translations_to_repo( + project, + commit, + locale_map, + checkouts, + paths, + db_changes, + changed_paths, + removed_paths, + now, + ) + + db_changes.delete() + checkouts.source.repo.last_synced_revision = checkouts.source.commit + if checkouts.target != checkouts.source: + checkouts.target.repo.last_synced_revision = checkouts.target.commit + if ( + added_entities_count + or changed_paths + or removed_paths + or del_trans_count + or updated_trans_count + ): + update_stats(project) + repo_sync_log.end() + log.info(f"{log_prefix} Sync done") + + if project.pretranslation_enabled and changed_paths: + # Pretranslate changed and added resources for all locales + pretranslate(project, changed_paths) + + +def notify_users(project: Project, count: int) -> None: + users = 
User.objects.filter( + translation__entity__resource__project=project, + profile__new_string_notifications=True, + ).distinct() + new_strings = f"{count} new {'string' if count == 1 else 'strings'}" + log.info(f"[{project.slug}] Notifying {len(users)} users about {new_strings}") + for user in users: + notify.send( + project, + recipient=user, + verb=f"updated with {new_strings}", + category="new_string", + ) diff --git a/pontoon/sync/core/checkout.py b/pontoon/sync/core/checkout.py new file mode 100644 index 0000000000..c8d5f5ec55 --- /dev/null +++ b/pontoon/sync/core/checkout.py @@ -0,0 +1,119 @@ +import logging + +from os import walk +from os.path import join, normpath, relpath +from typing import NamedTuple, cast + +from django.db.models.manager import BaseManager + +from pontoon.base.models import Project, Repository +from pontoon.sync.repositories import get_repo + + +log = logging.getLogger(__name__) + + +class Checkout: + repo: Repository + is_source: bool + url: str + path: str + prev_commit: str | None + commit: str | None + changed: list[str] + """Relative paths from the checkout base""" + removed: list[str] + """Relative paths from the checkout base""" + renamed: list[tuple[str, str]] + """Relative paths (old, new) from the checkout base""" + + def __init__( + self, + slug: str, + db_repo: Repository, + *, + pull: bool = True, + force: bool = False, + shallow: bool = False, + ) -> None: + self.repo = db_repo + self.is_source = db_repo.source_repo + self.url = db_repo.url + self.path = normpath(db_repo.checkout_path) + self.prev_commit = db_repo.last_synced_revision + + versioncontrol = get_repo(db_repo.type) + if pull: + versioncontrol.update(self.url, self.path, db_repo.branch, shallow) + else: + log.info(f"[{slug}] Skipping pull") + self.commit = versioncontrol.revision(self.path) + str_updated = ( + f"at {self.commit}" + if not self.prev_commit or self.prev_commit == self.commit + else f"updated from {self.prev_commit} to {self.commit}" + ) + 
log.info(f"[{slug}] Repo {str_updated}") + + delta = ( + versioncontrol.changed_files(self.path, self.prev_commit) + if not shallow and isinstance(self.prev_commit, str) + else None + ) + if shallow: + self.changed = [] + self.removed = [] + self.renamed = [] + elif delta is not None and not force: + self.changed, self.removed, self.renamed = delta + else: + # Initially and on error & when forced, consider all files changed + log.warning(f"[{slug}] Considering all files as changed") + self.changed = [] + for root, dirnames, filenames in walk(self.path): + dirnames[:] = (dn for dn in dirnames if not dn.startswith(".")) + rel_root = relpath(root, self.path) if root != self.path else "" + self.changed.extend( + join(rel_root, fn) for fn in filenames if not fn.startswith(".") + ) + self.removed = delta[1] if delta else [] + self.renamed = [] + + +class Checkouts(NamedTuple): + source: Checkout + target: Checkout + + +def checkout_repos( + project: Project, + *, + pull: bool = True, + force: bool = False, + shallow: bool = False, +) -> Checkouts: + """ + For each project repository, + update its local checkout (unless `pull` is false), + and provide a `Checkout` representing their current state. 
+ source: Checkout | None = None + target: Checkout | None = None + for repo in cast(BaseManager[Repository], project.repositories).all(): + if repo.source_repo: + if source: + raise Exception("Multiple source repositories") + source = Checkout( + project.slug, repo, force=force, pull=pull, shallow=shallow + ) + log.debug(f"[{project.slug}] source root: {source.path}") + elif target: + raise Exception("Multiple target repositories") + else: + target = Checkout( + project.slug, repo, force=force, pull=pull, shallow=shallow + ) + log.debug(f"[{project.slug}] target root: {target.path}") + if source is None and target is None: + raise Exception("No repository found") + return Checkouts(source or target, target or source) diff --git a/pontoon/sync/core/entities.py b/pontoon/sync/core/entities.py new file mode 100644 index 0000000000..3fec706854 --- /dev/null +++ b/pontoon/sync/core/entities.py @@ -0,0 +1,355 @@ +import logging + +from collections import defaultdict +from datetime import datetime +from os.path import exists, isfile, join, relpath, splitext + +from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths + +from django.db import transaction +from django.db.models import Q + +from pontoon.base.models import Entity, Locale, Project, Resource, TranslatedResource +from pontoon.base.models.entity import get_word_count +from pontoon.sync.core.checkout import Checkout +from pontoon.sync.formats import parse +from pontoon.sync.formats.exceptions import ParseError +from pontoon.sync.formats.silme import SilmeEntity, SilmeResource # Approximate types + + +log = logging.getLogger(__name__) + +BILINGUAL_FORMATS = {"po", "xliff"} + + +def sync_entities_from_repo( + project: Project, + locale_map: dict[str, Locale], + checkout: Checkout, + paths: L10nConfigPaths | L10nDiscoverPaths, + now: datetime, +) -> tuple[int, set[str], set[str]]: + """(added_entities_count, changed_source_paths, removed_source_paths)""" + if not checkout.changed and not checkout.removed
and not checkout.renamed: + return 0, set(), set() + log.info(f"[{project.slug}] Syncing entities from repo...") + # db_path -> parsed_resource + updates: dict[str, SilmeResource | None] = {} + source_paths = set(paths.ref_paths) + source_locale = Locale.objects.get(code="en-US") + for co_path in checkout.changed: + path = join(checkout.path, co_path) + if path in source_paths and exists(path): + db_path = get_db_path(paths, path) + try: + res = parse(path, locale=source_locale) + except ParseError as error: + log.error( + f"[{project.slug}:{db_path}] Skipping resource with parse error: {error}" + ) + res = None + except ValueError as error: + if str(error).startswith("Translation format"): + log.warning( + f"[{project.slug}:{db_path}] Skipping resource with unsupported format" + ) + res = None + else: + raise error + updates[db_path] = res + + with transaction.atomic(): + renamed_paths = rename_resources(project, paths, checkout) + removed_paths = remove_resources(project, paths, checkout) + old_res_added_ent_count, changed_paths = update_resources( + project, locale_map, paths, updates, now + ) + new_res_added_ent_count, _ = add_resources( + project, locale_map, paths, updates, changed_paths, now + ) + update_translated_resources(project, locale_map, paths) + + return ( + old_res_added_ent_count + new_res_added_ent_count, + renamed_paths | changed_paths, + removed_paths, + ) + + +def rename_resources( + project: Project, paths: L10nConfigPaths | L10nDiscoverPaths, checkout: Checkout +) -> set[str]: + if not checkout.renamed: + return set() + renamed_db_paths = { + get_db_path(paths, join(checkout.path, old_path)): get_db_path( + paths, join(checkout.path, new_path) + ) + for old_path, new_path in checkout.renamed + } + renamed_resources = project.resources.filter(path__in=renamed_db_paths.keys()) + for res in renamed_resources: + new_db_path = renamed_db_paths[res.path] + log.info(f"[{project.slug}:{res.path}] Rename as {new_db_path}") + res.path = new_db_path + 
    Resource.objects.bulk_update(renamed_resources, ["path"])
    return set(renamed_db_paths.values())


def remove_resources(
    project: Project, paths: L10nConfigPaths | L10nDiscoverPaths, checkout: Checkout
) -> set[str]:
    """
    Delete Resource rows whose source files were removed in the checkout.

    Returns the set of db paths of the deleted resources (empty if nothing
    was removed).
    """
    if not checkout.removed:
        return set()
    removed_resources = project.resources.filter(
        path__in={
            get_db_path(paths, join(checkout.path, co_path))
            for co_path in checkout.removed
        }
    )
    # Materialise the paths before .delete() empties the queryset.
    removed_db_paths = {res.path for res in removed_resources}
    if removed_db_paths:
        # FIXME: https://github.com/mozilla/pontoon/issues/2133
        removed_resources.delete()
        rm_count = len(removed_db_paths)
        str_source_files = "source file" if rm_count == 1 else "source files"
        log.info(
            f"[{project.slug}] Removed {rm_count} {str_source_files}: {', '.join(removed_db_paths)}"
        )
    return removed_db_paths


def update_resources(
    project: Project,
    locale_map: dict[str, Locale],
    paths: L10nConfigPaths | L10nDiscoverPaths,
    updates: dict[str, SilmeResource | None],
    now: datetime,
) -> tuple[int, set[str]]:
    """
    Sync entities of already-known resources whose source files changed.

    Obsoletes entities no longer present in the parsed source, updates
    entities whose content changed, and creates new ones.
    Returns (number of added entities, set of changed resource db paths).
    """
    changed_resources = (
        list(project.resources.filter(path__in=updates.keys())) if updates else None
    )
    if not changed_resources:
        return 0, set()
    log.info(
        f"[{project.slug}] Changed source files: {', '.join(res.path for res in changed_resources)}"
    )

    # Key entities by (resource path, key-or-string); terminology/tutorial
    # projects use the source string itself as the key.
    prev_entities = {
        (e.resource.path, e.key or e.string): e
        for e in Entity.objects.filter(resource__in=changed_resources, obsolete=False)
        .select_related("resource")
        .iterator()
    }
    next_entities = {
        (path, entity.key or entity.string): entity
        for path, entity in (
            (cr.path, entity_from_source(cr, now, 0, tx))
            for cr in changed_resources
            for tx in updates[cr.path].translations
        )
    }

    # Entities present before but not in the new source -> obsolete.
    obsolete_entities = [
        ent
        for key, ent in prev_entities.items()
        if key in prev_entities.keys() - next_entities.keys()
    ]
    for ent in obsolete_entities:
        ent.obsolete = True
        ent.date_obsoleted = now
    obs_count = Entity.objects.bulk_update(
        obsolete_entities, ["obsolete", "date_obsoleted"]
    )

    # NOTE(review): the objects yielded here come from entity_from_source()
    # and so appear to have no primary key set — confirm that bulk_update()
    # on `next_entities` values (rather than the matching `prev_entities`
    # rows) behaves as intended.
    mod_count = Entity.objects.bulk_update(
        (
            ent
            for key, ent in next_entities.items()
            if key in prev_entities.keys() & next_entities.keys()
            and not entities_same(ent, prev_entities[key])
        ),
        [
            "string",
            "string_plural",
            "comment",
            "source",
            "group_comment",
            "resource_comment",
            "context",
        ],
    )

    # FIXME: Entity order should be updated on insertion
    # https://github.com/mozilla/pontoon/issues/2115
    added_entities = Entity.objects.bulk_create(
        ent
        for key, ent in next_entities.items()
        if key in next_entities.keys() - prev_entities.keys()
    )
    add_count = len(added_entities)

    delta = [
        f"added {add_count}" if add_count else "",
        f"changed {mod_count}" if mod_count else "",
        f"obsoleted {obs_count}" if obs_count else "",
    ]
    if any(delta):
        ds = ", ".join(d for d in delta if d)
        log.info(f"[{project.slug}] Source entity updates: {ds}")
    return add_count, set(res.path for res in changed_resources)


def add_resources(
    project: Project,
    locale_map: dict[str, Locale],
    paths: L10nConfigPaths | L10nDiscoverPaths,
    updates: dict[str, SilmeResource | None],
    changed_paths: set[str],
    now: datetime,
) -> tuple[int, set[str]]:
    """
    Create Resource and Entity rows for source files not previously known.

    `updates` values of None (parse failures) and paths already handled as
    changed resources are skipped.
    Returns (number of created entities, set of added resource db paths).
    """
    added_resources = [
        Resource(project=project, path=db_path, format=get_path_format(db_path))
        for db_path, res in updates.items()
        if res is not None and db_path not in changed_paths
    ]
    if not added_resources:
        return 0, set()

    added_resources = Resource.objects.bulk_create(added_resources)
    # Re-number all of the project's resources so `order` follows path sort.
    ordered_resources = project.resources.order_by("path")
    for idx, r in enumerate(ordered_resources):
        r.order = idx
    Resource.objects.bulk_update(ordered_resources, ["order"])

    added_entities = Entity.objects.bulk_create(
        (
            entity_from_source(resource, now, idx, tx)
            for resource in added_resources
            for idx, tx in enumerate(updates[resource.path].translations)
        )
    )

    ent_count = len(added_entities)
    added_paths = {ar.path for ar in added_resources}
    log.info(
        f"[{project.slug}] New source files with {ent_count} entities: {', '.join(added_paths)}"
    )
    return ent_count, added_paths


def update_translated_resources(
    project: Project,
    locale_map: dict[str, Locale],
    paths: L10nConfigPaths | L10nDiscoverPaths,
) -> None:
    """
    Reconcile TranslatedResource rows with the current resource/locale matrix.

    Creates missing (resource, locale) pairs and deletes pairs that no longer
    correspond to a translatable target.
    """
    # Snapshot of existing pairs; survivors are removed from this set as they
    # are re-confirmed, so whatever remains at the end must be deleted.
    prev_tr_keys: set[tuple[int, int]] = set(
        (tr["resource_id"], tr["locale_id"])
        for tr in TranslatedResource.objects.filter(resource__project=project)
        .values("resource_id", "locale_id")
        .iterator()
    )
    add_tr: list[TranslatedResource] = []
    for resource in Resource.objects.filter(project=project).iterator():
        _, locales = paths.target(resource.path)
        for lc in locales:
            locale = locale_map.get(lc, None)
            if is_translated_resource(paths, resource, locale):
                assert locale is not None
                key = (resource.pk, locale.pk)
                if key in prev_tr_keys:
                    prev_tr_keys.remove(key)
                else:
                    add_tr.append(TranslatedResource(resource=resource, locale=locale))
    if add_tr:
        add_tr = TranslatedResource.objects.bulk_create(add_tr)
        add_by_res: dict[str, list[str]] = defaultdict(list)
        for tr in add_tr:
            add_by_res[tr.resource.path].append(tr.locale.code)
        for res_path, locale_codes in add_by_res.items():
            locale_codes.sort()
            log.info(
                f"[{project.slug}:{res_path}] Added for translation in: {', '.join(locale_codes)}"
            )
    if prev_tr_keys:
        del_tr_q = Q()
        for resource_id, locale_id in prev_tr_keys:
            del_tr_q |= Q(resource_id=resource_id, locale_id=locale_id)
        _, del_dict = TranslatedResource.objects.filter(del_tr_q).delete()
        del_count = del_dict.get("base.translatedresource", 0)
        str_tr = "translated resource" if del_count == 1 else "translated resources"
        log.info(f"[{project.slug}] Removed {del_count} {str_tr}")


def is_translated_resource(
    paths: L10nConfigPaths | L10nDiscoverPaths,
    resource: Resource,
    locale: Locale | None,
) -> bool:
    """
    Should a TranslatedResource exist for this (resource, locale) pair?

    Monolingual formats are always translatable; bilingual formats only when
    the target file actually exists on disk for the locale.
    """
    if locale is None:
        return False
    if resource.format in BILINGUAL_FORMATS:
        # For bilingual formats, only create TranslatedResource
        # if the resource exists for the locale.
        target = paths.target(resource.path)  # , locale_code)
        if target is None:
            return False
        target_path = paths.format_target_path(target[0], locale.code)
        return isfile(target_path)
    return True


def entity_from_source(
    resource: Resource, now: datetime, idx: int, tx: SilmeEntity
) -> Entity:
    """
    Build an (unsaved) Entity from one parsed source-file entry.

    `idx` is used as a fallback order when the parsed entry carries none.
    """
    comments = getattr(tx, "comments", None)
    group_comments = getattr(tx, "group_comments", None)
    resource_comments = getattr(tx, "resource_comments", None)
    return Entity(
        string=tx.source_string,
        string_plural=tx.source_string_plural,
        key=tx.key,
        comment="\n".join(comments) if comments else "",
        order=tx.order or idx,
        source=tx.source,
        resource=resource,
        date_created=now,
        group_comment="\n".join(group_comments) if group_comments else "",
        resource_comment="\n".join(resource_comments) if resource_comments else "",
        context=tx.context,
        word_count=get_word_count(tx.source_string),
    )


def entities_same(a: Entity, b: Entity) -> bool:
    """Compare the fields of two entities that sync is allowed to update."""
    return (
        a.string == b.string
        and a.string_plural == b.string_plural
        and a.comment == b.comment
        and a.source == b.source
        and a.group_comment == b.group_comment
        and a.resource_comment == b.resource_comment
        and a.context == b.context
    )


def get_db_path(paths: L10nConfigPaths | L10nDiscoverPaths, file_path: str) -> str:
    """
    Map an absolute checkout file path to the db path of its Resource.

    With auto-discovered paths, `.pot` templates are stored as `.po`.
    """
    rel_path = relpath(file_path, paths.ref_root)
    return (
        rel_path[:-1]
        if isinstance(paths, L10nDiscoverPaths) and rel_path.endswith(".pot")
        else rel_path
    )


def get_path_format(path: str) -> str:
    """Derive the Resource.format value from a file path's extension."""
    _, extension = splitext(path)
    path_format = extension[1:].lower()

    # Special case: pot files are considered the po format
    if path_format == "pot":
        return "po"
    elif path_format == "xlf":
        return "xliff"
    else:
        return path_format
diff --git a/pontoon/sync/core/paths.py b/pontoon/sync/core/paths.py
new file mode 100644
index 0000000000..455e425ebf
--- /dev/null
+++ 
b/pontoon/sync/core/paths.py
@@ -0,0 +1,69 @@
import logging

from os.path import join, relpath

from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths, get_android_locale

from pontoon.base.models import Project
from pontoon.sync.core.checkout import Checkouts


log = logging.getLogger(__name__)


class MissingLocaleDirectoryError(IOError):
    """Raised when sync can't find the locale directory."""


def find_paths(
    project: Project, checkouts: Checkouts
) -> L10nConfigPaths | L10nDiscoverPaths:
    """
    Build the path resolver for a project's checkouts.

    Uses an l10n.toml-based L10nConfigPaths when the project declares a
    configuration file, otherwise falls back to directory discovery.

    Raises MissingLocaleDirectoryError if discovery finds no base directory.
    """
    src_root = checkouts.source.path

    # Files deleted in the source checkout must still be resolvable,
    # so they can be matched to their db resources.
    force_paths = [join(src_root, path) for path in checkouts.source.removed]
    if project.configuration_file:
        paths = L10nConfigPaths(
            join(src_root, project.configuration_file),
            locale_map={"android_locale": get_android_locale},
            force_paths=force_paths,
        )
        if checkouts.target != checkouts.source:
            # Two-repo projects: targets live in the target checkout.
            paths.base = checkouts.target.path
        name = f"cfg={project.configuration_file}"
    else:
        paths = L10nDiscoverPaths(
            project.checkout_path,
            ref_root=src_root,
            force_paths=force_paths,
            source_locale=["templates", "en-US", "en"],
        )
        if paths.base is None:
            raise MissingLocaleDirectoryError("Base localization directory not found")
        name = "auto"

    rel_root = relpath(paths.ref_root, src_root)
    rel_base = relpath(paths.base, src_root)
    log.debug(f"[{project.slug}] Paths({name}): ref_root={rel_root} base={rel_base}")

    return paths


class UploadPaths:
    """
    moz.l10n.paths -like interface for sync'ing content from a single file.
    Implements minimal functionality required by `find_db_updates()`.
    """

    # Reference root; empty so db paths equal the given ref_path.
    ref_root = ""

    def __init__(self, ref_path: str, locale_code: str, file_path: str):
        self._ref_path = ref_path
        self._locale_code = locale_code
        self._file_path = file_path

    def find_reference(self, target_path: str):
        # Only the single uploaded file is resolvable.
        return (
            (self._ref_path, {"locale": self._locale_code})
            if target_path == self._file_path
            else None
        )
diff --git a/pontoon/sync/core/stats.py b/pontoon/sync/core/stats.py
new file mode 100644
index 0000000000..faf6dfcd4e
--- /dev/null
+++ b/pontoon/sync/core/stats.py
@@ -0,0 +1,215 @@
import logging

from textwrap import dedent

from django.db import connection

from pontoon.base.models import Project


log = logging.getLogger(__name__)


def update_stats(project: Project, *, update_locales: bool = True) -> None:
    """
    Recalculate cached string counts for one project, bottom-up:
    resources -> translated resources -> project locales -> project,
    and optionally all (public, enabled) locales.

    Uses raw SQL queries for performance.
    """

    with connection.cursor() as cursor:
        # Resources, counted from entities
        cursor.execute(
            dedent(
                """
                UPDATE base_resource res
                SET total_strings = agg.total
                FROM (
                    SELECT ent.resource_id AS "resource_id", COUNT(*) AS "total"
                    FROM "base_entity" ent
                    LEFT OUTER JOIN "base_resource" res ON (ent.resource_id = res.id)
                    WHERE NOT ent.obsolete AND res.project_id = %s
                    GROUP BY ent.resource_id
                ) AS agg
                WHERE res.id = agg.resource_id AND res.project_id = %s
                """
            ),
            [project.id, project.id],
        )

        # Translated resource total strings are counted from entities and expected locale plurals.
        # The source entity count is offset by the number of gettext plural source strings
        # multiplied by one less than the count of plural categories of the target locale,
        # which equals the number of commas in cldr_plurals.
        cursor.execute(
            dedent(
                """
                UPDATE base_translatedresource tr
                SET total_strings = agg.total
                FROM (
                    SELECT
                        tr.id AS "id",
                        COUNT(*) +
                            (LENGTH(loc.cldr_plurals) - LENGTH(REPLACE(loc.cldr_plurals, ',', ''))) *
                            COUNT(*) FILTER (WHERE ent.string_plural != '')
                            AS "total"
                    FROM "base_translatedresource" tr
                    LEFT OUTER JOIN "base_resource" res ON (tr.resource_id = res.id)
                    LEFT OUTER JOIN "base_locale" loc ON (tr.locale_id = loc.id)
                    LEFT OUTER JOIN "base_entity" ent ON (tr.resource_id = ent.resource_id)
                    WHERE NOT ent.obsolete AND res.project_id = %s
                    GROUP BY tr.id, loc.cldr_plurals
                ) AS agg
                WHERE agg.id = tr.id
                """
            ),
            [project.id],
        )

        # Other translated resource string counts, counted directly from translations
        cursor.execute(
            dedent(
                """
                UPDATE base_translatedresource tr
                SET
                    approved_strings = agg.approved,
                    pretranslated_strings = agg.pretranslated,
                    strings_with_errors = agg.errors,
                    strings_with_warnings = agg.warnings,
                    unreviewed_strings = agg.unreviewed
                FROM (
                    SELECT
                        trans.locale_id AS "locale_id",
                        ent.resource_id AS "resource_id",
                        COUNT(*) FILTER (WHERE trans.approved AND err.id IS NULL AND warn.id IS NULL) AS "approved",
                        COUNT(*) FILTER (WHERE trans.pretranslated AND err.id IS NULL AND warn.id IS NULL) AS "pretranslated",
                        COUNT(*) FILTER (WHERE (trans.approved OR trans.pretranslated OR trans.fuzzy) AND err.id IS NOT NULL) AS "errors",
                        COUNT(*) FILTER (WHERE (trans.approved OR trans.pretranslated OR trans.fuzzy) AND warn.id IS NOT NULL) AS "warnings",
                        COUNT(*) FILTER (WHERE NOT trans.approved AND NOT trans.pretranslated AND NOT trans.rejected AND NOT trans.fuzzy) AS "unreviewed"
                    FROM "base_translation" trans
                    LEFT OUTER JOIN "checks_error" err ON (trans.id = err.translation_id)
                    LEFT OUTER JOIN "checks_warning" warn ON (trans.id = warn.translation_id)
                    LEFT OUTER JOIN "base_entity" ent ON (trans.entity_id = ent.id)
                    LEFT OUTER JOIN "base_resource" res ON (ent.resource_id = res.id)
                    WHERE NOT ent.obsolete AND res.project_id = %s
                    GROUP BY trans.locale_id, ent.resource_id
                ) AS agg
                WHERE agg.locale_id = tr.locale_id AND agg.resource_id = tr.resource_id
                """
            ),
            [project.id],
        )
        tr_count = cursor.rowcount

        # Project locales, counted from translated resources
        cursor.execute(
            dedent(
                """
                UPDATE base_projectlocale pl
                SET
                    total_strings = agg.total,
                    approved_strings = agg.approved,
                    pretranslated_strings = agg.pretranslated,
                    strings_with_errors = agg.errors,
                    strings_with_warnings = agg.warnings,
                    unreviewed_strings = agg.unreviewed
                FROM (
                    SELECT
                        tr.locale_id AS "locale_id",
                        SUM(tr.total_strings) AS "total",
                        SUM(tr.approved_strings) AS "approved",
                        SUM(tr.pretranslated_strings) AS "pretranslated",
                        SUM(tr.strings_with_errors) AS "errors",
                        SUM(tr.strings_with_warnings) AS "warnings",
                        SUM(tr.unreviewed_strings) AS "unreviewed"
                    FROM "base_translatedresource" tr
                    INNER JOIN "base_resource" res ON (tr.resource_id = res.id)
                    WHERE res.project_id = %s
                    GROUP BY tr.locale_id
                ) AS agg
                WHERE agg.locale_id = pl.locale_id AND pl.project_id = %s
                """
            ),
            [project.id, project.id],
        )
        pl_count = cursor.rowcount

        # Project, counted from project locales
        cursor.execute(
            dedent(
                """
                UPDATE base_project proj
                SET
                    total_strings = GREATEST(agg.total, 0),
                    approved_strings = GREATEST(agg.approved, 0),
                    pretranslated_strings = GREATEST(agg.pretranslated, 0),
                    strings_with_errors = GREATEST(agg.errors, 0),
                    strings_with_warnings = GREATEST(agg.warnings, 0),
                    unreviewed_strings = GREATEST(agg.unreviewed, 0)
                FROM (
                    SELECT
                        SUM(pl.total_strings) AS "total",
                        SUM(pl.approved_strings) AS "approved",
                        SUM(pl.pretranslated_strings) AS "pretranslated",
                        SUM(pl.strings_with_errors) AS "errors",
                        SUM(pl.strings_with_warnings) AS "warnings",
                        SUM(pl.unreviewed_strings) AS "unreviewed"
                    FROM "base_projectlocale" pl
                    WHERE pl.project_id = %s
                ) AS agg
                WHERE proj.id = %s
                """
            ),
            [project.id, project.id],
        )

        lc_count = _update_locales(cursor) if update_locales else 0

    tr_str = (
        "1 translated resource" if tr_count == 1 else f"{tr_count} translated resources"
    )
    pl_str = "1 projectlocale" if pl_count == 1 else f"{pl_count} projectlocales"
    lc_str = "1 locale" if lc_count == 1 else f"{lc_count} locales"
    summary = (
        f"{tr_str} and {pl_str}"
        if lc_count == 0
        else f"{tr_str}, {pl_str}, and {lc_str}"
    )
    log.info(f"[{project.slug}] Updated stats for {summary}")


def update_locale_stats() -> None:
    """Recalculate cached string counts for all locales."""
    with connection.cursor() as cursor:
        lc_count = _update_locales(cursor)
    lc_str = "1 locale" if lc_count == 1 else f"{lc_count} locales"
    log.info(f"Updated stats for {lc_str}")


def _update_locales(cursor) -> int:
    """Refresh locale-level stats from project locales; returns rows updated."""
    # All locales, counted from project locales
    cursor.execute(
        dedent(
            """
            UPDATE base_locale loc
            SET
                total_strings = agg.total,
                approved_strings = agg.approved,
                pretranslated_strings = agg.pretranslated,
                strings_with_errors = agg.errors,
                strings_with_warnings = agg.warnings,
                unreviewed_strings = agg.unreviewed
            FROM (
                SELECT
                    pl.locale_id AS "locale_id",
                    SUM(pl.total_strings) AS "total",
                    SUM(pl.approved_strings) AS "approved",
                    SUM(pl.pretranslated_strings) AS "pretranslated",
                    SUM(pl.strings_with_errors) AS "errors",
                    SUM(pl.strings_with_warnings) AS "warnings",
                    SUM(pl.unreviewed_strings) AS "unreviewed"
                FROM "base_projectlocale" pl
                INNER JOIN "base_project" proj ON (pl.project_id = proj.id)
                WHERE NOT proj.disabled AND NOT proj.system_project AND proj.visibility = 'public'
                GROUP BY pl.locale_id
            ) AS agg
            WHERE agg.locale_id = loc.id
            """
        )
    )
    return cursor.rowcount
diff --git a/pontoon/sync/core/translations_from_repo.py b/pontoon/sync/core/translations_from_repo.py
new file mode 100644
index 0000000000..f7bee66a84
--- /dev/null
+++ b/pontoon/sync/core/translations_from_repo.py
@@ -0,0 +1,494 @@
import logging

from collections import defaultdict
from collections.abc import Iterable, Sized
from datetime import datetime
from os.path import join, relpath, splitext
from typing import cast

from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths, parse_android_locale
from moz.l10n.resource import bilingual_extensions, l10n_extensions

from django.core.paginator import Paginator
from django.db import transaction
from django.db.models import Q
from django.db.models.manager import BaseManager

from pontoon.actionlog.models import ActionLog
from pontoon.base.models import (
    ChangedEntityLocale,
    Entity,
    Locale,
    Project,
    Resource,
    TranslatedResource,
    Translation,
    TranslationMemoryEntry,
    User,
)
from pontoon.checks import DB_FORMATS
from pontoon.checks.utils import bulk_run_checks
from pontoon.sync.core.checkout import Checkout, Checkouts
from pontoon.sync.core.paths import UploadPaths
from pontoon.sync.formats import parse
from pontoon.sync.vcs.translation import VCSTranslation


log = logging.getLogger(__name__)

Updates = dict[tuple[int, int], tuple[dict[int | None, str], bool]]
""" (entity.id, locale.id) -> (plural_form -> string, fuzzy) """


def sync_translations_from_repo(
    project: Project,
    locale_map: dict[str, Locale],
    checkouts: Checkouts,
    paths: L10nConfigPaths | L10nDiscoverPaths,
    db_changes: BaseManager[ChangedEntityLocale],
    now: datetime,
) -> tuple[int, int]:
    """
    Apply repository-side translation changes to the database.

    Returns (removed_resource_count, updated_translation_count).
    """
    co = checkouts.target
    # In single-repo projects, source files live in the target checkout and
    # must not be treated as translation targets.
    source_paths: set[str] = set(paths.ref_paths) if checkouts.source == co else set()
    del_count = delete_removed_bilingual_resources(project, co, paths, source_paths)

    changed_target_paths = [
        path
        for path in (join(co.path, co_rel_path) for co_rel_path in co.changed)
        if path not in source_paths
    ]
    if changed_target_paths:
        n = len(changed_target_paths)
        str_files = "file" if n == 1 else "files"
        log.info(
            f"[{project.slug}] Reading changes from {n} changed target {str_files}"
        )
    updates = find_db_updates(
        project, locale_map, changed_target_paths, paths, db_changes
    )
    update_count = 0 if updates is None else len(updates)
    if updates:
        write_db_updates(project, updates, None, now)
    return del_count, update_count


def write_db_updates(
    project: Project, updates: Updates, user: User | None, now: datetime
) -> None:
    """Persist found updates, then run checks and refresh translation memory."""
    updated_translations, new_translations = update_db_translations(
        project, updates, user, now
    )
    add_failed_checks(new_translations)
    add_translation_memory_entries(project, new_translations + updated_translations)


def delete_removed_bilingual_resources(
    project: Project,
    target: Checkout,
    paths: L10nConfigPaths | L10nDiscoverPaths,
    source_paths: set[str],
) -> int:
    """
    Drop translations and TranslatedResources for bilingual files deleted
    in the target checkout. Returns the number of deleted resources.
    """
    rm_t = Q()
    rm_tr = Q()
    count = 0
    removed_target_paths = (
        path
        for path in (join(target.path, co_path) for co_path in target.removed)
        if path not in source_paths and splitext(path)[1] in bilingual_extensions
    )
    for target_path in removed_target_paths:
        ref = paths.find_reference(target_path)
        if ref:
            ref_path, path_vars = ref
            locale_code = get_path_locale(path_vars)
            if locale_code is not None:
                db_path = relpath(ref_path, paths.ref_root)
                # Auto-discovered projects store .pot resources as .po
                if not project.configuration_file and db_path.endswith(".pot"):
                    db_path = db_path[:-1]
                rm_t |= Q(entity__resource__path=db_path, locale__code=locale_code)
                rm_tr |= Q(resource__path=db_path, locale__code=locale_code)
                count += 1
    if rm_t and rm_tr:
        str_del_resources = "deleted resource" if count == 1 else "deleted resources"
        log.info(f"[{project.slug}] Removing {count} {str_del_resources}")
        with transaction.atomic():
            Translation.objects.filter(entity__resource__project=project).filter(
                rm_t
            ).delete()
            TranslatedResource.objects.filter(resource__project=project).filter(
                rm_tr
            ).delete()
    return count


def find_db_updates(
    project: Project,
    locale_map: dict[str, Locale],
    changed_target_paths: Iterable[str],
    paths: L10nConfigPaths | L10nDiscoverPaths | UploadPaths,
    db_changes: Iterable[ChangedEntityLocale],
) -> Updates | None:
    """
    `(entity.id, locale.id) -> (plural_form -> string, fuzzy)`

    Translations in changed resources, excluding:
    - Exact matches with previous approved or pretranslated translations
    - Entity/Locale combos for which Pontoon has changes since the last sync
    - Translations for which no matching entity is found
    """
    log.debug(f"[{project.slug}] Scanning for translation updates...")
    resource_paths: set[str] = set()
    # db_path -> {locale.id}
    translated_resources: dict[str, set[int]] = defaultdict(set)
    # (db_path, tx.key, locale.id) -> (plural_form -> string, fuzzy)
    translations: dict[tuple[str, str, int], tuple[dict[int | None, str], bool]] = {}
    for target_path in changed_target_paths:
        ref = paths.find_reference(target_path)
        if ref:
            ref_path, path_vars = ref
            lc = get_path_locale(path_vars)
            if lc in locale_map:
                locale = locale_map[lc]
                db_path = relpath(ref_path, paths.ref_root)
                lc_scope = f"[{project.slug}:{db_path}, {locale.code}]"
                try:
                    res = parse(
                        target_path,
                        None if isinstance(paths, UploadPaths) else ref_path,
                        locale,
                    )
                except Exception as error:
                    log.error(f"{lc_scope} Skipping resource with parse error: {error}")
                    continue
                if not project.configuration_file and db_path.endswith(".pot"):
                    db_path = db_path[:-1]
                resource_paths.add(db_path)
                translated_resources[db_path].add(locale.id)
                translations.update(
                    ((db_path, tx.key, locale.id), (tx.strings, tx.fuzzy))
                    for tx in cast(list[VCSTranslation], res.translations)
                    if tx.strings
                )
        elif splitext(target_path)[1] in l10n_extensions:
            log.debug(
                f"[{project.slug}:{relpath(target_path, paths.base)}] Not an L10n target path"
            )
    if not translations:
        return None

    resources: dict[str, Resource] = {
        res.path: res
        for res in Resource.objects.filter(
            project=project, path__in=resource_paths
        ).iterator()
    }

    # Exclude translations for which DB & repo already match
    # TODO: Should be able to use repo diff to identify changed entities and refactor this.
    trans_q = Q()
    for db_path, locale_ids in translated_resources.items():
        res = resources.get(db_path, None)
        if res is not None:
            trans_q |= Q(entity__resource=res, locale_id__in=locale_ids)
    if trans_q:
        log.debug(f"[{project.slug}] Filtering matches from translations...")
        trans_query = (
            Translation.objects.filter(trans_q)
            .filter(Q(entity__obsolete=False))
            .filter(Q(approved=True) | Q(pretranslated=True))
            .order_by("id")
            .values(
                "entity__resource__path",
                "entity__key",
                "entity__string",  # terminology/common and tutorial/playground use string instead of key.
                "locale_id",
                "plural_form",
                "string",
            )
        )
        # Paginate to bound memory use on very large projects.
        paginator = Paginator(trans_query, per_page=10000, allow_empty_first_page=True)
        for page_number in paginator.page_range:
            page = paginator.page(page_number)
            for trans_values in page:
                key = (
                    trans_values["entity__resource__path"],
                    trans_values["entity__key"] or trans_values["entity__string"],
                    trans_values["locale_id"],
                )
                if key in translations:
                    plural_form = trans_values["plural_form"]
                    strings, _ = translations[key]
                    if strings.get(plural_form, None) == trans_values["string"]:
                        if len(strings) > 1:
                            del strings[plural_form]
                        else:
                            del translations[key]
            if paginator.num_pages > 3:
                log.debug(
                    f"[{project.slug}] Filtering matches from translations... {page_number}/{paginator.num_pages}"
                )
        if not translations:
            return None

    # If repo and database both have changes, database wins.
    log.debug(f"[{project.slug}] Filtering db changes from translations...")
    for change in db_changes:
        key = (
            change.entity.resource.path,
            change.entity.key or change.entity.string,
            change.locale_id,
        )
        if key in translations:
            del translations[key]
    if not translations:
        return None

    log.debug(f"[{project.slug}] Compiling updates...")
    trans_res = {resources[db_path] for db_path, _, _ in translations}
    entities: dict[tuple[str, str], int] = {
        (e["resource__path"], e["key"] or e["string"]): e["id"]
        for e in Entity.objects.filter(resource__in=trans_res, obsolete=False)
        .values("id", "key", "string", "resource__path")
        .iterator()
    }
    res: Updates = {}
    for (db_path, ent_key, locale_id), tx in translations.items():
        entity_id = entities.get((db_path, ent_key), None)
        if entity_id is not None:
            res[(entity_id, locale_id)] = tx
    log.debug(f"[{project.slug}] Compiling updates... Found {len(res)}")
    return res


def update_db_translations(
    project: Project,
    repo_translations: Updates,
    user: User | None,
    now: datetime,
) -> tuple[list[Translation], list[Translation]]:
    """
    Apply repo translation updates to the db.

    Matching existing suggestions are approved; remaining updates become new
    (approved or fuzzy) translations; superseded translations are rejected.
    Returns (created translations, updated suggestions).
    """
    if not repo_translations:
        return [], []
    log.debug(f"[{project.slug}] Syncing translations from repo...")

    log_user = user or User.objects.get(username="pontoon-sync")
    translations_to_reject = Q()
    actions: list[ActionLog] = []

    # Approve matching suggestions
    matching_suggestions_q = Q()
    for (entity_id, locale_id), (strings, _) in repo_translations.items():
        for plural_form, string in strings.items():
            matching_suggestions_q |= Q(
                entity_id=entity_id,
                locale_id=locale_id,
                plural_form=plural_form,
                string=string,
            )
    if matching_suggestions_q:
        # (entity_id, locale_id, plural_form) => translation
        suggestions: dict[tuple[int, int, int], Translation] = {
            (tx.entity_id, tx.locale_id, tx.plural_form): tx
            for tx in Translation.objects.filter(matching_suggestions_q)
            .filter(approved=False, pretranslated=False)
            .iterator()
        }
    else:
        log.warning(
            f"[{project.slug}] Empty strings in repo_translations!? {repo_translations}"
        )
        suggestions = {}
    update_fields: set[str] = set()
    approve_count = 0
    for tx in suggestions.values():
        _, fuzzy = repo_translations[(tx.entity_id, tx.locale_id)]
        if fuzzy and tx.fuzzy:
            # Keep fuzzy suggestions unchanged
            continue

        if tx.rejected:
            tx.rejected = False
            tx.unrejected_user = None
            tx.unrejected_date = now
            actions.append(
                ActionLog(
                    action_type=ActionLog.ActionType.TRANSLATION_UNREJECTED,
                    performed_by=log_user,
                    translation=tx,
                )
            )

        tx.active = True
        tx.fuzzy = fuzzy
        if not fuzzy:
            tx.approved = True
            tx.approved_user = None
            tx.approved_date = now
            tx.pretranslated = False
            tx.unapproved_user = None
            tx.unapproved_date = None
            actions.append(
                ActionLog(
                    action_type=ActionLog.ActionType.TRANSLATION_APPROVED,
                    created_at=now,
                    performed_by=log_user,
                    translation=tx,
                )
            )
            approve_count += 1
        # Any other translation of the same (entity, locale, plural) loses.
        translations_to_reject |= Q(
            entity=tx.entity, locale=tx.locale, plural_form=tx.plural_form
        ) & ~Q(id=tx.id)
        update_fields.update(tx.get_dirty_fields())
    for entity_id, locale_id, _ in suggestions:
        try:
            del repo_translations[(entity_id, locale_id)]
        except KeyError:
            pass

    new_translations: list[Translation] = []
    if repo_translations:
        # Add new approved translations for the remainder
        for (entity_id, locale_id), (strings, fuzzy) in repo_translations.items():
            for plural_form, string in strings.items():
                # Note: no tx.entity.resource, which would be required by tx.save()
                tx = Translation(
                    entity_id=entity_id,
                    locale_id=locale_id,
                    string=string,
                    plural_form=plural_form,
                    date=now,
                    active=True,
                    user=user,
                )
                if fuzzy:
                    tx.fuzzy = True
                else:
                    tx.approved = True
                    tx.approved_date = now
                new_translations.append(tx)
                actions.append(
                    ActionLog(
                        action_type=ActionLog.ActionType.TRANSLATION_CREATED,
                        created_at=now,
                        performed_by=log_user,
                        translation=tx,
                    )
                )
                translations_to_reject |= Q(
                    entity_id=entity_id, locale_id=locale_id, plural_form=plural_form
                )

    if translations_to_reject:
        rejected = Translation.objects.filter(rejected=False).filter(
            translations_to_reject
        )
        actions.extend(
            ActionLog(
                action_type=ActionLog.ActionType.TRANSLATION_REJECTED,
                created_at=now,
                performed_by=log_user,
                translation=tx,
                is_implicit_action=True,
            )
            for tx in rejected
        )
        reject_count = rejected.update(
            active=False,
            approved=False,
            approved_user=None,
            approved_date=None,
            rejected=True,
            rejected_user=None,
            rejected_date=now,
            pretranslated=False,
            fuzzy=False,
        )
        if reject_count:
            # Rejected translations must not linger in translation memory.
            TranslationMemoryEntry.objects.filter(
                translation__in=[tx.pk for tx in rejected]
            ).delete()
            log.info(
                f"[{project.slug}] Rejected {str_n_translations(reject_count)} from repo changes"
            )

    update_count = (
        Translation.objects.bulk_update(suggestions.values(), list(update_fields))
        if update_fields
        else 0
    )
    if update_count:
        count = (
            str(approve_count)
            if approve_count == update_count
            else f"{approve_count}/{update_count}"
        )
        # NOTE(review): `count` is a str here, so str_n_translations() takes
        # the Sized branch and reports len(count) — confirm this is intended
        # rather than e.g. f"Approved {count} translations".
        log.info(
            f"[{project.slug}] Approved {str_n_translations(count)} from repo changes"
        )

    created = Translation.objects.bulk_create(new_translations)
    if created:
        log.info(
            f"[{project.slug}] Created {str_n_translations(created)} from repo changes"
        )

    if actions:
        ActionLog.objects.bulk_create(actions)

    return created, list(suggestions.values())


def str_n_translations(n: int | Sized) -> str:
    """Human-readable "N translation(s)"; a Sized argument is counted via len()."""
    if not isinstance(n, int):
        n = len(n)
    return "1 translation" if n == 1 else f"{n} translations"


def get_path_locale(path_vars: dict[str, str]) -> str | None:
    """Extract the locale code from path variables, normalising Android codes."""
    if "locale" in path_vars:
        return path_vars["locale"]
    elif "android_locale" in path_vars:
        return parse_android_locale(path_vars["android_locale"])
    else:
        return None


def add_failed_checks(translations: list[Translation]) -> None:
    """
    Run checks on all changed
    translations from supported resources
    """
    if translations:
        checked_translations = Translation.objects.filter(
            pk__in=[tx.pk for tx in translations],
            entity__resource__format__in=DB_FORMATS,
        ).select_related("entity__resource", "locale")
        bulk_run_checks(checked_translations)


def add_translation_memory_entries(
    project: Project,
    translations: list[Translation],
) -> None:
    """
    Create Translation Memory entries for:
    - new approved translations
    - updated translations that are approved and don't have a TM entry yet
    """
    if translations:
        TranslationMemoryEntry.objects.bulk_create(
            TranslationMemoryEntry(
                source=tx.tm_source,
                target=tx.tm_target,
                entity_id=tx.entity_id,
                locale_id=tx.locale_id,
                translation=tx,
                project=project,
            )
            for tx in Translation.objects.filter(
                pk__in=[tx.pk for tx in translations],
                approved=True,
                errors__isnull=True,
                memory_entries__isnull=True,
            ).iterator()
        )
diff --git a/pontoon/sync/core/translations_to_repo.py b/pontoon/sync/core/translations_to_repo.py
new file mode 100644
index 0000000000..3e72fda032
--- /dev/null
+++ b/pontoon/sync/core/translations_to_repo.py
@@ -0,0 +1,222 @@
import logging

from collections import defaultdict
from collections.abc import Container
from datetime import datetime
from os import remove
from os.path import commonpath, isfile, join, normpath

from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths

from django.conf import settings
from django.db.models import Q
from django.db.models.manager import BaseManager

from pontoon.base.models import Locale, Project, Translation, User
from pontoon.base.models.changed_entity_locale import ChangedEntityLocale
from pontoon.sync.core.checkout import Checkouts
from pontoon.sync.formats import parse
from pontoon.sync.formats.po import POResource
from pontoon.sync.repositories import CommitToRepositoryException, get_repo


log = logging.getLogger(__name__)


def sync_translations_to_repo(
    project: Project,
    commit: bool,
    locale_map: dict[str, Locale],
    checkouts: Checkouts,
    paths: L10nConfigPaths | L10nDiscoverPaths,
    db_changes: BaseManager[ChangedEntityLocale],
    changed_source_paths: set[str],
    removed_source_paths: set[str],
    now: datetime,
) -> None:
    """
    Write database translation changes back to the target checkout and,
    when `commit` is set, commit and push them with translator co-author
    credits in the commit message.

    Raises CommitToRepositoryException if the commit fails.
    """
    readonly_locales = project.locales.filter(project_locale__readonly=True)
    removed = delete_removed_resources(
        project, paths, locale_map, readonly_locales, removed_source_paths
    )
    updated, updated_locales, translators = update_changed_resources(
        project,
        paths,
        locale_map,
        readonly_locales,
        db_changes,
        changed_source_paths,
        now,
    )
    if not removed and not updated:
        return

    if not commit:
        log.info(f"[{project.slug}] Skipping commit & push")
        return

    if removed:
        lc_str = "all localizations"
    else:
        if len(updated_locales) > 4:
            lc_str = f"{len(updated_locales)} localizations"
        else:
            lc_str = ", ".join(f"{loc.name} ({loc.code})" for loc in updated_locales)
    commit_msg = f"Pontoon/{project.name}: Update {lc_str}"

    if translators:
        commit_msg += "\n"
        for translator, lc_set in translators.items():
            tr_str = translator.display_name_and_email
            lc_str = ", ".join(sorted(lc_set))
            commit_msg += f"\nCo-authored-by: {tr_str} ({lc_str})"

    commit_author = f"{settings.VCS_SYNC_NAME} <{settings.VCS_SYNC_EMAIL}>"

    co = checkouts.target
    repo = get_repo(co.repo.type)
    try:
        repo.commit(co.path, commit_msg, commit_author, co.repo.branch, co.url)
        co.commit = repo.revision(co.path)
    except CommitToRepositoryException as error:
        log.warning(f"[{project.slug}] {co.repo.type} commit failed: {error}")
        raise error


def delete_removed_resources(
    project: Project,
    paths: L10nConfigPaths | L10nDiscoverPaths,
    locale_map: dict[str, Locale],
    readonly_locales: BaseManager[Locale],
    removed_source_paths: set[str],
) -> int:
    """
    Delete target files for resources removed from the source.

    Read-only locales are left untouched. Returns the number of files removed.
    """
    count = 0
    for path in removed_source_paths:
        log_scope = f"[{project.slug}:{path}]"
        log.info(f"{log_scope} Removing for all locales")
        target, locale_codes = paths.target(path)
        # Guard against targets escaping the checkout base directory.
        if target and commonpath((paths.base, target)) == paths.base:
            for lc in locale_codes:
                if lc not in locale_map or locale_map[lc] in readonly_locales:
                    continue
                target_path = paths.format_target_path(target, lc)
                try:
                    remove(target_path)
                    count += 1
                except FileNotFoundError:
                    pass
        else:
            log.error(f"{log_scope} Invalid resource path")
    return count


def update_changed_resources(
    project: Project,
    paths: L10nConfigPaths | L10nDiscoverPaths,
    locale_map: dict[str, Locale],
    readonly_locales: Container[Locale],
    db_changes: BaseManager[ChangedEntityLocale],
    changed_source_paths: set[str],
    now: datetime,
) -> tuple[int, set[Locale], dict[User, set[str]]]:
    """
    Re-serialize target files for resources with changed sources or changed
    db translations.

    Returns (number of files written, set of locales updated,
    translator -> locale codes mapping used for commit co-author credits).
    """
    count = 0
    # db_path -> {Locale}, empty set stands for "all locales"
    changed_resources: dict[str, set[Locale]] = {
        path: set() for path in changed_source_paths
    }
    for change in db_changes:
        if change.locale in readonly_locales:
            continue
        path = str(change.entity.resource.path)
        if path not in changed_resources:
            changed_resources[path] = {change.locale}
        else:
            prev = changed_resources[path]
            if prev:
                # Only extend an explicit locale set; an empty set already
                # means "all locales" and must stay that way.
                prev.add(change.locale)
    changed_entities = set(change.entity for change in db_changes)
    if changed_resources:
        n = len(changed_resources)
        str_resources = "resource" if n == 1 else "resources"
        log.info(f"[{project.slug}] Updating {n} changed {str_resources}")

    updated_locales: set[Locale] = set()
    translators: dict[User, set[str]] = defaultdict(set)
    for path, locales_ in changed_resources.items():
        log_scope = f"[{project.slug}:{path}]"
        target, locale_codes = paths.target(path)
        if target is None:
            continue
        if commonpath((paths.base, target)) != paths.base:
            log.error(f"{log_scope} Invalid resource path")
            continue
        locales = locales_ or {
            locale
            for locale in (
                locale_map[lc] for lc in sorted(locale_codes) if lc in locale_map
            )
            if locale not in readonly_locales
        }
        if not locales:
            continue
        ref_path = normpath(join(paths.ref_root, path))
        if ref_path.endswith(".po"):
            # The reference for .po targets is the .pot template.
            ref_path += "t"
        if not isfile(ref_path):
            log.error(f"{log_scope} Missing source file")
            continue
        if locales_:
            lc_str = ", ".join(locale.code for locale in locales_)
            log.info(f"{log_scope} Updating locales: {lc_str}")
        else:
            log.info(f"{log_scope} Updating all locales")

        translations = (
            Translation.objects.filter(
                entity__obsolete=False,
                entity__resource__project_id=project.pk,
                entity__resource__path=path,
                locale__in=[locale.pk for locale in locales],
                active=True,
            )
            .filter(Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True))
            .exclude(approved_date__gt=now)  # includes approved_date = None
            .select_related("entity")
        )
        for locale in locales:
            lc_scope = f"[{project.slug}:{path}, {locale.code}]"
            lc_translations = [tx for tx in translations if tx.locale_id == locale.pk]
            target_path = paths.format_target_path(target, locale.code)
            if not lc_translations and not isfile(target_path):
                continue
            try:
                res = parse(target_path, ref_path, locale)
                if isinstance(res, POResource):
                    for po_ent in res.entities:
                        po_tx = [
                            tx for tx in lc_translations if tx.entity.key == po_ent.key
                        ]
                        po_ent.strings = {tx.plural_form: tx.string for tx in po_tx}
                        po_ent.fuzzy = any(tx.fuzzy for tx in po_tx)
                    if lc_translations and res.entities:
                        last_tx = max(lc_translations, key=lambda tx: tx.date)
                        res.entities[0].last_updated = last_tx.date
                        res.entities[0].last_translator = last_tx.user
                else:
                    # Clear all entries first, then fill in db translations.
                    for ent in res.translations:
                        ent.strings = {}
                    for tx in lc_translations:
                        key = tx.entity.key
                        if key in res.entities:
                            res.entities[key].strings = {None: tx.string}
                        else:
                            log.warning(f"{lc_scope} No source entry for {key}")
                res.save(locale)
                updated_locales.add(locale)
                for tx in lc_translations:
                    if tx.approved and tx.entity in changed_entities and tx.user:
                        translators[tx.user].add(locale.code)
                count += 1
            except Exception as error:
                log.error(f"{lc_scope} Update failed: {error}")
                continue
    return count, updated_locales, translators
diff --git a/pontoon/sync/formats/__init__.py b/pontoon/sync/formats/__init__.py
index a431e8882c..5cdbfc8d6e 100644
--- a/pontoon/sync/formats/__init__.py
+++ b/pontoon/sync/formats/__init__.py
@@ -16,6 +16,7 @@
     xliff,
     xml,
 )
+from pontoon.sync.formats.base import ParsedResource
 
 
 # To add support for a new resource format, add an entry to this dict
@@ -56,7 +57,7 @@ def are_compatible_files(file_a, file_b):
         return False
 
 
-def parse(path, source_path=None, locale=None):
+def parse(path, source_path=None, locale=None) -> ParsedResource:
     """
     Parse the resource file at the given path and return a ParsedResource with
     its translations.
diff --git a/pontoon/sync/formats/base.py b/pontoon/sync/formats/base.py
index 13a0d6548f..1276216bae 100644
--- a/pontoon/sync/formats/base.py
+++ b/pontoon/sync/formats/base.py
@@ -1,3 +1,6 @@
+from pontoon.sync.vcs.translation import VCSTranslation
+
+
 class ParsedResource:
     """
     Parent class for parsed resources as returned by parse.
@@ -6,8 +9,10 @@ class ParsedResource:
     that inherits from this class.
     """
 
+    entities: dict[str, VCSTranslation]
+
     @property
-    def translations(self):
+    def translations(self) -> list[VCSTranslation]:
         """
         Return a list of VCSTranslation instances or subclasses that represent
         the translations in the resource.
diff --git a/pontoon/sync/formats/base_json_file.py b/pontoon/sync/formats/base_json_file.py index f94da8baa9..a3a543d90d 100644 --- a/pontoon/sync/formats/base_json_file.py +++ b/pontoon/sync/formats/base_json_file.py @@ -7,9 +7,9 @@ from jsonschema import validate from jsonschema.exceptions import ValidationError -from pontoon.sync.exceptions import ParseError from pontoon.sync.formats.base import ParsedResource -from pontoon.sync.utils import create_parent_directory +from pontoon.sync.formats.exceptions import ParseError +from pontoon.sync.formats.utils import create_parent_directory log = logging.getLogger(__name__) diff --git a/pontoon/sync/exceptions.py b/pontoon/sync/formats/exceptions.py similarity index 100% rename from pontoon/sync/exceptions.py rename to pontoon/sync/formats/exceptions.py diff --git a/pontoon/sync/formats/ftl.py b/pontoon/sync/formats/ftl.py index a873909f51..80ee763ca8 100644 --- a/pontoon/sync/formats/ftl.py +++ b/pontoon/sync/formats/ftl.py @@ -4,9 +4,9 @@ from fluent.syntax import FluentParser, FluentSerializer, ast -from pontoon.sync.exceptions import ParseError, SyncError from pontoon.sync.formats.base import ParsedResource -from pontoon.sync.utils import create_parent_directory +from pontoon.sync.formats.exceptions import ParseError, SyncError +from pontoon.sync.formats.utils import create_parent_directory from pontoon.sync.vcs.translation import VCSTranslation diff --git a/pontoon/sync/formats/json_extensions.py b/pontoon/sync/formats/json_extensions.py index fb3a96ac23..58e026e179 100644 --- a/pontoon/sync/formats/json_extensions.py +++ b/pontoon/sync/formats/json_extensions.py @@ -9,8 +9,8 @@ import copy import logging -from pontoon.sync.exceptions import SyncError from pontoon.sync.formats.base_json_file import JSONResource, parse as parseJSONResource +from pontoon.sync.formats.exceptions import SyncError from pontoon.sync.vcs.translation import VCSTranslation diff --git a/pontoon/sync/formats/json_keyvalue.py 
b/pontoon/sync/formats/json_keyvalue.py index a9afe9622a..eeb7ba0fbf 100644 --- a/pontoon/sync/formats/json_keyvalue.py +++ b/pontoon/sync/formats/json_keyvalue.py @@ -12,8 +12,8 @@ import json import logging -from pontoon.sync.exceptions import SyncError from pontoon.sync.formats.base_json_file import JSONResource, parse as parseJSONResource +from pontoon.sync.formats.exceptions import SyncError from pontoon.sync.vcs.translation import VCSTranslation diff --git a/pontoon/sync/formats/po.py b/pontoon/sync/formats/po.py index f26eb651bb..9c90bc6bba 100644 --- a/pontoon/sync/formats/po.py +++ b/pontoon/sync/formats/po.py @@ -8,9 +8,8 @@ from django.utils import timezone -from pontoon.sync import KEY_SEPARATOR -from pontoon.sync.exceptions import ParseError from pontoon.sync.formats.base import ParsedResource +from pontoon.sync.formats.exceptions import ParseError from pontoon.sync.vcs.translation import VCSTranslation @@ -30,7 +29,7 @@ def __init__(self, po_entry, order): key = po_entry.msgid context = po_entry.msgctxt or "" if context: - key = context + KEY_SEPARATOR + key + key = context + "\x04" + key super().__init__( key=key, @@ -64,6 +63,8 @@ def __repr__(self): class POResource(ParsedResource): + entities: list[POEntity] + def __init__(self, pofile): self.pofile = pofile self.entities = [ diff --git a/pontoon/sync/formats/silme.py b/pontoon/sync/formats/silme.py index 7985c197bd..65476e88bf 100644 --- a/pontoon/sync/formats/silme.py +++ b/pontoon/sync/formats/silme.py @@ -14,9 +14,9 @@ from silme.format.ini import FormatParser as IniParser from silme.format.properties import FormatParser as PropertiesParser -from pontoon.sync.exceptions import ParseError, SyncError from pontoon.sync.formats.base import ParsedResource -from pontoon.sync.utils import ( +from pontoon.sync.formats.exceptions import ParseError, SyncError +from pontoon.sync.formats.utils import ( create_parent_directory, escape_quotes, unescape_quotes, diff --git a/pontoon/sync/formats/utils.py 
b/pontoon/sync/formats/utils.py new file mode 100644 index 0000000000..4b881e6b41 --- /dev/null +++ b/pontoon/sync/formats/utils.py @@ -0,0 +1,54 @@ +import errno +import os + + +def escape_apostrophes(value: str) -> str: + """ + Apostrophes (straight single quotes) have special meaning in Android strings.xml files, + so they need to be escaped using a preceding backslash. + + Learn more: + https://developer.android.com/guide/topics/resources/string-resource.html#escaping_quotes + """ + return value.replace("'", "\\'") + + +def unescape_apostrophes(value: str) -> str: + return value.replace("\\'", "'") + + +def escape_quotes(value: str) -> str: + """ + DTD files can use single or double quotes for identifying strings, + so " and ' are the safe bet that will work in both cases. + """ + value = value.replace('"', "\\"") + value = value.replace("'", "\\'") + + return value + + +def unescape_quotes(value: str) -> str: + value = value.replace("\\"", '"') + value = value.replace("\\u0022", '"') # Bug 1390111 + value = value.replace('\\"', '"') + + value = value.replace("\\'", "'") + value = value.replace("\\u0027", "'") # Bug 1390111 + value = value.replace("\\'", "'") + + return value + + +def create_parent_directory(path: str) -> None: + """ + Create parent directory of the given path if it doesn't exist yet. 
+ """ + try: + os.makedirs(os.path.dirname(path)) + except OSError as e: + # Directory already exists + if e.errno == errno.EEXIST: + pass + else: + raise diff --git a/pontoon/sync/formats/xliff.py b/pontoon/sync/formats/xliff.py index 935107257a..1cdd19a020 100644 --- a/pontoon/sync/formats/xliff.py +++ b/pontoon/sync/formats/xliff.py @@ -6,8 +6,8 @@ from lxml import etree -from pontoon.sync.exceptions import ParseError, SyncError from pontoon.sync.formats.base import ParsedResource +from pontoon.sync.formats.exceptions import ParseError, SyncError from pontoon.sync.vcs.translation import VCSTranslation from translate.storage import xliff diff --git a/pontoon/sync/formats/xml.py b/pontoon/sync/formats/xml.py index 20b9718543..5a0380eb72 100644 --- a/pontoon/sync/formats/xml.py +++ b/pontoon/sync/formats/xml.py @@ -6,14 +6,11 @@ from collections import OrderedDict -from compare_locales import ( - parser, - serializer, -) +from compare_locales import parser, serializer -from pontoon.sync.exceptions import ParseError, SyncError from pontoon.sync.formats.base import ParsedResource -from pontoon.sync.utils import ( +from pontoon.sync.formats.exceptions import ParseError, SyncError +from pontoon.sync.formats.utils import ( create_parent_directory, escape_apostrophes, unescape_apostrophes, diff --git a/pontoon/sync/management/commands/sync_projects.py b/pontoon/sync/management/commands/sync_projects.py index 68c78a5b81..00e541b3dc 100644 --- a/pontoon/sync/management/commands/sync_projects.py +++ b/pontoon/sync/management/commands/sync_projects.py @@ -3,7 +3,7 @@ from pontoon.base.models import Project from pontoon.sync.models import SyncLog -from pontoon.sync.tasks import sync_project +from pontoon.sync.tasks import sync_project_task class Command(BaseCommand): @@ -77,10 +77,10 @@ def handle(self, *args, **options): for project in projects: self.stdout.write(f"Scheduling sync for project {project.name}.") - sync_project.delay( + sync_project_task.delay( project.pk, 
sync_log.pk, - no_pull=options["no_pull"], - no_commit=options["no_commit"], + pull=not options["no_pull"], + commit=not options["no_commit"], force=options["force"], ) diff --git a/pontoon/sync/models.py b/pontoon/sync/models.py index 6da81f5dbe..584e773f9f 100644 --- a/pontoon/sync/models.py +++ b/pontoon/sync/models.py @@ -4,12 +4,12 @@ from typing import Literal from django.db import models -from django.db.models import F, Max, Sum +from django.db.models import Max from django.urls import reverse from django.utils import timezone from django.utils.functional import cached_property -from pontoon.base.models import Project, ProjectLocale, Repository, TranslatedResource +from pontoon.base.models import Project, Repository from pontoon.base.utils import latest_datetime @@ -55,69 +55,6 @@ def finished(self) -> bool: def get_absolute_url(self) -> str: return reverse("pontoon.sync.logs.details", kwargs={"sync_log_pk": self.pk}) - def fix_stats(self) -> None: - """ - Recalculate any broken stats when sync task is finished. This is a - temporary fix for https://github.com/mozilla/pontoon/issues/2040. 
- """ - if not self.finished: - return - - # total_strings missmatch between TranslatedResource & Resource - translated_resources = [] - tr_source = TranslatedResource.objects.exclude( - total_strings=F("resource__total_strings") - ).select_related("resource") - for t in tr_source: - t.total_strings = t.resource.total_strings - translated_resources.append(t) - log.info( - "Fix stats: total_strings mismatch for {resource}, {locale}.".format( - resource=t.resource, locale=t.locale.code - ) - ) - - TranslatedResource.objects.bulk_update( - translated_resources, fields=["total_strings"] - ) - - # total_strings missmatch in ProjectLocales within the same project - for p in Project.objects.available(): - count = ( - ProjectLocale.objects.filter(project=p) - .values("total_strings") - .distinct() - .count() - ) - if count > 1: - for pl in ProjectLocale.objects.filter(project=p): - pl.aggregate_stats() - - # approved + pretranslated + errors + warnings > total in TranslatedResource - for t in ( - TranslatedResource.objects.filter( - resource__project__disabled=False, - resource__project__sync_disabled=False, - ) - .annotate( - total=Sum( - F("approved_strings") - + F("pretranslated_strings") - + F("strings_with_errors") - + F("strings_with_warnings") - ) - ) - .filter(total__gt=F("total_strings")) - ): - log.info( - "Fix stats: total_strings overflow for {resource}, {locale}.".format( - resource=t.resource, locale=t.locale.code - ) - ) - t.calculate_stats() - - log.info("Sync complete.") - class ProjectSyncLog(BaseLog): sync_log = models.ForeignKey( @@ -171,7 +108,6 @@ def skip(self, end_time: datetime | None = None) -> None: self.skipped = True self.skipped_end_time = end_time or timezone.now() self.save(update_fields=("skipped", "skipped_end_time")) - self.sync_log.fix_stats() class RepositorySyncLog(BaseLog): @@ -190,4 +126,3 @@ def finished(self) -> bool: def end(self) -> None: self.end_time = timezone.now() self.save(update_fields=["end_time"]) - 
self.project_sync_log.sync_log.fix_stats() diff --git a/pontoon/sync/repositories/__init__.py b/pontoon/sync/repositories/__init__.py index ecc36db061..139e03bb5a 100644 --- a/pontoon/sync/repositories/__init__.py +++ b/pontoon/sync/repositories/__init__.py @@ -1,7 +1,3 @@ -import os - -from typing import Any - from . import git, hg, svn from .utils import CommitToRepositoryException, PullFromRepositoryException @@ -9,55 +5,10 @@ __all__ = [ "CommitToRepositoryException", "PullFromRepositoryException", - "commit_to_vcs", - "get_changed_files", - "get_revision", - "update_from_vcs", + "get_repo", ] -def update_from_vcs(repo_type: str, url: str, path: str, branch: str | None) -> None: - get_repo(repo_type).update(url, path, branch) - - -def commit_to_vcs( - repo_type: str, path: str, message: str, user: Any, branch: str | None, url: str -) -> None: - repo = get_repo(repo_type) - try: - repo.commit(path, message, user, branch, url) - except CommitToRepositoryException as e: - repo.log.debug(f"{repo_type.upper()} Commit Error for {path}: {e}") - raise e - - -def get_revision(repo_type: str, path: str) -> str | None: - return get_repo(repo_type).revision(path) - - -def get_changed_files( - repo_type: str, path: str, revision: str | None -) -> tuple[list[str], list[str]]: - """Return a list of changed files for the repository.""" - repo = get_repo(repo_type) - repo.log.info(f"Retrieving changed files for: {path}:{revision}") - - if revision is not None: - delta = repo.changed_files(path, revision) - if delta is not None: - return delta - - # If there's no latest revision we should return all the files in the latest - # version of repository - paths = [] - for root, _, files in os.walk(path): - for f in files: - if root[0] == "." or "/." 
in root: - continue - paths.append(os.path.join(root, f).replace(path + "/", "")) - return paths, [] - - def get_repo(type: str): type = type.lower() if type == "git": diff --git a/pontoon/sync/repositories/git.py b/pontoon/sync/repositories/git.py index 4896bef12d..fd5fff6859 100644 --- a/pontoon/sync/repositories/git.py +++ b/pontoon/sync/repositories/git.py @@ -1,6 +1,5 @@ import logging - -from typing import Any +import re from django.conf import settings @@ -10,42 +9,63 @@ log = logging.getLogger(__name__) -def update(source: str, target: str, branch: str | None) -> None: - log.debug("Git: Update repository.") - - command = ["git", "fetch", "--all"] - execute(command, target) - - # Undo local changes - remote = f"origin/{branch}" if branch else "origin" - - command = ["git", "reset", "--hard", remote] - code, _output, error = execute(command, target) - - if code != 0: - log.info(f"Git: {error}") - log.debug("Git: Clone instead.") - command = ["git", "clone", source, target] - code, _output, error = execute(command) - +def update(source: str, target: str, branch: str | None, shallow: bool) -> None: + log.debug(f"Git: Updating repo {source}") + if branch and re.search(r"[^%&()+,\-./0-9;<=>@A-Z_a-z{|}]|^-|\.\.|{@", branch): + raise PullFromRepositoryException(f"Git: Unsupported branch name {branch}") + + command = ["git", "rev-parse", "--is-shallow-repository"] + code, output, error = execute(command, target) + + if code == 0: + command = ( + ["git", "fetch", "origin"] + if shallow or output.strip() == b"false" + else ["git", "fetch", "--unshallow", "origin"] + ) + code, output, error = execute(command, target) + + if code == 0: + log.debug("Git: Repo updated.") + + if branch: + command = ["git", "checkout", branch] + code, output, error = execute(command, target) + if code != 0: + if output: + log.debug(output) + raise PullFromRepositoryException(error) + log.debug(f"Git: Branch {branch} checked out.") + + # Undo any local changes + remote = f"origin/{branch}" 
if branch else "origin" + command = ["git", "reset", "--hard", remote] + code, output, error = execute(command, target) if code != 0: + if output: + log.debug(output) raise PullFromRepositoryException(error) - - log.debug(f"Git: Repository at {source} cloned.") else: - log.debug(f"Git: Repository at {source} updated.") - - if branch: - command = ["git", "checkout", branch] - code, _output, error = execute(command, target) - + if error != "No such file or directory": + if output: + log.debug(output) + log.warning(f"Git: {error}") + log.debug("Git: Cloning repo...") + command = ["git", "clone"] + if branch: + command.extend(["--branch", branch]) + if shallow: + command.extend(["--depth", "1"]) + command.extend([source, target]) + code, output, error = execute(command) if code != 0: + if output: + log.debug(output) raise PullFromRepositoryException(error) + log.debug("Git: Repo cloned.") - log.debug(f"Git: Branch {branch} checked out.") - -def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> None: +def commit(path: str, message: str, author: str, branch: str | None, url: str) -> None: log.debug("Git: Commit to repository.") # Embed git identity info into commands @@ -61,20 +81,14 @@ def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> execute(git_cmd + ["add", "-A", "--", path], path) # Commit - commit = git_cmd + [ - "commit", - "-m", - message, - "--author", - user.display_name_and_email, - ] - code, _output, error = execute(commit, path) + commit = git_cmd + ["commit", "-m", message, "--author", author] + code, _, error = execute(commit, path) if code != 0 and error: raise CommitToRepositoryException(error) # Push push = ["git", "push", url, branch or "HEAD"] - code, _output, error = execute(push, path) + code, _, error = execute(push, path) if code != 0: if ( @@ -91,22 +105,42 @@ def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> def revision(path: str) -> str | None: - cmd = 
["git", "rev-parse", "HEAD"] - code, output, _error = execute(cmd, path, log=log) + cmd = ["git", "rev-parse", "--short", "HEAD"] + code, output, _ = execute(cmd, path, log=log) return output.decode().strip() if code == 0 else None -def changed_files(path: str, from_revision: str) -> tuple[list[str], list[str]] | None: - cmd = ["git", "diff", "--name-status", f"{from_revision}..HEAD", "--", path] +def changed_files( + path: str, from_revision: str +) -> tuple[list[str], list[str], list[tuple[str, str]]] | None: + cmd = [ + "git", + "diff", + "--name-status", + "--find-renames=100%", + f"{from_revision}..HEAD", + "--", + path, + ] code, output, _error = execute(cmd, path, log=log) if code != 0: return None - changed = [] - removed = [] - for line in output.decode().split("\n"): - if line: - if line.startswith(("A", "M")): - changed.append(line.split(None, 2)[1]) - elif line.startswith("D"): - removed.append(line.split(None, 2)[1]) - return changed, removed + changed: list[str] = [] + removed: list[str] = [] + renamed: list[tuple[str, str]] = [] # [(from, to)] + for line in output.decode().strip().split("\n"): + if line.startswith(("A", "M")): + changed.append(line.split(None, 2)[1]) + elif line.startswith("D"): + removed.append(line.split(None, 2)[1]) + elif line.startswith("R"): + parts = line.split() + if len(parts) == 3: + renamed.append((parts[1], parts[2])) + else: + log.warning(f"Git: Failed to parse diff line: {line}") + return None + elif line.strip(): + log.warning(f"Git: Failed to parse diff line: {line}") + return None + return changed, removed, renamed diff --git a/pontoon/sync/repositories/hg.py b/pontoon/sync/repositories/hg.py index e66aee7a17..1978e38ef1 100644 --- a/pontoon/sync/repositories/hg.py +++ b/pontoon/sync/repositories/hg.py @@ -1,22 +1,20 @@ import logging -from typing import Any - from .utils import CommitToRepositoryException, PullFromRepositoryException, execute log = logging.getLogger(__name__) -def update(source: str, target: str, 
branch: str | None) -> None: - log.debug("Mercurial: Update repository.") +def update(source: str, target: str, branch: str | None, shallow: bool) -> None: + log.debug(f"Mercurial: Updating repo {source}") # Undo local changes: Mercurial doesn't offer anything more elegant command = ["rm", "-rf", target] - code, _output, error = execute(command) + code, _, error = execute(command) command = ["hg", "clone", source, target] - code, _output, error = execute(command) + code, _, error = execute(command) if code == 0: log.debug(f"Mercurial: Repository at {source} cloned.") @@ -25,14 +23,14 @@ def update(source: str, target: str, branch: str | None) -> None: raise PullFromRepositoryException(error) -def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> None: +def commit(path: str, message: str, author: str, branch: str | None, url: str) -> None: log.debug("Mercurial: Commit to repository.") # Add new and remove missing paths execute(["hg", "addremove"], path) # Commit - commit = ["hg", "commit", "-m", message, "-u", user.display_name_and_email] + commit = ["hg", "commit", "-m", message, "-u", author] code, output, error = execute(commit, path) if code != 0 and error: raise CommitToRepositoryException(error) @@ -51,15 +49,17 @@ def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> def revision(path: str) -> str | None: cmd = ["hg", "identify", "--id", "--rev=default"] - code, output, _error = execute(cmd, path, log=log) + code, output, _ = execute(cmd, path, log=log) return output.decode().strip() if code == 0 else None -def changed_files(path: str, from_revision: str) -> tuple[list[str], list[str]] | None: +def changed_files( + path: str, from_revision: str +) -> tuple[list[str], list[str], list[tuple[str, str]]] | None: # Ignore trailing + in revision number. It marks local changes. 
rev = from_revision.rstrip("+") cmd = ["hg", "status", "-a", "-m", "-r", f"--rev={rev}", "--rev=default"] - code, output, _error = execute(cmd, path, log=log) + code, output, _ = execute(cmd, path, log=log) if code != 0: return None changed = [] @@ -70,4 +70,4 @@ def changed_files(path: str, from_revision: str) -> tuple[list[str], list[str]] changed.append(line.split(None, 2)[1]) elif line.startswith("R"): removed.append(line.split(None, 2)[1]) - return changed, removed + return changed, removed, [] diff --git a/pontoon/sync/repositories/svn.py b/pontoon/sync/repositories/svn.py index e45912adeb..642c5c8c71 100644 --- a/pontoon/sync/repositories/svn.py +++ b/pontoon/sync/repositories/svn.py @@ -1,7 +1,6 @@ import logging from os import environ, path -from typing import Any from django.conf import settings @@ -11,7 +10,7 @@ log = logging.getLogger(__name__) -def update(source: str, target: str, branch: str | None) -> None: +def update(source: str, target: str, branch: str | None, shallow: bool) -> None: log.debug("Subversion: Checkout or update repository.") if path.exists(target): @@ -29,7 +28,7 @@ def update(source: str, target: str, branch: str | None) -> None: target, ] - code, _output, error = execute(command, env=get_svn_env()) + code, _, error = execute(command, env=get_svn_env()) if code != 0: raise PullFromRepositoryException(error) @@ -37,7 +36,7 @@ def update(source: str, target: str, branch: str | None) -> None: log.debug(f"Subversion: Repository at {source} {status}.") -def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> None: +def commit(path: str, message: str, author: str, branch: str | None, url: str) -> None: log.debug("Subversion: Commit to repository.") # Commit @@ -47,7 +46,7 @@ def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> "-m", message, "--with-revprop", - f"author={user.display_name_and_email}", + f"author={author}", path, ] code, output, error = execute(commit, env=get_svn_env()) 
@@ -62,15 +61,17 @@ def commit(path: str, message: str, user: Any, branch: str | None, url: str) -> def revision(path: str) -> str | None: cmd = ["svnversion", path] - code, output, _error = execute(cmd, env=get_svn_env(), log=log) + code, output, _ = execute(cmd, env=get_svn_env(), log=log) return output.decode().strip() if code == 0 else None -def changed_files(path: str, from_revision: str) -> tuple[list[str], list[str]] | None: +def changed_files( + path: str, from_revision: str +) -> tuple[list[str], list[str], list[tuple[str, str]]] | None: # Remove all non digit characters from the revision number. rev = "".join(filter(lambda c: c.isdigit(), from_revision)) cmd = ["svn", "diff", "-r", f"{rev}:HEAD", "--summarize"] - code, output, _error = execute(cmd, path, env=get_svn_env(), log=log) + code, output, _ = execute(cmd, path, env=get_svn_env(), log=log) if code != 0: return None changed = [] @@ -80,7 +81,7 @@ def changed_files(path: str, from_revision: str) -> tuple[list[str], list[str]] changed.append(line.split(None, 2)[1]) elif line.startswith("D"): removed.append(line.split(None, 2)[1]) - return changed, removed + return changed, removed, [] def get_svn_env(): diff --git a/pontoon/sync/repositories/utils.py b/pontoon/sync/repositories/utils.py index d5e89697e5..b1dd54524e 100644 --- a/pontoon/sync/repositories/utils.py +++ b/pontoon/sync/repositories/utils.py @@ -13,18 +13,18 @@ class CommitToRepositoryException(Exception): def execute( command: list[str], cwd: str | None = None, env=None, log: Logger | None = None -) -> tuple[int, bytes, str | None]: +) -> tuple[int, bytes, str]: try: sp = subprocess.PIPE proc = subprocess.Popen( command, stdout=sp, stderr=sp, stdin=sp, cwd=cwd, env=env ) output, error = proc.communicate() - strerror = error.decode() if error else None + strerror = error.decode() if error else "" if log is not None and proc.returncode != 0: log.error( f"Error while executing command `{command}` in `{cwd}`: {strerror}" ) return 
proc.returncode, output, strerror except OSError as error: - return -1, b"", error.strerror + return -1, b"", error.strerror or "" diff --git a/pontoon/sync/tasks.py b/pontoon/sync/tasks.py index 481ae298ce..2846f93f56 100644 --- a/pontoon/sync/tasks.py +++ b/pontoon/sync/tasks.py @@ -1,407 +1,47 @@ import logging -from datetime import datetime +from celery import shared_task from django.conf import settings -from django.db import transaction -from django.utils import timezone +from django.core.cache import cache -from pontoon.base.models import ( - ChangedEntityLocale, - Entity, - Locale, - Project, -) +from pontoon.base.models import Project from pontoon.base.tasks import PontoonTask -from pontoon.pretranslation.tasks import pretranslate -from pontoon.sync.changeset import ChangeSet -from pontoon.sync.core import ( - commit_changes, - get_changed_locales, - pull_locale_repo_changes, - pull_source_repo_changes, - serial_task, - update_originals, - update_translated_resources, - update_translated_resources_no_files, - update_translations, -) -from pontoon.sync.models import ProjectSyncLog, RepositorySyncLog, SyncLog -from pontoon.sync.repositories import CommitToRepositoryException -from pontoon.sync.vcs.project import MissingSourceDirectoryError, VCSProject +from pontoon.sync.core import sync_project +from pontoon.sync.models import ProjectSyncLog, SyncLog log = logging.getLogger(__name__) -def get_or_fail(ModelClass, message=None, **kwargs): +@shared_task(base=PontoonTask, name="sync_project") +def sync_project_task( + project_pk: int, + sync_log_pk: int, + pull: bool = True, + commit: bool = True, + force: bool = False, +): try: - return ModelClass.objects.get(**kwargs) - except ModelClass.DoesNotExist: - if message is not None: - log.error(message) + project = Project.objects.get(pk=project_pk) + sync_log = SyncLog.objects.get(pk=sync_log_pk) + except Project.DoesNotExist: + log.error(f"[id={project.slug}] Sync aborted: Project not found.") raise - - -def 
sync_project_error(error, *args, **kwargs): - ProjectSyncLog.objects.create( - sync_log=SyncLog.objects.get(pk=args[1]), - project=Project.objects.get(pk=args[0]), - start_time=timezone.now(), - ).skip() - - -def update_locale_project_locale_stats(locale: Locale, project: Project): - locale.aggregate_stats() - locale.project_locale.get(project=project).aggregate_stats() - - -@serial_task( - settings.SYNC_TASK_TIMEOUT, - base=PontoonTask, - lock_key="project={0}", - on_error=sync_project_error, -) -def sync_project( - self, - project_pk, - sync_log_pk, - no_pull=False, - no_commit=False, - force=False, -): - """Fetch the project with the given PK and perform sync on it.""" - db_project = get_or_fail( - Project, - pk=project_pk, - message=f"Could not sync project with pk={project_pk}, not found.", - ) - - sync_log = get_or_fail( - SyncLog, - pk=sync_log_pk, - message=( - f"Could not sync project {db_project.slug}, log with pk={sync_log_pk} not found." - ), - ) - - # Mark "now" at the start of sync to avoid messing with - # translations submitted during sync. - now = timezone.now() - - project_sync_log = ProjectSyncLog.objects.create( - sync_log=sync_log, project=db_project, start_time=now - ) - - log.info(f"Syncing project {db_project.slug}.") - - source_changes = sync_sources(db_project, now, force, no_pull) - # Skip syncing translations if we already know there's nothing to sync - # or if no source directory found. 
- if not source_changes: - project_sync_log.skip() - return - - # Sync translations - sync_translations( - db_project, - project_sync_log, - now, - source_changes.get("has_source_repo_changed"), - source_changes.get("added_paths"), - source_changes.get("removed_paths"), - source_changes.get("changed_paths"), - source_changes.get("new_entities"), - no_pull=no_pull, - no_commit=no_commit, - force=force, - ) - - -def sync_sources(db_project: Project, now: datetime, force: bool, no_pull: bool): - # Pull from source repository - if no_pull: - has_source_repo_changed = True - else: - log.info(f"Pulling source changes for project {db_project.slug} started.") - has_source_repo_changed = pull_source_repo_changes(db_project) - log.info(f"Pulling source changes for project {db_project.slug} complete.") - - # If the only repo hasn't changed since the last sync and there are - # no Pontoon-side changes for this project, quit early. - if ( - not force - and not db_project.needs_sync - and not has_source_repo_changed - and db_project.has_single_repo - ): - log.info(f"Skipping project {db_project.slug}, no changes detected.") - return False - - if force or has_source_repo_changed: - try: - added_paths, removed_paths, changed_paths, new_entities = update_originals( - db_project, now, force=force - ) - except MissingSourceDirectoryError as e: - log.error(e) - return False - - log.info(f"Synced sources for project {db_project.slug}.") - - else: - added_paths, removed_paths, changed_paths, new_entities = None, None, None, None - log.info( - "Skipping syncing sources for project {}, no changes detected.".format( - db_project.slug - ) - ) - - return { - "has_source_repo_changed": has_source_repo_changed, - "added_paths": added_paths, - "removed_paths": removed_paths, - "changed_paths": changed_paths, - "new_entities": new_entities, - } - - -def sync_translations( - db_project: Project, - project_sync_log: ProjectSyncLog, - now: datetime, - has_source_repo_changed, - added_paths=None, - 
removed_paths=None, - changed_paths=None, - new_entities=None, - no_pull=False, - no_commit=False, - force=False, -): - repo = db_project.translation_repositories()[0] - - log.info(f"Syncing translations for project: {db_project.slug}") - - repo_sync_log = RepositorySyncLog.objects.create( - project_sync_log=project_sync_log, repository=repo, start_time=timezone.now() - ) - - locales = db_project.locales.order_by("code") - - if not locales: - log.info( - "Skipping syncing translations for project {}, no locales to sync " - "found within.".format(db_project.slug) - ) - repo_sync_log.end() - return - - # If project repositories have API access, we can retrieve latest commit hashes and detect - # changed locales before the expensive VCS pull/clone operations. When performing full scan, - # we still need to sync all locales. - if not force: - locales = get_changed_locales(db_project, locales, now) - - readonly_locales = db_project.locales.filter(project_locale__readonly=True) - added_and_changed_resources = db_project.resources.filter( - path__in=list(added_paths or []) + list(changed_paths or []) - ).distinct() - - # We should also sync files for which source file change - but only for read-only locales. - # See https://github.com/mozilla/pontoon/issues/2068 for more details. 
- if added_and_changed_resources: - changed_locales_pks = [loc.pk for loc in locales] - readonly_locales_pks = [loc.pk for loc in readonly_locales] - locales = db_project.locales.filter( - pk__in=changed_locales_pks + readonly_locales_pks - ) - - have_repos_changed = has_source_repo_changed - repo_locales = None - - if not no_pull: - repo_locales = {db_project.source_repository.pk: Locale.objects.none()} - - # Pull repos of locales in case of multi_locale_project - if not db_project.has_single_repo: - log.info(f"Pulling locale repos for project {db_project.slug} started.") - have_locale_repos_changed, pulled_repo_locales = pull_locale_repo_changes( - db_project, locales - ) - log.info(f"Pulling locale repos for project {db_project.slug} complete.") - - have_repos_changed |= have_locale_repos_changed - repo_locales.update(pulled_repo_locales) - - # If none of the repos has changed since the last sync and there are - # no Pontoon-side changes for this project, quit early. - if ( - not force - and not db_project.needs_sync - and not have_repos_changed - and not (added_paths or removed_paths or changed_paths) - ): - log.info(f"Skipping project {db_project.slug}, no changes detected.") - repo_sync_log.end() - return - - vcs_project = VCSProject( - db_project, - now, - locales=locales, - repo_locales=repo_locales, - added_paths=added_paths, - changed_paths=changed_paths, - force=force, - ) - - synced_locales = set() - failed_locales = set() - - # Store newly added locales and locales with newly added resources - new_locales = [] - - for locale in locales: - try: - with transaction.atomic(): - # Sets VCSProject.synced_locales, needed to skip early - if not vcs_project.synced_locales: - vcs_project.resources - - # Skip all locales if none of the them has anything to sync - if len(vcs_project.synced_locales) == 0: - break - - # Skip locales that have nothing to sync - if ( - vcs_project.synced_locales - and locale not in vcs_project.synced_locales - ): - continue - - 
changeset = ChangeSet(db_project, vcs_project, now, locale) - update_translations(db_project, vcs_project, locale, changeset) - changeset.execute() - - created = update_translated_resources(db_project, vcs_project, locale) - if created: - new_locales.append(locale.pk) - update_locale_project_locale_stats(locale, db_project) - - # Clear out the "has_changed" markers now that we've finished - # syncing. - ( - ChangedEntityLocale.objects.filter( - entity__resource__project=db_project, - locale=locale, - when__lte=now, - ).delete() - ) - - # Perform the commit last so that, if it succeeds, there is - # nothing after it to fail. - if ( - not no_commit - and locale in changeset.locales_to_commit - and locale not in readonly_locales - ): - commit_changes(db_project, vcs_project, changeset, locale) - - log.info( - "Synced locale {locale} for project {project}.".format( - locale=locale.code, - project=db_project.slug, - ) - ) - - synced_locales.add(locale.code) - - except CommitToRepositoryException as err: - # Transaction aborted, log and move on to the next locale. - log.warning( - "Failed to sync locale {locale} for project {project} due to " - "commit error: {error}".format( - locale=locale.code, - project=db_project.slug, - error=err, - ) - ) - - failed_locales.add(locale.code) - - # If sources have changed, update stats for all locales. - if added_paths or removed_paths or changed_paths: - for locale in db_project.locales.all(): - # Already synced. - if locale.code in synced_locales: - continue - - # We have files: update all translated resources. - if locale in locales: - created = update_translated_resources(db_project, vcs_project, locale) - if created: - new_locales.append(locale.pk) - - # We don't have files: we can still update asymmetric translated resources. 
- else: - update_translated_resources_no_files( - db_project, - locale, - added_and_changed_resources, - ) - - update_locale_project_locale_stats(locale, db_project) - synced_locales.add(locale.code) - - log.info( - "Synced source changes for locale {locale} for project {project}.".format( - locale=locale.code, - project=db_project.slug, - ) - ) - - db_project.aggregate_stats() - - synced_locales = sorted(synced_locales) - - if synced_locales: - log.info( - "Synced translations for project {} in locales {}.".format( - db_project.slug, ",".join(synced_locales) - ) - ) - elif failed_locales: - log.info( - "Failed to sync translations for project {} due to commit error.".format( - db_project.slug - ) + except SyncLog.DoesNotExist: + log.error( + f"[{project.slug}] Sync aborted: Log with id={sync_log_pk} not found." ) - else: - log.info( - "Skipping syncing translations for project {}, none of the locales " - "has anything to sync.".format(db_project.slug) - ) - - if repo_locales: - repos = db_project.repositories.filter(pk__in=repo_locales.keys()) - for r in repos: - r.set_last_synced_revisions( - locales=repo_locales[r.pk].exclude(code__in=failed_locales) - ) - repo_sync_log.end() - - if db_project.pretranslation_enabled: - # Pretranslate all entities for newly added locales - # and locales with newly added resources - if len(new_locales): - pretranslate(db_project.pk, locales=new_locales) + raise - locales = db_project.locales.exclude(pk__in=new_locales).values_list( - "pk", flat=True + lock_name = f"sync_{project_pk}" + if not cache.add(lock_name, True, timeout=settings.SYNC_TASK_TIMEOUT): + ProjectSyncLog.objects.create(project=project, sync_log=sync_log).skip() + raise RuntimeError( + f"[{project.slug}] Sync aborted: Previous sync still running." 
) - - # Pretranslate newly added entities for all locales - if new_entities and locales: - entities = Entity.objects.filter(pk__in=[e.pk for e in set(new_entities)]) - pretranslate(db_project.pk, locales=locales, entities=entities) + try: + sync_project(project, sync_log, pull=pull, commit=commit, force=force) + finally: + # release the lock + cache.delete(lock_name) diff --git a/pontoon/sync/templates/sync/commit_message.jinja b/pontoon/sync/templates/sync/commit_message.jinja deleted file mode 100644 index baac1f8e3c..0000000000 --- a/pontoon/sync/templates/sync/commit_message.jinja +++ /dev/null @@ -1,6 +0,0 @@ -Pontoon: Update {{ locale.name }} ({{ locale.code }}) localization of {{ project.name }} -{% if authors %} -{% for author in authors -%} -Co-authored-by: {{ author.display_name_and_email|safe }} -{% endfor %} -{%- endif -%} diff --git a/pontoon/sync/tests/__init__.py b/pontoon/sync/tests/__init__.py index dbb2ee13e3..92fdc3be8f 100644 --- a/pontoon/sync/tests/__init__.py +++ b/pontoon/sync/tests/__init__.py @@ -1,64 +1,7 @@ -import os.path - -from unittest.mock import PropertyMock, patch - import factory -from pontoon.base.models import Project -from pontoon.base.tests import ( - EntityFactory, - LocaleFactory, - ProjectFactory, - RepositoryFactory, - ResourceFactory, - TestCase, - TranslationFactory, -) -from pontoon.base.utils import aware_datetime -from pontoon.sync.changeset import ChangeSet +from pontoon.base.tests import ProjectFactory, RepositoryFactory from pontoon.sync.models import ProjectSyncLog, RepositorySyncLog, SyncLog -from pontoon.sync.vcs.project import VCSProject -from pontoon.sync.vcs.resource import VCSEntity, VCSResource -from pontoon.sync.vcs.translation import VCSTranslation - - -FAKE_CHECKOUT_PATH = os.path.join( - os.path.dirname(__file__), - "fake-checkout", -) -PROJECT_CONFIG_CHECKOUT_PATH = os.path.join( - os.path.dirname(__file__), - "project-config-checkout", -) -LOCALE_SEPARATOR_TEST_PATH = os.path.join( - 
os.path.dirname(__file__), - "locale-separator-tests", -) - - -class VCSEntityFactory(factory.Factory): - resource = None - key = "key" - context = "context" - string = "string" - string_plural = "" - comments = factory.List([]) - source = factory.List([]) - order = factory.Sequence(lambda n: n) - - class Meta: - model = VCSEntity - - -class VCSTranslationFactory(factory.Factory): - key = factory.Sequence(lambda n: f"key-{n}") - context = "context" - strings = factory.Dict({}) - comments = factory.List([]) - fuzzy = False - - class Meta: - model = VCSTranslation class SyncLogFactory(factory.django.DjangoModelFactory): @@ -80,121 +23,3 @@ class RepositorySyncLogFactory(factory.django.DjangoModelFactory): class Meta: model = RepositorySyncLog - - -class FakeCheckoutTestCase(TestCase): - """Parent class for tests that use the fake l10n repo checkout.""" - - def setUp(self): - self.now = aware_datetime(1970, 1, 1) - - timezone_patch = patch("pontoon.sync.tasks.timezone") - self.mock_timezone = timezone_patch.start() - self.addCleanup(timezone_patch.stop) - self.mock_timezone.now.return_value = self.now - - self.translated_locale = LocaleFactory.create(code="translated-locale") - self.inactive_locale = LocaleFactory.create(code="inactive-locale") - self.repository = RepositoryFactory() - - self.db_project = ProjectFactory.create( - name="db-project", - locales=[self.translated_locale], - repositories=[self.repository], - ) - self.main_db_resource = ResourceFactory.create( - project=self.db_project, - path="main.po", - format="po", - ) - self.other_db_resource = ResourceFactory.create( - project=self.db_project, - path="other.po", - format="po", - ) - self.missing_db_resource = ResourceFactory.create( - project=self.db_project, - path="missing.po", - format="po", - ) - self.main_db_entity = EntityFactory.create( - resource=self.main_db_resource, - string="Source String", - key="Source String", - obsolete=False, - ) - self.other_db_entity = EntityFactory.create( - 
resource=self.other_db_resource, - string="Other Source String", - key="Other Source String", - obsolete=False, - ) - self.main_db_translation = TranslationFactory.create( - entity=self.main_db_entity, - plural_form=None, - locale=self.translated_locale, - string="Translated String", - date=aware_datetime(1970, 1, 1), - approved=True, - ) - - # Load paths from the fake locale directory. - checkout_path_patch = patch.object( - Project, - "checkout_path", - new_callable=PropertyMock, - return_value=FAKE_CHECKOUT_PATH, - ) - checkout_path_patch.start() - - self.addCleanup(checkout_path_patch.stop) - - vcs_changed_files = { - self.main_db_resource.path: [self.translated_locale], - self.other_db_resource.path: [self.translated_locale], - self.missing_db_resource.path: [self.translated_locale], - } - - changed_files_patch = patch.object( - VCSProject, - "changed_files", - new_callable=PropertyMock, - return_value=vcs_changed_files, - ) - changed_files_patch.start() - self.addCleanup(changed_files_patch.stop) - - source_repository = patch.object( - Project, - "source_repository", - new_callable=PropertyMock, - return_value=self.db_project.repositories.all()[0], - ) - source_repository.start() - self.addCleanup(source_repository.stop) - - self.vcs_project = VCSProject(self.db_project) - self.main_vcs_resource = self.vcs_project.resources[self.main_db_resource.path] - self.other_vcs_resource = self.vcs_project.resources[ - self.other_db_resource.path - ] - self.missing_vcs_resource = self.vcs_project.resources[ - self.missing_db_resource.path - ] - self.main_vcs_entity = self.main_vcs_resource.entities["Source String"] - self.main_vcs_translation = self.main_vcs_entity.translations[ - "translated-locale" - ] - - # Mock VCSResource.save() for each resource to avoid altering - # the filesystem. 
- resource_save_patch = patch.object(VCSResource, "save") - resource_save_patch.start() - self.addCleanup(resource_save_patch.stop) - - self.changeset = ChangeSet( - self.db_project, - self.vcs_project, - aware_datetime(1970, 1, 1), - self.translated_locale, - ) diff --git a/pontoon/sync/tests/formats/__init__.py b/pontoon/sync/tests/formats/__init__.py index f70ca3cefd..4e29696b5d 100644 --- a/pontoon/sync/tests/formats/__init__.py +++ b/pontoon/sync/tests/formats/__init__.py @@ -3,7 +3,6 @@ assert_attributes_equal, create_tempfile, ) -from pontoon.base.utils import match_attr class FormatTestsMixin: @@ -366,8 +365,9 @@ def run_save_translation_missing( MissingString=Translated Missing String """ path, resource = self.parse_string(input_string, source_string=source_string) - missing_translation = match_attr( - resource.translations, key=self.key("Missing String") + key = self.key("Missing String") + missing_translation = next( + trans for trans in resource.translations if getattr(trans, "key") == key ) missing_translation.strings = { None: expected_translation or "Translated Missing String" @@ -394,7 +394,11 @@ def run_save_translation_identical( """ path, resource = self.parse_string(input_string, source_string=source_string) - translation = match_attr(resource.translations, key="String") + translation = next( + trans + for trans in resource.translations + if getattr(trans, "key") == "String" + ) translation.strings = {None: expected_translation or "Source String"} resource.save(self.locale) diff --git a/pontoon/sync/tests/formats/test_ftl.py b/pontoon/sync/tests/formats/test_ftl.py index 873db102a6..25526f835a 100644 --- a/pontoon/sync/tests/formats/test_ftl.py +++ b/pontoon/sync/tests/formats/test_ftl.py @@ -12,8 +12,8 @@ assert_attributes_equal, create_named_tempfile, ) -from pontoon.sync.exceptions import ParseError from pontoon.sync.formats import ftl +from pontoon.sync.formats.exceptions import ParseError from pontoon.sync.tests.formats import 
FormatTestsMixin diff --git a/pontoon/sync/tests/formats/test_po.py b/pontoon/sync/tests/formats/test_po.py index 8fc53fa779..4bd4f917e4 100644 --- a/pontoon/sync/tests/formats/test_po.py +++ b/pontoon/sync/tests/formats/test_po.py @@ -2,7 +2,6 @@ from pontoon.base.tests import TestCase, UserFactory, assert_attributes_equal from pontoon.base.utils import aware_datetime -from pontoon.sync import KEY_SEPARATOR from pontoon.sync.formats import po from pontoon.sync.tests.formats import FormatTestsMixin @@ -61,7 +60,7 @@ msgstr[1] "Translated Plural %(count)s strings with missing translations" """ -HEADER_TEMPLATE = """#\x20 +HEADER_TEMPLATE = """# msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\\n" @@ -135,13 +134,13 @@ def test_parse_context(self): assert_attributes_equal( resource.translations[0], source_string="Source", - key=self.key("Main context" + KEY_SEPARATOR + "Source"), + key=self.key("Main context\x04Source"), ) assert_attributes_equal( resource.translations[1], source_string="Source", - key=self.key("Other context" + KEY_SEPARATOR + "Source"), + key=self.key("Other context\x04Source"), ) assert_attributes_equal( diff --git a/pontoon/sync/tests/formats/test_silme.py b/pontoon/sync/tests/formats/test_silme.py index 8f7ca14e87..d6867be25b 100644 --- a/pontoon/sync/tests/formats/test_silme.py +++ b/pontoon/sync/tests/formats/test_silme.py @@ -13,8 +13,8 @@ assert_attributes_equal, create_tempfile, ) -from pontoon.sync.exceptions import ParseError from pontoon.sync.formats import silme +from pontoon.sync.formats.exceptions import ParseError from pontoon.sync.tests.formats import FormatTestsMixin diff --git a/pontoon/sync/tests/formats/test_xliff.py b/pontoon/sync/tests/formats/test_xliff.py index 6ad25ffd82..861ef509a8 100644 --- a/pontoon/sync/tests/formats/test_xliff.py +++ b/pontoon/sync/tests/formats/test_xliff.py @@ -4,9 +4,8 @@ import pytest from pontoon.base.tests import TestCase -from pontoon.sync import KEY_SEPARATOR -from 
pontoon.sync.exceptions import ParseError from pontoon.sync.formats import xliff +from pontoon.sync.formats.exceptions import ParseError from pontoon.sync.tests.formats import FormatTestsMixin @@ -57,7 +56,7 @@ class XLIFFTests(FormatTestsMixin, TestCase): def key(self, source_string): """XLIFF keys are prefixed with the file name.""" - return "filename" + KEY_SEPARATOR + super().key(source_string) + return "filename\x04" + super().key(source_string) def assert_file_content(self, file_path, expected_content): """ diff --git a/pontoon/sync/tests/formats/test_xml.py b/pontoon/sync/tests/formats/test_xml.py index 314e3a0273..56ca91a137 100644 --- a/pontoon/sync/tests/formats/test_xml.py +++ b/pontoon/sync/tests/formats/test_xml.py @@ -12,8 +12,8 @@ assert_attributes_equal, create_named_tempfile, ) -from pontoon.sync.exceptions import ParseError from pontoon.sync.formats import xml +from pontoon.sync.formats.exceptions import ParseError from pontoon.sync.tests.formats import FormatTestsMixin diff --git a/pontoon/sync/tests/test_changeset.py b/pontoon/sync/tests/test_changeset.py deleted file mode 100644 index b1cc09f3ce..0000000000 --- a/pontoon/sync/tests/test_changeset.py +++ /dev/null @@ -1,654 +0,0 @@ -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from notifications.signals import notify - -from pontoon.actionlog.models import ActionLog -from pontoon.base.models import Entity -from pontoon.base.tests import ( - TranslationFactory, - UserFactory, - assert_attributes_equal, -) -from pontoon.base.utils import aware_datetime -from pontoon.sync.tests import FakeCheckoutTestCase - - -class ChangeSetTests(FakeCheckoutTestCase): - def test_execute_called_once(self): - """Raise a RuntimeError if execute is called more than once.""" - self.changeset.execute() - with pytest.raises(RuntimeError): - self.changeset.execute() - - def update_main_vcs_entity(self, **translation_changes): - for key, value in translation_changes.items(): - 
setattr(self.main_db_translation, key, value) - self.main_db_translation.save() - - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, self.main_vcs_entity - ) - self.changeset.execute() - - def test_changed_translations_created(self): - """ - Return a list of Translation objects that have been created. - """ - self.main_db_translation.delete() - self.update_main_db_entity() - translation = self.main_db_entity.translation_set.all()[0] - assert self.changeset.changed_translations == [translation] - - def test_changed_translations_no_changes(self): - """ - If there are no changes, changed_translations should return empty list. - """ - assert self.changeset.changed_translations == [] - - def test_update_vcs_entity(self): - """ - Update the VCS translations with translations in the database. - """ - self.main_vcs_resource.save = Mock() - self.other_vcs_resource.save = Mock() - - self.update_main_vcs_entity(string="New Translated String") - assert self.main_vcs_translation.strings == {None: "New Translated String"} - - # Ensure only resources that were updated are saved. - assert self.main_vcs_resource.save.called - assert not self.other_vcs_resource.save.called - - # Update the VCS translation with info about the last - # translation. - assert self.main_vcs_translation.last_updated == self.main_db_translation.date - assert ( - self.main_vcs_translation.last_translator == self.main_db_translation.user - ) - - def test_update_vcs_entity_unapproved(self): - """ - Do not update VCS with unapproved translations. If no approved - translations exist, delete existing ones. 
- """ - self.update_main_vcs_entity(approved=False) - assert self.main_vcs_translation.strings == {} - - def test_update_vcs_entity_fuzzy(self): - self.main_vcs_translation.fuzzy = False - self.update_main_vcs_entity(fuzzy=True) - assert self.main_vcs_translation.fuzzy - - def test_update_vcs_entity_not_fuzzy(self): - self.main_vcs_translation.fuzzy = True - self.update_main_vcs_entity(fuzzy=False) - assert not self.main_vcs_translation.fuzzy - - def test_update_vcs_last_translation_no_translations(self): - """ - If there are no translations in the database, do not set the - last_updated and last_translator fields on the VCS translation. - """ - self.main_db_translation.delete() - - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, self.main_vcs_entity - ) - self.changeset.execute() - - assert self.main_vcs_translation.last_updated is None - assert self.main_vcs_translation.last_translator is None - - def test_update_vcs_entity_user(self): - """Track translation authors for use in the commit message.""" - user = UserFactory.create() - self.update_main_vcs_entity(user=user) - assert self.changeset.commit_authors_per_locale["translated-locale"] == [user] - - def test_create_db(self): - """Create new entity in the database.""" - self.main_db_entity.delete() - - self.main_vcs_entity.key = "Source String" - self.main_vcs_entity.comments = ["first comment", "second"] - self.main_vcs_entity.order = 7 - self.main_vcs_translation.fuzzy = False - self.main_vcs_entity.string_plural = "plural string" - self.main_vcs_entity.source = ["foo.py:87"] - - self.changeset.create_db_entity(self.main_vcs_entity) - self.changeset.execute() - new_entity = Entity.objects.get( - resource__path=self.main_vcs_resource.path, - string=self.main_vcs_entity.string, - ) - assert_attributes_equal( - new_entity, - resource=self.main_db_resource, - string="Source String", - key="Source String", - comment="first comment\nsecond", - order=7, - string_plural="plural 
string", - source=["foo.py:87"], - ) - - new_translation = new_entity.translation_set.all()[0] - assert_attributes_equal( - new_translation, - locale=self.translated_locale, - string="Translated String", - plural_form=None, - approved=True, - approved_date=aware_datetime(1970, 1, 1), - fuzzy=False, - ) - - def test_send_notifications(self): - """ - Test if new string notifications are sent to the right users. - """ - notify.send = Mock() - - # No new entities created - new_entities = [] - self.changeset.send_notifications(new_entities) - assert not notify.send.called - - # New entity created - new_entities = [ - Entity.objects.get( - resource__path=self.main_vcs_resource.path, - string=self.main_vcs_entity.string, - ) - ] - self.changeset.send_notifications(new_entities) - assert notify.send.called - - def test_send_notifications_opt_out(self): - """ - Test if new string notifications are not sent to users that opt out. - """ - notify.send = Mock() - - # New entity created, user opts out of getting new string notifications - new_entities = [ - Entity.objects.get( - resource__path=self.main_vcs_resource.path, - string=self.main_vcs_entity.string, - ) - ] - self.main_db_translation.user.profile.new_string_notifications = False - self.main_db_translation.user.profile.save() - - self.changeset.send_notifications(new_entities) - assert not notify.send.called - - def update_main_db_entity(self): - self.changeset.update_db_entity( - self.translated_locale, self.main_db_entity, self.main_vcs_entity - ) - self.changeset.execute() - - def test_update_db_existing_translation(self): - """ - Update an existing translation in the DB with changes from VCS. - """ - # Set up DB and VCS to differ and require an update. 
- self.main_db_translation.fuzzy = True - self.main_db_translation.save() - - self.main_vcs_entity.key = "Source String" - self.main_vcs_entity.comments = ["first comment", "second"] - self.main_vcs_entity.order = 7 - self.main_vcs_entity.string_plural = "plural string" - self.main_vcs_entity.source = ["foo.py:87"] - self.main_vcs_translation.fuzzy = False - - self.update_main_db_entity() - self.main_db_entity.refresh_from_db() - assert_attributes_equal( - self.main_db_entity, - key="Source String", - comment="first comment\nsecond", - order=7, - string_plural="plural string", - source=["foo.py:87"], - ) - - self.main_db_translation.refresh_from_db() - assert_attributes_equal(self.main_db_translation, fuzzy=False) - - def test_update_db_clean_entity_translation(self): - """ - If no changes have been made to the database entity or the - translation, do not bother updating them in the database. - """ - self.update_main_db_entity() - - # TODO: It'd be nice if we didn't rely on internal changeset - # attributes to check this, but not vital. - assert self.main_db_entity not in self.changeset.entities_to_update - assert self.main_db_translation not in self.changeset.translations_to_update - - def test_update_db_approve_translation(self): - """ - Approve any un-approved translations that have counterparts in - VCS. - """ - self.main_db_translation.approved = False - self.main_db_translation.approved_date = None - self.main_db_translation.save() - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, - approved=True, - approved_date=aware_datetime(1970, 1, 1), - ) - - assert ActionLog.objects.filter( - action_type=ActionLog.ActionType.TRANSLATION_APPROVED, - translation=self.main_db_translation.pk, - ).exists() - - def test_update_db_dont_approve_fuzzy(self): - """ - Do not approve un-approved translations that have non-fuzzy - counterparts in VCS. 
- """ - self.main_db_translation.approved = False - self.main_db_translation.approved_date = None - self.main_db_translation.save() - self.main_vcs_translation.fuzzy = True - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, approved=False, approved_date=None - ) - - def test_update_db_new_translation(self): - """ - If a matching translation does not exist in the database, create a new - one. - """ - self.main_db_translation.delete() - self.update_main_db_entity() - - translation = self.main_db_entity.translation_set.all()[0] - assert_attributes_equal( - translation, - locale=self.translated_locale, - string="Translated String", - plural_form=None, - approved=True, - approved_date=aware_datetime(1970, 1, 1), - fuzzy=False, - ) - - assert ActionLog.objects.filter( - action_type=ActionLog.ActionType.TRANSLATION_CREATED, - translation=translation.pk, - ).exists() - - def test_update_db_unfuzzy_existing(self): - """ - Any existing fuzzy translations get unfuzzied. - """ - self.main_db_translation.approved = False - self.main_db_translation.fuzzy = True - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = "New Translated String" - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal(self.main_db_translation, fuzzy=False) - - def test_update_db_unapprove_existing(self): - """ - Any existing translations that don't match anything in VCS get - unapproved, unless they were created after self.now. 
- """ - self.main_db_translation.approved = True - self.main_db_translation.approved_date = aware_datetime(1970, 1, 1) - self.main_db_translation.approved_user = UserFactory.create() - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = "New Translated String" - - created_after_translation = TranslationFactory.create( - entity=self.main_db_entity, - approved=True, - approved_date=aware_datetime(1970, 1, 3), - ) - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, - approved=False, - approved_user=None, - approved_date=None, - ) - - assert ActionLog.objects.filter( - action_type=ActionLog.ActionType.TRANSLATION_REJECTED, - translation=self.main_db_translation.pk, - ).exists() - - created_after_translation.refresh_from_db() - assert_attributes_equal( - created_after_translation, - approved=True, - approved_date=aware_datetime(1970, 1, 3), - ) - - def test_update_db_unapprove_fuzzy(self): - """ - If an existing translation is fuzzy and doesn't match anything in VCS, - unapprove and unfuzzy that translation without rejecting it. 
- """ - self.main_db_translation.fuzzy = True - self.main_db_translation.approved = True - self.main_db_translation.approved_date = aware_datetime(1970, 1, 1) - self.main_db_translation.approved_user = UserFactory.create() - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = "New Translated String" - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, - approved=False, - approved_user=None, - approved_date=None, - rejected=False, - fuzzy=False, - ) - - assert ActionLog.objects.filter( - action_type=ActionLog.ActionType.TRANSLATION_UNAPPROVED, - translation=self.main_db_translation.pk, - ).exists() - - def test_update_db_unapprove_clean(self): - """ - If translations that are set to be unapproved were already unapproved, - don't bother updating them. - """ - self.main_db_translation.approved = False - self.main_db_translation.approved_date = None - self.main_db_translation.approved_user = None - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = "New Translated String" - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert self.main_db_translation not in self.changeset.translations_to_update - - def test_update_db_reject_approved(self): - """ - When a translation is submitted through VCS, reject any existing approved translations. 
- """ - self.main_db_translation.approved = True - self.main_db_translation.approved_date = aware_datetime(1970, 1, 1) - self.main_db_translation.approved_user = UserFactory.create() - self.main_db_translation.rejected = False - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = "New Translated String" - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, - rejected=True, - ) - - assert ActionLog.objects.filter( - action_type=ActionLog.ActionType.TRANSLATION_REJECTED, - translation=self.main_db_translation.pk, - ).exists() - - def test_update_db_reject_approved_skip_fuzzy(self): - """ - When a translation is submitted through VCS, reject any existing approved translations. - Unless the same translation is submitted and only made fuzzy. - """ - self.main_db_translation.approved = True - self.main_db_translation.approved_date = aware_datetime(1970, 1, 1) - self.main_db_translation.approved_user = UserFactory.create() - self.main_db_translation.rejected = False - self.main_db_translation.save() - self.main_vcs_translation.strings[None] = self.main_db_translation.string - self.main_vcs_translation.fuzzy = True - - self.update_main_db_entity() - self.main_db_translation.refresh_from_db() - assert_attributes_equal( - self.main_db_translation, - rejected=False, - ) - - def test_obsolete_db(self): - self.changeset.obsolete_db_entity(self.main_db_entity) - self.changeset.execute() - self.main_db_entity.refresh_from_db() - assert self.main_db_entity.obsolete - - def test_no_new_translations(self): - """ - Don't change any resource if there aren't any new translations. 
- """ - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - approved=True, - date=aware_datetime(2015, 1, 1), - ) - - with patch.object( - self.main_db_entity, "has_changed", return_value=False - ) as mock_has_changed: - resource_file = MagicMock() - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - self.changeset.vcs_project.resources = { - self.main_db_entity.resource.path: resource_file - } - - self.changeset.execute_update_vcs() - - assert mock_has_changed.called - assert not resource_file.save.called - - def test_changed_resources_sync(self): - """ - Synchronization should modify resource files if there - are changed translations. - """ - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - approved=True, - date=aware_datetime(2015, 1, 1), - ) - - resource_file = MagicMock() - self.changeset.vcs_project.resources = { - self.main_db_entity.resource.path: resource_file - } - - with patch.object( - self.main_db_entity, "has_changed", return_value=True - ) as mock_has_changed: - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - - self.changeset.execute_update_vcs() - assert mock_has_changed.called - assert resource_file.save.called - - def test_unchanged_resources_sync(self): - """ - Synchronization shouldn't modify resources if their - entities weren't changed. 
- """ - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - approved=True, - date=aware_datetime(2015, 1, 1), - ) - - resource_file = MagicMock() - self.changeset.vcs_project.resources = { - self.main_db_entity.resource.path: resource_file - } - - with patch.object( - self.main_db_entity, "has_changed", return_value=False - ) as mock_has_changed: - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - - self.changeset.execute_update_vcs() - assert mock_has_changed.called - assert len(resource_file.save.mock_calls) == 0 - - -class AuthorsTests(FakeCheckoutTestCase): - """ - Tests authors of translations passed to the final commit message. - """ - - def test_multiple_authors(self): - """ - Commit message should include authors from translations of separate - entities. - """ - first_author, second_author = UserFactory.create_batch(2) - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=first_author, - approved=True, - ) - TranslationFactory.create( - locale=self.translated_locale, entity=self.main_db_entity, approved=False - ) - TranslationFactory.create( - locale=self.translated_locale, - entity=self.other_db_entity, - user=second_author, - approved=True, - ) - TranslationFactory.create( - locale=self.translated_locale, entity=self.other_db_entity, approved=False - ) - - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - self.changeset.update_vcs_entity( - self.translated_locale, self.other_db_entity, MagicMock() - ) - - self.changeset.execute_update_vcs() - - assert self.changeset.commit_authors_per_locale[ - self.translated_locale.code - ] == [first_author, second_author] - - def test_plural_translations(self): - """ - If entity has some plural translations and approved translations their authors - should be included in commit message. 
- """ - first_author, second_author, third_author = UserFactory.create_batch(3) - - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=first_author, - approved=True, - ) - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=third_author, - approved=True, - plural_form=1, - ) - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=second_author, - approved=False, - ) - - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - - self.changeset.execute_update_vcs() - - assert set( - self.changeset.commit_authors_per_locale[self.translated_locale.code] - ) == {first_author, third_author} - - def test_multiple_translations(self): - """ - If there are multiple translations to the same locale, only authors of - the final approved version should be returned. - """ - first_author, second_author = UserFactory.create_batch(2) - - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=first_author, - approved=True, - ) - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - user=second_author, - approved=False, - ) - - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - - self.changeset.execute_update_vcs() - - assert self.changeset.commit_authors_per_locale[ - self.translated_locale.code - ] == [first_author] - - def test_no_translations(self): - """ - We don't attribute anyone if there aren't any new translations. 
- """ - TranslationFactory.create( - locale=self.translated_locale, - entity=self.main_db_entity, - approved=True, - date=aware_datetime(2015, 1, 1), - ) - - with patch.object(self.main_db_entity, "has_changed", return_value=False): - self.changeset.update_vcs_entity( - self.translated_locale, self.main_db_entity, MagicMock() - ) - self.changeset.execute_update_vcs() - assert ( - self.changeset.commit_authors_per_locale[self.translated_locale.code] - == [] - ) diff --git a/pontoon/sync/tests/test_checkouts.py b/pontoon/sync/tests/test_checkouts.py new file mode 100644 index 0000000000..2eb8408b39 --- /dev/null +++ b/pontoon/sync/tests/test_checkouts.py @@ -0,0 +1,149 @@ +from tempfile import TemporaryDirectory +from typing import Any +from unittest.mock import Mock, patch + +from django.test import TestCase + +from pontoon.base.models import Project, Repository +from pontoon.sync.core.checkout import Checkout, checkout_repos +from pontoon.sync.tests.utils import FileTree, build_file_tree + + +class MockVersionControl: + def __init__( + self, changes: tuple[list[str], list[str], list[tuple[str, str]]] | None + ): + self._calls: list[tuple[str, Any]] = [] + self._changes = changes + + def commit(self, *args): + self._calls.append(("commit", args)) + + def update(self, *args): + self._calls.append(("update", args)) + + def revision(self, *args): + self._calls.append(("revision", args)) + return "abc123" + + def changed_files(self, *args): + self._calls.append(("changed_files", args)) + return self._changes + + +class CheckoutsTests(TestCase): + def test_no_changes_with_prev_commit(self): + mock_vcs = MockVersionControl(changes=([], [], [])) + mock_repo = Mock( + Repository, + branch="BRANCH", + checkout_path="/foo/bar", + last_synced_revision="def456", + source_repo=True, + url="URL", + type=Repository.Type.GIT, + ) + with patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs): + co = Checkout("SLUG", mock_repo) + assert co.repo == mock_repo + assert 
co.is_source + assert co.url == "URL" + assert co.path == "/foo/bar" + assert co.prev_commit == "def456" + assert co.commit == "abc123" + assert not co.changed + assert not co.removed + assert mock_vcs._calls == [ + ("update", ("URL", "/foo/bar", "BRANCH", False)), + ("revision", ("/foo/bar",)), + ("changed_files", ("/foo/bar", "def456")), + ] + + mock_vcs._calls.clear() + co = Checkout("SLUG", mock_repo, pull=False) + assert mock_vcs._calls == [ + ("revision", ("/foo/bar",)), + ("changed_files", ("/foo/bar", "def456")), + ] + + mock_vcs._calls.clear() + co = Checkout("SLUG", mock_repo, shallow=True) + assert mock_vcs._calls == [ + ("update", ("URL", "/foo/bar", "BRANCH", True)), + ("revision", ("/foo/bar",)), + ] + + def test_no_changes_with_no_prev_commit(self): + tree: FileTree = { + "en-US": {"bar.ftl": "", "foo.ftl": "", ".other.ftl": ""}, + "fr": {"bar.ftl": "", "foo.ftl": ""}, + ".ignore": {"other.ftl": ""}, + } + with TemporaryDirectory() as root: + build_file_tree(root, tree) + mock_vcs = MockVersionControl(changes=([], [], [])) + mock_repo = Mock( + Repository, + branch="BRANCH", + checkout_path=root, + last_synced_revision=None, + source_repo=True, + url="URL", + type=Repository.Type.GIT, + ) + with patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs): + co = Checkout("SLUG", mock_repo) + assert co.path == root + assert co.prev_commit is None + assert not co.removed + assert sorted(co.changed) == [ + "en-US/bar.ftl", + "en-US/foo.ftl", + "fr/bar.ftl", + "fr/foo.ftl", + ] + assert mock_vcs._calls == [ + ("update", ("URL", root, "BRANCH", False)), + ("revision", (root,)), + ] + + @patch("pontoon.sync.core.checkout.Checkout") + def test_get_checkouts(self, _): + with self.assertRaises(Exception) as cm: + two_sources = Mock(**{"all.return_value": []}) + checkout_repos(Mock(Project, repositories=two_sources)) + assert str(cm.exception) == "No repository found" + + with self.assertRaises(Exception) as cm: + two_sources = Mock( + **{ + 
"all.return_value": [ + Mock(Repository, source_repo=True), + Mock(Repository, source_repo=True), + ] + } + ) + checkout_repos(Mock(Project, repositories=two_sources)) + assert str(cm.exception) == "Multiple source repositories" + + with self.assertRaises(Exception) as cm: + two_targets = Mock( + **{ + "all.return_value": [ + Mock(Repository, source_repo=False), + Mock(Repository, source_repo=False), + ] + } + ) + checkout_repos(Mock(Project, repositories=two_targets)) + assert str(cm.exception) == "Multiple target repositories" + + one_source = Mock(**{"all.return_value": [Mock(Repository, source_repo=True)]}) + result = checkout_repos(Mock(Project, repositories=one_source)) + assert result.source is not None + assert result.source == result.target + + one_target = Mock(**{"all.return_value": [Mock(Repository, source_repo=False)]}) + result = checkout_repos(Mock(Project, repositories=one_target)) + assert result.source is not None + assert result.source == result.target diff --git a/pontoon/sync/tests/test_checks.py b/pontoon/sync/tests/test_checks.py deleted file mode 100644 index f43bf8f65a..0000000000 --- a/pontoon/sync/tests/test_checks.py +++ /dev/null @@ -1,110 +0,0 @@ -from unittest.mock import PropertyMock, patch - -from pontoon.base.tests import EntityFactory, ResourceFactory, TranslationFactory -from pontoon.base.utils import aware_datetime -from pontoon.checks.models import ( - Error, - FailedCheck, - Warning, -) -from pontoon.sync.tests import FakeCheckoutTestCase - - -class TestChangesetTranslationsChecks(FakeCheckoutTestCase): - """ - Semi-integration tests for translation checks during a sync. 
- """ - - def setUp(self): - super().setUp() - - changed_translation_patch = patch( - "pontoon.sync.changeset.ChangeSet.changed_translations", - new_callable=PropertyMock, - ) - - self.mock_changed_translations = changed_translation_patch.start() - self.addCleanup(changed_translation_patch.stop) - - def test_bulk_check_translations_no_translations(self): - self.mock_changed_translations.return_value = [] - - assert self.changeset.bulk_check_translations() == set() - assert not Error.objects.exists() - assert not Warning.objects.exists() - - def test_bulk_check_valid_translations(self): - translation1, translation2 = TranslationFactory.create_batch( - 2, - locale=self.translated_locale, - entity=self.main_db_entity, - approved=True, - date=aware_datetime(2015, 1, 1), - ) - - self.mock_changed_translations.return_value = [ - translation1, - translation2, - ] - assert self.changeset.bulk_check_translations() == { - translation1.pk, - translation2.pk, - } - assert not Error.objects.exists() - assert not Warning.objects.exists() - - def test_bulk_check_invalid_translations(self): - """ - Test scenario: - * check if errors are detected - * check if only valid translation will land in the Translate Memory - """ - db_resource = ResourceFactory.create( - project=self.db_project, - path="resource.ftl", - format="ftl", - ) - db_entity = EntityFactory.create( - resource=db_resource, - string="key = Source String", - key="key", - obsolete=False, - ) - invalid_translation, valid_translation = TranslationFactory.create_batch( - 2, - locale=self.translated_locale, - entity=db_entity, - string="key = Translated String", - approved=True, - date=aware_datetime(2015, 1, 1), - ) - invalid_translation.string = "Translated String" - invalid_translation.save() - - # Clear TM entries for those translations - invalid_translation.memory_entries.all().delete() - valid_translation.memory_entries.all().delete() - - self.mock_changed_translations.return_value = [ - invalid_translation, - 
valid_translation, - ] - - valid_translations = self.changeset.bulk_check_translations() - - assert valid_translations == {valid_translation.pk} - - (error,) = Error.objects.all() - - assert error.library == FailedCheck.Library.PONTOON - assert error.message == 'Expected token: "="' - assert error.translation == invalid_translation - - self.changeset.translations_to_update = { - valid_translation.pk: valid_translation - } - - self.changeset.bulk_create_translation_memory_entries(valid_translations) - - assert not invalid_translation.memory_entries.exists() - assert valid_translation.memory_entries.count() == 1 diff --git a/pontoon/sync/tests/test_sync_projects.py b/pontoon/sync/tests/test_command.py similarity index 75% rename from pontoon/sync/tests/test_sync_projects.py rename to pontoon/sync/tests/test_command.py index 491b00b7cd..98fdfd48ce 100644 --- a/pontoon/sync/tests/test_sync_projects.py +++ b/pontoon/sync/tests/test_command.py @@ -18,14 +18,16 @@ def setUp(self): super().setUp() self.command = sync_projects.Command() self.command.verbosity = 0 - self.command.no_commit = False - self.command.no_pull = False + self.command.commit = True + self.command.pull = True self.command.force = False self.command.stderr = io.StringIO() Project.objects.filter(slug="pontoon-intro").delete() - self.mock_sync_project = self.patch_object(sync_projects, "sync_project") + self.mock_sync_project_task = self.patch_object( + sync_projects, "sync_project_task" + ) def execute_command(self, *args, **kwargs): kwargs.setdefault("verbosity", 0) @@ -48,12 +50,8 @@ def test_syncable_projects_only(self): ) self.execute_command() - self.mock_sync_project.delay.assert_called_with( - active_project.pk, - ANY, - no_pull=False, - no_commit=False, - force=False, + self.mock_sync_project_task.delay.assert_called_with( + active_project.pk, ANY, pull=True, commit=True, force=False ) def test_non_repository_projects(self): @@ -62,12 +60,8 @@ def test_non_repository_projects(self): repo_project 
= ProjectFactory.create(data_source=Project.DataSource.REPOSITORY) self.execute_command() - self.mock_sync_project.delay.assert_called_with( - repo_project.pk, - ANY, - no_pull=False, - no_commit=False, - force=False, + self.mock_sync_project_task.delay.assert_called_with( + repo_project.pk, ANY, pull=True, commit=True, force=False ) def test_project_slugs(self): @@ -78,12 +72,8 @@ def test_project_slugs(self): ignore_project, handle_project = ProjectFactory.create_batch(2) self.execute_command(projects=handle_project.slug) - self.mock_sync_project.delay.assert_called_with( - handle_project.pk, - ANY, - no_pull=False, - no_commit=False, - force=False, + self.mock_sync_project_task.delay.assert_called_with( + handle_project.pk, ANY, pull=True, commit=True, force=False ) def test_no_matching_projects(self): @@ -102,12 +92,8 @@ def test_invalid_slugs(self): self.execute_command(projects=handle_project.slug + ",aaa,bbb") - self.mock_sync_project.delay.assert_called_with( - handle_project.pk, - ANY, - no_pull=False, - no_commit=False, - force=False, + self.mock_sync_project_task.delay.assert_called_with( + handle_project.pk, ANY, pull=True, commit=True, force=False ) assert ( @@ -118,8 +104,8 @@ def test_invalid_slugs(self): def test_options(self): project = ProjectFactory.create() self.execute_command(no_pull=True, no_commit=True) - self.mock_sync_project.delay.assert_called_with( - project.pk, ANY, no_pull=True, no_commit=True, force=False + self.mock_sync_project_task.delay.assert_called_with( + project.pk, ANY, pull=False, commit=False, force=False ) def test_sync_log(self): diff --git a/pontoon/sync/tests/test_core.py b/pontoon/sync/tests/test_core.py deleted file mode 100644 index 46a2d70df0..0000000000 --- a/pontoon/sync/tests/test_core.py +++ /dev/null @@ -1,426 +0,0 @@ -import os.path - -from unittest.mock import ANY, MagicMock, Mock, PropertyMock, patch - -import pytest - -from pontoon.base.models import ( - Entity, - Repository, - Resource, - 
TranslatedResource, -) -from pontoon.base.tests import ( - CONTAINS, - NOT, - UserFactory, -) -from pontoon.sync.core import ( - commit_changes, - entity_key, - pull_locale_repo_changes, - update_entities, - update_resources, - update_translated_resources, - update_translated_resources_with_config, - update_translated_resources_without_config, - update_translations, -) -from pontoon.sync.tests import FAKE_CHECKOUT_PATH, FakeCheckoutTestCase - - -class UpdateEntityTests(FakeCheckoutTestCase): - def call_update_entities(self, collected): - with patch("pontoon.sync.core.collect_entities") as mock_collect_entities: - mock_collect_entities.return_value = collected - return update_entities(self.db_project, self.vcs_project, self.changeset) - - def test_none(self): - """ - If both the db_entity and vcs_entity are None, raise a - CommandError, as that should never happen. - """ - with pytest.raises(ValueError): - self.call_update_entities([("key", None, None)]) - - def test_obsolete(self): - """If VCS is missing the entity in question, obsolete it.""" - self.changeset.obsolete_db_entity = Mock() - self.call_update_entities([("key", self.main_db_entity, None)]) - self.changeset.obsolete_db_entity.assert_called_with(self.main_db_entity) - - def test_create(self): - """If the DB is missing an entity in VCS, create it.""" - self.changeset.create_db_entity = Mock() - self.call_update_entities([("key", None, self.main_vcs_entity)]) - self.changeset.create_db_entity.assert_called_with(self.main_vcs_entity) - - -class UpdateTranslationsTests(FakeCheckoutTestCase): - def call_update_translations(self, collected): - with patch("pontoon.sync.core.collect_entities") as mock_collect_entities: - mock_collect_entities.return_value = collected - return update_translations( - self.db_project, - self.vcs_project, - self.translated_locale, - self.changeset, - ) - - def test_missing_entities(self): - """If either of the entities is missing, skip it.""" - self.changeset.update_vcs_entity = 
Mock() - self.changeset.update_db_entity = Mock() - - self.call_update_translations( - [ - ("one", None, self.main_vcs_entity), - ("other", self.main_db_entity, None), - ("both", None, None), - ] - ) - assert not self.changeset.update_vcs_entity.called - assert not self.changeset.update_db_entity.called - - def test_no_translation(self): - """If no translation exists for a specific locale, skip it.""" - self.changeset.update_vcs_entity = Mock() - self.changeset.update_db_entity = Mock() - self.main_vcs_entity.has_translation_for = Mock(return_value=False) - - self.call_update_translations( - [("key", self.main_db_entity, self.main_vcs_entity)] - ) - assert not self.changeset.update_vcs_entity.called - assert not self.changeset.update_db_entity.called - - def test_db_changed(self): - """ - If the DB entity has changed since the last sync, update the - VCS. - """ - self.changeset.update_vcs_entity = Mock() - with patch.object(Entity, "has_changed", return_value=True): - self.call_update_translations( - [("key", self.main_db_entity, self.main_vcs_entity)] - ) - - self.changeset.update_vcs_entity.assert_called_with( - self.translated_locale, self.main_db_entity, self.main_vcs_entity - ) - - def test_vcs_changed(self): - """ - If the DB entity has not changed since the last sync, update the DB with - the latest changes from VCS. - """ - self.changeset.update_db_entity = Mock() - with patch.object(Entity, "has_changed", return_value=False): - self.call_update_translations( - [("key", self.main_db_entity, self.main_vcs_entity)] - ) - - self.changeset.update_db_entity.assert_called_with( - self.translated_locale, self.main_db_entity, self.main_vcs_entity - ) - - -class UpdateResourcesTests(FakeCheckoutTestCase): - def test_basic(self): - # Check for self.main_db_resource to be updated and - # self.other_db_resource to be created. 
- self.main_db_resource.total_strings = 5000 - self.main_db_resource.save() - self.other_db_resource.delete() - - update_resources(self.db_project, self.vcs_project) - self.main_db_resource.refresh_from_db() - assert self.main_db_resource.total_strings == len( - self.main_vcs_resource.entities - ) - - other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path) - assert other_db_resource.total_strings == len(self.other_vcs_resource.entities) - - def test_order(self): - # Check if Resource.order gets reset for all Project resources. - self.other_db_resource.delete() - - assert self.main_db_resource.order == 0 # path="main.po" - assert self.missing_db_resource.order == 0 # path="missing.po" - assert self.other_db_resource.order == 0 # path="other.po" - - update_resources(self.db_project, self.vcs_project) - self.missing_db_resource.refresh_from_db() - other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path) - - assert self.main_db_resource.order == 0 - assert self.missing_db_resource.order == 1 - assert other_db_resource.order == 2 - - -class UpdateTranslatedResourcesTests(FakeCheckoutTestCase): - @patch("pontoon.sync.core.update_translated_resources_without_config") - @patch("pontoon.sync.core.update_translated_resources_with_config") - def test_with_or_without_project_config( - self, - update_translated_resources_with_config_mock, - update_translated_resources_without_config_mock, - ): - """ - Pick the right update_translated_resources() method, depending on - whether the project configuration file is provided or not. 
- """ - # Without project config - self.vcs_project.configuration = None - update_translated_resources( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - assert not update_translated_resources_with_config_mock.called - assert update_translated_resources_without_config_mock.called - - # Reset called value - update_translated_resources_with_config_mock.called = False - update_translated_resources_without_config_mock.called = False - - # With project config - self.vcs_project.configuration = True - update_translated_resources( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - assert update_translated_resources_with_config_mock.called - assert not update_translated_resources_without_config_mock.called - - def test_project_configuration_basic(self): - """ - Create/update the TranslatedResource objects based on project configuration. - """ - with patch.object(self.vcs_project, "configuration") as configuration: - with patch.object(configuration, "locale_resources") as locale_resources: - locale_resources.return_value = [ - self.other_db_resource, - ] - - update_translated_resources_with_config( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - - assert TranslatedResource.objects.filter( - resource=self.other_db_resource, - locale=self.translated_locale, - ).exists() - - assert not TranslatedResource.objects.filter( - resource=self.missing_db_resource, - locale=self.translated_locale, - ).exists() - - def test_no_project_configuration_basic(self): - """ - Create/update the TranslatedResource object on all resources - available in the current locale. 
- """ - update_translated_resources_without_config( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - - assert TranslatedResource.objects.filter( - resource=self.main_db_resource, locale=self.translated_locale - ).exists() - - assert TranslatedResource.objects.filter( - resource=self.other_db_resource, locale=self.translated_locale - ).exists() - - assert not TranslatedResource.objects.filter( - resource=self.missing_db_resource, locale=self.translated_locale - ).exists() - - def test_no_project_configuration_asymmetric(self): - """ - Create/update the TranslatedResource object on asymmetric resources - even if they don't exist in the target locale. - """ - with patch.object( - Resource, "is_asymmetric", new_callable=PropertyMock - ) as is_asymmetric: - is_asymmetric.return_value = True - - update_translated_resources_without_config( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - - assert TranslatedResource.objects.filter( - resource=self.main_db_resource, locale=self.translated_locale - ).exists() - - assert TranslatedResource.objects.filter( - resource=self.other_db_resource, locale=self.translated_locale - ).exists() - - assert TranslatedResource.objects.filter( - resource=self.missing_db_resource, locale=self.translated_locale - ).exists() - - def test_no_project_configuration_extra_locales(self): - """ - Only create/update the TranslatedResource object for active locales, - even if the inactive locale has a resource. 
- """ - update_translated_resources_without_config( - self.db_project, - self.vcs_project, - self.translated_locale, - ) - - assert TranslatedResource.objects.filter( - resource=self.main_db_resource, locale=self.translated_locale - ).exists() - - assert TranslatedResource.objects.filter( - resource=self.other_db_resource, locale=self.translated_locale - ).exists() - - assert not TranslatedResource.objects.filter( - resource=self.main_db_resource, locale=self.inactive_locale - ).exists() - - assert not TranslatedResource.objects.filter( - resource=self.other_db_resource, locale=self.inactive_locale - ).exists() - - -class EntityKeyTests(FakeCheckoutTestCase): - def test_entity_key_common_string(self): - """ - Entities with the same string from different resources must not get the - same key from entity_key. - """ - assert entity_key( - self.main_vcs_resource.entities["Common String"] - ) != entity_key(self.other_vcs_resource.entities["Common String"]) - - -class CommitChangesTests(FakeCheckoutTestCase): - def setUp(self): - super().setUp() - self.mock_repo_commit = self.patch_object(Repository, "commit") - - def test_multiple_authors(self): - """ - Tests if multiple authors are passed to commit message. The - author with the most occurrences for the locale should be set as - the commit author. 
- """ - first_author, second_author = UserFactory.create_batch(2) - self.changeset.commit_authors_per_locale = { - self.translated_locale.code: [first_author, first_author, second_author] - } - self.db_project.repository_for_path = Mock(return_value=self.repository) - - commit_changes( - self.db_project, self.vcs_project, self.changeset, self.translated_locale - ) - self.repository.commit.assert_called_with( - CONTAINS( - first_author.display_name_and_email, - second_author.display_name_and_email, - ), - first_author, - os.path.join(FAKE_CHECKOUT_PATH, self.translated_locale.code), - ) - - def test_author_with_multiple_contributions(self): - """ - Tests if author with multiple contributions occurs once in commit message. - """ - author = UserFactory.create() - self.changeset.commit_authors_per_locale = { - self.translated_locale.code: [author, author] - } - self.db_project.repository_for_path = Mock(return_value=self.repository) - - commit_changes( - self.db_project, self.vcs_project, self.changeset, self.translated_locale - ) - self.repository.commit.assert_called_with( - CONTAINS(author.display_name_and_email), - author, - os.path.join(FAKE_CHECKOUT_PATH, self.translated_locale.code), - ) - commit_message = self.repository.commit.mock_calls[0][1][0] - assert commit_message.count(author.display_name_and_email) == 1 - - def test_no_authors(self): - """ - If no authors are found in the changeset, default to a fake - "Mozilla Pontoon" user. 
- """ - self.changeset.commit_authors_per_locale = {self.translated_locale.code: []} - self.db_project.repository_for_path = Mock(return_value=self.repository) - - commit_changes( - self.db_project, self.vcs_project, self.changeset, self.translated_locale - ) - self.repository.commit.assert_called_with( - NOT(CONTAINS("Authors:")), # Don't list authors in commit - ANY, - os.path.join(FAKE_CHECKOUT_PATH, self.translated_locale.code), - ) - user = self.mock_repo_commit.call_args[0][1] - assert user.first_name == "Pontoon" - assert user.email == "pontoon@example.com" - - -class PullChangesTests(FakeCheckoutTestCase): - def setUp(self): - super().setUp() - self.mock_repo_pull = self.patch_object(Repository, "pull") - self.locales = self.db_project.locales.all() - - def test_basic(self): - """ - Pull_changes should call repo.pull for each repo for the - project and return whether any changes happened in VCS. - """ - mock_db_project = MagicMock() - mock_db_project.repositories.all.return_value = [self.repository] - self.mock_repo_pull.return_value = {"single_locale": "asdf"} - - has_changed, _ = pull_locale_repo_changes(self.db_project, self.locales) - assert has_changed - - def test_unsure_changes(self): - """ - If any of the repos returns None as a revision number, consider - the VCS as changed even if the revisions match the last sync. - """ - self.mock_repo_pull.return_value = {"single_locale": None} - self.repository.last_synced_revisions = {"single_locale": None} - self.repository.save() - - has_changed, _ = pull_locale_repo_changes(self.db_project, self.locales) - assert has_changed - - def test_unchanged(self): - """ - If the revisions returned by repo.pull match those from the last - sync, consider the VCS unchanged and return False. 
- """ - self.mock_repo_pull.return_value = {"single_locale": "asdf"} - self.repository.last_synced_revisions = {"single_locale": "asdf"} - self.repository.save() - has_changed, _ = pull_locale_repo_changes( - self.db_project, locales=self.db_project.locales.all() - ) - assert not has_changed diff --git a/pontoon/sync/tests/test_e2e.py b/pontoon/sync/tests/test_e2e.py new file mode 100644 index 0000000000..eaad02b8de --- /dev/null +++ b/pontoon/sync/tests/test_e2e.py @@ -0,0 +1,374 @@ +import re + +from os import makedirs +from os.path import join +from tempfile import TemporaryDirectory +from textwrap import dedent +from unittest.mock import patch + +import pytest + +from django.conf import settings + +from pontoon.base.models import ChangedEntityLocale, TranslatedResource, Translation +from pontoon.base.tests import ( + EntityFactory, + LocaleFactory, + ProjectFactory, + RepositoryFactory, + ResourceFactory, + TranslatedResourceFactory, + TranslationFactory, +) +from pontoon.sync.tasks import sync_project_task +from pontoon.sync.tests import SyncLogFactory +from pontoon.sync.tests.test_checkouts import MockVersionControl +from pontoon.sync.tests.utils import build_file_tree + + +@pytest.mark.django_db +def test_end_to_end(): + mock_vcs = MockVersionControl(changes=([join("en-US", "c.ftl")], [], [])) + with ( + TemporaryDirectory() as root, + patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs), + patch("pontoon.sync.core.translations_to_repo.get_repo", return_value=mock_vcs), + ): + # Database setup + settings.MEDIA_ROOT = root + synclog = SyncLogFactory.create() + locale_de = LocaleFactory.create( + code="de-Test", name="Test German", total_strings=100 + ) + locale_fr = LocaleFactory.create( + code="fr-Test", name="Test French", total_strings=100 + ) + repo_src = RepositoryFactory( + url="http://example.com/src-repo", source_repo=True + ) + repo_tgt = RepositoryFactory(url="http://example.com/tgt-repo") + project = ProjectFactory.create( + 
name="test-project", + locales=[locale_de, locale_fr], + repositories=[repo_src, repo_tgt], + total_strings=10, + ) + ResourceFactory.create(project=project, path="a.ftl", format="ftl") + ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + for i in range(3): + entity = EntityFactory.create( + resource=res_c, key=f"key-{i}", string=f"key-{i} = Message {i}\n" + ) + for locale in [locale_de, locale_fr]: + TranslationFactory.create( + entity=entity, + locale=locale, + string=f"key-{i} = New translation {locale.code[:2]} {i}\n", + active=True, + approved=True, + ) + + # Filesystem setup + src_root = repo_src.checkout_path + c_ftl_src = dedent( + """\ + key-0 = Message 0 + # New entry comment + key-2 = Message 2 + key-3 = Message 3 + """ + ) + makedirs(src_root) + build_file_tree( + src_root, + {"en-US": {"a.ftl": "", "b.pot": "", "c.ftl": c_ftl_src}}, + ) + + tgt_root = repo_tgt.checkout_path + c_ftl_de = dedent( + """\ + key-0 = Translation de 0 + key-1 = Translation de 1 + key-2 = Translation de 2 + """ + ) + c_ftl_fr = dedent( + """\ + key-0 = Translation fr 0 + key-1 = Translation fr 1 + """ + ) + makedirs(tgt_root) + build_file_tree( + tgt_root, + { + "de-Test": {"a.ftl": "", "b.po": "", "c.ftl": c_ftl_de}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": c_ftl_fr}, + }, + ) + + # Test + assert len(ChangedEntityLocale.objects.filter(entity__resource=res_c)) == 6 + sync_project_task(project.id, synclog.id) + assert len(ChangedEntityLocale.objects.filter(entity__resource=res_c)) == 0 + with open(join(repo_tgt.checkout_path, "de-Test", "c.ftl")) as file: + assert ( + file.read() + == "key-0 = New translation de 0\n# New entry comment\nkey-2 = New translation de 2\n" + ) + with open(join(repo_tgt.checkout_path, "fr-Test", "c.ftl")) as file: + assert ( + file.read() + == "key-0 = New translation fr 0\n# New entry comment\nkey-2 = New translation fr 2\n" + ) + commit_msg: str = 
mock_vcs._calls[4][1][1] + assert mock_vcs._calls == [ + ("update", ("http://example.com/src-repo", src_root, "", False)), + ("revision", (src_root,)), + ("update", ("http://example.com/tgt-repo", tgt_root, "", False)), + ("revision", (tgt_root,)), + ( + "commit", + ( + tgt_root, + commit_msg, + f"{settings.VCS_SYNC_NAME} <{settings.VCS_SYNC_EMAIL}>", + "", + "http://example.com/tgt-repo", + ), + ), + ("revision", (tgt_root,)), + ] + assert re.fullmatch( + dedent( + r""" + Pontoon/test-project: Update Test (German|French) \((de|fr)-Test\), Test (German|French) \((de|fr)-Test\) + + Co-authored-by: test\d+ \((de|fr)-Test\) + Co-authored-by: test\d+ \((de|fr)-Test\) + Co-authored-by: test\d+ \((de|fr)-Test\) + Co-authored-by: test\d+ \((de|fr)-Test\) + """ + ).strip(), + commit_msg, + ) + assert TranslatedResource.objects.filter(resource__project=project).count() == 6 + + +@pytest.mark.django_db +def test_translation_before_source(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + synclog = SyncLogFactory.create() + locale_de = LocaleFactory.create(code="de-Test", name="Test German") + repo_src = RepositoryFactory( + url="http://example.com/src-repo", source_repo=True + ) + repo_tgt = RepositoryFactory(url="http://example.com/tgt-repo") + project = ProjectFactory.create( + name="trans-before-source", + locales=[locale_de], + repositories=[repo_src, repo_tgt], + ) + res_a = ResourceFactory.create(project=project, path="a.ftl", format="ftl") + TranslationFactory.create( + entity=EntityFactory.create( + resource=res_a, key="a0", string="a0 = Message 0\n" + ), + locale=locale_de, + string="a0 = Translation 0\n", + active=True, + approved=True, + ) + + res_b = ResourceFactory.create(project=project, path="b.ftl", format="ftl") + TranslationFactory.create( + entity=EntityFactory.create( + resource=res_b, key="b0", string="b0 = Message 0\n" + ), + locale=locale_de, + string="b0 = Translation 0\n", + active=True, + approved=True, + ) + + 
ChangedEntityLocale.objects.filter(entity__resource__project=project).delete() + + # Filesystem setup + src_root = repo_src.checkout_path + makedirs(src_root) + build_file_tree( + src_root, + { + "en-US": { + "a.ftl": "a0 = Message 0\n", + "b.ftl": "b0 = Message 0\n", + } + }, + ) + + tgt_root = repo_tgt.checkout_path + makedirs(tgt_root) + build_file_tree( + tgt_root, + { + "de-Test": { + "a.ftl": ("a0 = New translation 0\n" "a1 = New translation 1\n"), + "b.ftl": "b0 = Translation 0\n", + } + }, + ) + + # Sync + mock_vcs = MockVersionControl(changes=([join("de-Test", "a.ftl")], [], [])) + with ( + patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs), + patch( + "pontoon.sync.core.translations_to_repo.get_repo", + return_value=mock_vcs, + ), + ): + sync_project_task(project.id, synclog.id) + + # Test -- New a0 translation is picked up, added a1 is dropped + with open(join(repo_tgt.checkout_path, "de-Test", "a.ftl")) as file: + assert file.read() == "a0 = New translation 0\n" + + +@pytest.mark.django_db +def test_fuzzy(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + synclog = SyncLogFactory.create() + locale = LocaleFactory.create(code="fr-Test", name="Test French") + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-write-fuzzy", locales=[locale], repositories=[repo] + ) + res = ResourceFactory.create(project=project, path="res.po", format="po") + TranslatedResourceFactory.create(locale=locale, resource=res) + for i in range(5): + string = f"Message {i}\n" + fuzzy = i < 3 + entity = EntityFactory.create(resource=res, key=f"key-{i}", string=string) + TranslationFactory.create( + entity=entity, + locale=locale, + string=string.replace("Message", "Fuzzy" if fuzzy else "Translation"), + active=True, + approved=not fuzzy, + fuzzy=fuzzy, + ) + ChangedEntityLocale.objects.filter(entity__resource__project=project).delete() + + # Filesystem setup + res_src = 
dedent( + """ + #, fuzzy + msgid "key-0" + msgstr "" + + #, fuzzy + msgid "key-1" + msgstr "" + + msgid "key-2" + msgstr "" + + msgid "key-3" + msgstr "" + + #, fuzzy + msgid "key-4" + msgstr "" + """ + ) + res_tgt = dedent( + """ + #, fuzzy + msgid "key-0" + msgstr "Fuzzy 0" + + #, fuzzy + msgid "key-1" + msgstr "Fuzzy Changed 1" + + msgid "key-2" + msgstr "Not Fuzzy 2" + + msgid "key-3" + msgstr "Translation 3" + + #, fuzzy + msgid "key-4" + msgstr "Made Fuzzy 4" + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + {"en-US": {"res.pot": res_src}, "fr-Test": {"res.po": res_tgt}}, + ) + + # Sync + mock_vcs = MockVersionControl( + changes=([join("en-US", "res.pot"), join("fr-test", "res.po")], [], []) + ) + with ( + patch("pontoon.sync.core.checkout.get_repo", return_value=mock_vcs), + patch( + "pontoon.sync.core.translations_to_repo.get_repo", + return_value=mock_vcs, + ), + ): + sync_project_task(project.id, synclog.id) + + # Test + trans = Translation.objects.filter( + entity__resource=res, locale=locale, active=True + ).values_list("string", flat=True) + assert set(trans) == { + "Fuzzy 0", + "Fuzzy Changed 1", + "Not Fuzzy 2", + "Translation 3", + "Made Fuzzy 4", + } + assert set(trans.filter(fuzzy=True)) == { + "Fuzzy 0", + "Fuzzy Changed 1", + "Made Fuzzy 4", + } + assert set(trans.filter(approved=True)) == { + "Not Fuzzy 2", + "Translation 3", + } + with open(join(repo.checkout_path, "fr-Test", "res.po")) as file: + assert re.sub(r'^".*"\n', "", file.read(), flags=re.MULTILINE) == dedent( + """\ + # + msgid "" + msgstr "" + + #, fuzzy + msgid "key-0" + msgstr "Fuzzy 0" + + #, fuzzy + msgid "key-1" + msgstr "Fuzzy Changed 1" + + msgid "key-2" + msgstr "Not Fuzzy 2" + + msgid "key-3" + msgstr "Translation 3" + + #, fuzzy + msgid "key-4" + msgstr "Made Fuzzy 4" + """ + ) diff --git a/pontoon/sync/tests/test_entities.py b/pontoon/sync/tests/test_entities.py new file mode 100644 index 0000000000..1bb4f7ef59 --- /dev/null +++ 
b/pontoon/sync/tests/test_entities.py @@ -0,0 +1,261 @@ +from os import makedirs +from os.path import join +from tempfile import TemporaryDirectory +from textwrap import dedent +from unittest.mock import Mock + +import pytest + +from moz.l10n.paths import L10nDiscoverPaths + +from django.conf import settings +from django.utils import timezone + +from pontoon.base.models import Entity, Project, TranslatedResource +from pontoon.base.tests import ( + EntityFactory, + LocaleFactory, + ProjectFactory, + RepositoryFactory, + ResourceFactory, + TranslationFactory, +) +from pontoon.sync.core.checkout import Checkout, Checkouts +from pontoon.sync.core.entities import sync_entities_from_repo +from pontoon.sync.core.paths import find_paths +from pontoon.sync.core.stats import update_stats +from pontoon.sync.tests.utils import build_file_tree + + +now = timezone.now() + + +def test_no_changes(): + assert sync_entities_from_repo( + Mock(Project), + {}, + Mock(Checkout, changed=[], removed=[], renamed=[]), + Mock(L10nDiscoverPaths), + now, + ) == (0, set(), set()) + + +@pytest.mark.django_db +def test_remove_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-rm", locales=[locale], repositories=[repo] + ) + ResourceFactory.create(project=project, path="a.ftl", format="ftl") + ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + + # Filesystem setup + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": ""}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[], + removed=[join("en-US", "c.ftl")], + 
renamed=[], + ) + paths = find_paths(project, Checkouts(mock_checkout, mock_checkout)) + + # Test + assert sync_entities_from_repo( + project, locale_map, mock_checkout, paths, now + ) == (0, set(), {"c.ftl"}) + assert {res.path for res in project.resources.all()} == {"a.ftl", "b.po"} + with pytest.raises(TranslatedResource.DoesNotExist): + TranslatedResource.objects.get(resource=res_c) + + +@pytest.mark.django_db +def test_rename_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-mv", locales=[locale], repositories=[repo] + ) + ResourceFactory.create(project=project, path="a.ftl", format="ftl") + ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + + # Filesystem setup + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": "", "d.ftl": ""}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[], + removed=[], + renamed=[(join("en-US", "c.ftl"), join("en-US", "d.ftl"))], + ) + paths = find_paths(project, Checkouts(mock_checkout, mock_checkout)) + + # Test + assert sync_entities_from_repo( + project, locale_map, mock_checkout, paths, now + ) == (0, {"d.ftl"}, set()) + assert {res.path for res in project.resources.all()} == { + "a.ftl", + "b.po", + "d.ftl", + } + res_c.refresh_from_db() + assert res_c.path == "d.ftl" + + +@pytest.mark.django_db +def test_add_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = 
RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-add", locales=[locale], repositories=[repo] + ) + ResourceFactory.create(project=project, path="a.ftl", format="ftl") + ResourceFactory.create(project=project, path="b.po", format="po") + + # Filesystem setup + c_ftl = dedent( + """ + key-1 = Message 1 + key-2 = Message 2 + key-3 = Message 3 + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": "", "c.ftl": c_ftl}, + "fr-Test": {"a.ftl": "", "b.po": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[join("en-US", "c.ftl")], + removed=[], + renamed=[], + ) + paths = find_paths(project, Checkouts(mock_checkout, mock_checkout)) + + # Test + assert sync_entities_from_repo( + project, locale_map, mock_checkout, paths, now + ) == (3, set(), set()) + res_c = project.resources.get(path="c.ftl") + TranslatedResource.objects.get(resource=res_c) + assert set(ent.key for ent in Entity.objects.filter(resource=res_c)) == { + "key-1", + "key-2", + "key-3", + } + + +@pytest.mark.django_db +def test_update_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-up", locales=[locale], repositories=[repo] + ) + res = {} + for n in ("a", "b", "c"): + res[n] = ResourceFactory.create( + project=project, path=f"{n}.ftl", format="ftl", total_strings=3 + ) + for i in (1, 2, 3): + entity = EntityFactory.create( + resource=res[n], + key=f"key-{n}-{i}", + string=f"key-{n}-{i} = Message {i}\n", + ) + TranslationFactory.create( + entity=entity, + locale=locale, + string=f"key-{n}-{i} = Translation {i}\n", + active=True, + approved=True, + ) + + # Filesystem setup + c_ftl = dedent( + """ + key-c-2 = 
Message 2 + key-c-4 = Message 4 + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.ftl": "", "c.ftl": c_ftl}, + "fr-Test": {"a.ftl": "", "b.ftl": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[join("en-US", "c.ftl")], + removed=[], + renamed=[], + ) + paths = find_paths(project, Checkouts(mock_checkout, mock_checkout)) + + # Test sync + assert sync_entities_from_repo( + project, locale_map, mock_checkout, paths, now + ) == (1, {"c.ftl"}, set()) + assert set( + (ent.key, ent.obsolete) for ent in Entity.objects.filter(resource=res["c"]) + ) == { + ("key-c-1", True), + ("key-c-2", False), + ("key-c-3", True), + ("key-c-4", False), + } + + # Test stats + update_stats(project) + project.refresh_from_db() + assert (project.total_strings, project.approved_strings) == (8, 7) diff --git a/pontoon/sync/tests/test_paths.py b/pontoon/sync/tests/test_paths.py new file mode 100644 index 0000000000..076e79ee30 --- /dev/null +++ b/pontoon/sync/tests/test_paths.py @@ -0,0 +1,160 @@ +from os.path import join +from tempfile import TemporaryDirectory +from textwrap import dedent +from unittest.mock import Mock + +from pontoon.base.models import Project, Repository +from pontoon.sync.core.checkout import Checkout, Checkouts +from pontoon.sync.core.paths import find_paths +from pontoon.sync.tests.utils import FileTree, build_file_tree + + +def test_no_config_one_repo(): + tree: FileTree = { + "repo": { + "en-US": {"bar.ftl": "", "foo.pot": "", ".other.ftl": ""}, + "fr": {"bar.ftl": "", "foo.po": "", ".other.ftl": ""}, + ".ignore": {"other.ftl": ""}, + } + } + with TemporaryDirectory() as root: + build_file_tree(root, tree) + mock_project = Mock(Project, checkout_path=root, configuration_file=None) + mock_checkout = Mock( + Checkout, path=join(root, "repo"), removed=[join("en-US", "missing.ftl")] + ) + paths = find_paths(mock_project, Checkouts(mock_checkout, 
mock_checkout)) + assert paths.ref_root == join(root, "repo", "en-US") + assert paths.base == join(root, "repo") + assert set(paths.ref_paths) == set( + join(root, "repo", "en-US", file) + for file in ["bar.ftl", "foo.pot", "missing.ftl"] + ) + assert paths.find_reference("fr/bar.ftl") == ( + join(root, "repo", "en-US", "bar.ftl"), + {"locale": "fr"}, + ) + assert paths.find_reference("fr/foo.po") == ( + join(root, "repo", "en-US", "foo.pot"), + {"locale": "fr"}, + ) + assert paths.find_reference("fr/missing.ftl") == ( + join(root, "repo", "en-US", "missing.ftl"), + {"locale": "fr"}, + ) + assert paths.find_reference("fr/.other.ftl") is None + + +def test_no_config_two_repos(): + tree: FileTree = { + "source": {"bar.ftl": "", "foo.pot": "", ".other.ftl": ""}, + "target": { + "de": {"bar.ftl": "", "foo.po": ""}, + "fr": {"bar.ftl": "", "foo.po": ""}, + ".ignore": {"other.ftl": ""}, + }, + } + with TemporaryDirectory() as root: + build_file_tree(root, tree) + mock_project = Mock(Project, checkout_path=root, configuration_file=None) + checkouts = Checkouts( + Mock(Checkout, path=join(root, "source"), removed=[]), + Mock(Checkout, path=join(root, "target")), + ) + paths = find_paths(mock_project, checkouts) + assert paths.ref_root == join(root, "source") + assert paths.base == join(root, "target") + assert set(paths.ref_paths) == set( + join(root, "source", file) for file in ["bar.ftl", "foo.pot"] + ) + assert paths.find_reference(join(root, "target", "de", "bar.ftl")) == ( + join(root, "source", "bar.ftl"), + {"locale": "de"}, + ) + assert paths.find_reference(join(root, "target", "de", "foo.po")) == ( + join(root, "source", "foo.pot"), + {"locale": "de"}, + ) + + +def test_config_one_repo(): + tree: FileTree = { + "repo": { + "bar": {"en": {"bar.ftl": ""}, "fr": {"bar.ftl": ""}}, + "foo": {"en": {"foo.pot": ""}, "fr": {"foo.po": ""}}, + "l10n.toml": dedent( + """\ + [[paths]] + reference = "bar/en/bar.ftl" + l10n = "bar/{locale}/bar.ftl" + [[paths]] + reference = 
"foo/en/**" + l10n = "foo/{locale}/**" + """ + ), + } + } + with TemporaryDirectory() as root: + build_file_tree(root, tree) + mock_project = Mock(Project, checkout_path=root, configuration_file="l10n.toml") + mock_checkout = Mock(Checkout, path=join(root, "repo"), removed=[]) + paths = find_paths(mock_project, Checkouts(mock_checkout, mock_checkout)) + assert paths.ref_root == join(root, "repo") + assert paths.base == join(root, "repo") + assert set(paths.ref_paths) == set( + [ + join(root, "repo", "bar", "en", "bar.ftl"), + join(root, "repo", "foo", "en", "foo.pot"), + ] + ) + assert paths.find_reference(join(root, "repo", "foo", "fr", "foo.po")) == ( + join(root, "repo", "foo", "en", "foo.pot"), + {"locale": "fr"}, + ) + + +def test_config_two_repos(): + tree: FileTree = { + "source": { + "bar": {"en": {"bar.ftl": ""}}, + "foo": {"en": {"foo.pot": ""}}, + "l10n.toml": dedent( + """\ + [[paths]] + reference = "bar/en/bar.ftl" + l10n = "bar/{locale}/bar.ftl" + [[paths]] + reference = "foo/en/**" + l10n = "foo/{locale}/**" + """ + ), + }, + "target": { + "bar": {"fr": {"bar.ftl": ""}}, + "foo": {"fr": {"foo.po": ""}}, + }, + } + with TemporaryDirectory() as root: + build_file_tree(root, tree) + mock_project = Mock(Project, checkout_path=root, configuration_file="l10n.toml") + checkouts = Checkouts( + Mock(Checkout, path=join(root, "source"), removed=[]), + Mock( + Checkout, + path=join(root, "target"), + repo=Mock(Repository, checkout_path=join(root, "target")), + ), + ) + paths = find_paths(mock_project, checkouts) + assert paths.ref_root == join(root, "source") + assert paths.base == join(root, "target") + assert set(paths.ref_paths) == set( + [ + join(root, "source", "bar", "en", "bar.ftl"), + join(root, "source", "foo", "en", "foo.pot"), + ] + ) + assert paths.find_reference(join(root, "target", "foo", "fr", "foo.po")) == ( + join(root, "source", "foo", "en", "foo.pot"), + {"locale": "fr"}, + ) diff --git a/pontoon/sync/tests/test_repositories.py 
b/pontoon/sync/tests/test_repositories.py index c3a71c1c54..ca30107064 100644 --- a/pontoon/sync/tests/test_repositories.py +++ b/pontoon/sync/tests/test_repositories.py @@ -51,6 +51,7 @@ def test_changed_files(self, mock_popen): assert delta == ( ["changed_file1.properties", "changed_file2.properties"], ["removed_file1.properties", "removed_file2.properties"], + [], ) @patch("subprocess.Popen") diff --git a/pontoon/sync/tests/test_tasks.py b/pontoon/sync/tests/test_tasks.py deleted file mode 100644 index 8774c3334b..0000000000 --- a/pontoon/sync/tests/test_tasks.py +++ /dev/null @@ -1,450 +0,0 @@ -from unittest.mock import ANY, PropertyMock, patch - -import pytest - -from pontoon.base.models import ChangedEntityLocale, Locale, Project, Repository -from pontoon.base.tests import ( - CONTAINS, - ChangedEntityLocaleFactory, - ProjectFactory, - RepositoryFactory, - TestCase, - TranslationFactory, -) -from pontoon.base.utils import aware_datetime -from pontoon.sync.core import serial_task -from pontoon.sync.models import ProjectSyncLog, RepositorySyncLog, SyncLog -from pontoon.sync.tasks import sync_project, sync_translations -from pontoon.sync.tests import ( - FAKE_CHECKOUT_PATH, - FakeCheckoutTestCase, - ProjectSyncLogFactory, - SyncLogFactory, -) - - -class SyncProjectTests(TestCase): - def setUp(self): - super().setUp() - self.db_project = ProjectFactory.create() - self.repository = self.db_project.repositories.all()[0] - self.sync_log = SyncLogFactory.create() - - self.mock_pull_source_repo_changes = self.patch( - "pontoon.sync.tasks.pull_source_repo_changes", return_value=True - ) - self.mock_project_needs_sync = self.patch_object( - Project, "needs_sync", new_callable=PropertyMock, return_value=True - ) - - self.mock_sync_translations = self.patch("pontoon.sync.tasks.sync_translations") - - self.mock_update_originals = self.patch( - "pontoon.sync.tasks.update_originals", return_value=[[], [], [], []] - ) - - self.mock_source_directory_path = self.patch( - 
"pontoon.sync.vcs.project.VCSProject.source_directory_path", - return_value=self.repository.checkout_path, - ) - - def test_missing_project(self): - """ - If a project with the given PK doesn't exist, log it and exit. - """ - with patch("pontoon.sync.tasks.log") as mock_log: - with pytest.raises(Project.DoesNotExist): - sync_project(99999, self.sync_log.pk) - mock_log.error.assert_called_with(CONTAINS("99999")) - assert not self.mock_update_originals.called - - def test_missing_log(self): - """ - If a log with the given PK doesn't exist, log it and exit. - """ - with patch("pontoon.sync.tasks.log") as mock_log: - with pytest.raises(SyncLog.DoesNotExist): - sync_project(self.db_project.pk, 99999) - mock_log.error.assert_called_with(CONTAINS("99999")) - assert not self.mock_update_originals.called - - def test_db_changed_no_repo_changed(self): - """ - If the database has changes and VCS doesn't, skip syncing - resources, but sync translations. - """ - self.mock_pull_source_repo_changes.return_value = False - self.mock_project_needs_sync.return_value = True - - with patch("pontoon.sync.tasks.log") as mock_log: - sync_project(self.db_project.pk, self.sync_log.pk) - - sync_project(self.db_project.pk, self.sync_log.pk) - assert not self.mock_update_originals.called - mock_log.info.assert_called_with( - CONTAINS("Skipping syncing sources", self.db_project.slug) - ) - - def test_no_changes_skip(self): - """ - If the database and the source repository both have no - changes, and project has a single repository, skip sync. - """ - self.mock_pull_source_repo_changes.return_value = False - self.mock_project_needs_sync.return_value = False - - with patch("pontoon.sync.tasks.log") as mock_log: - sync_project(self.db_project.pk, self.sync_log.pk) - - assert not self.mock_update_originals.called - mock_log.info.assert_called_with( - CONTAINS("Skipping project", self.db_project.slug) - ) - - # When skipping, mark the project log properly. 
- assert ProjectSyncLog.objects.get(project=self.db_project).skipped - - def test_no_changes_force(self): - """ - If the database and VCS both have no changes, but force is true, - do not skip syncing resources. - """ - self.mock_pull_source_repo_changes.return_value = False - self.mock_project_needs_sync.return_value = False - - sync_project(self.db_project.pk, self.sync_log.pk, force=True) - assert self.mock_update_originals.called - - def test_no_pull(self): - """ - Don't call repo.pull if command.no_pull is True. - """ - sync_project(self.db_project.pk, self.sync_log.pk, no_pull=True) - assert not self.mock_pull_source_repo_changes.called - - def test_create_project_log(self): - assert not ProjectSyncLog.objects.exists() - sync_project(self.db_project.pk, self.sync_log.pk) - - log = ProjectSyncLog.objects.get(project=self.db_project) - assert self.mock_sync_translations.call_args[0][1].pk == log.pk - - -class SyncTranslationsTests(FakeCheckoutTestCase): - def setUp(self): - super().setUp() - self.project_sync_log = ProjectSyncLogFactory.create() - - self.mock_pull_locale_repo_changes = self.patch( - "pontoon.sync.tasks.pull_locale_repo_changes", return_value=[True, {}] - ) - self.mock_commit_changes = self.patch("pontoon.sync.tasks.commit_changes") - self.mock_pretranslate = self.patch("pontoon.sync.tasks.pretranslate") - self.mock_repo_checkout_path = self.patch_object( - Repository, - "checkout_path", - new_callable=PropertyMock, - return_value=FAKE_CHECKOUT_PATH, - ) - - def test_clear_changed_entities(self): - """ - Delete all ChangedEntityLocale objects for the project created - before the sync started after handling it. 
- """ - self.now = aware_datetime(1970, 1, 2) - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - - changed1, changed2, changed_after = ChangedEntityLocaleFactory.create_batch( - 3, - locale=self.translated_locale, - entity__resource=self.main_db_resource, - when=aware_datetime(1970, 1, 1), - ) - changed_after.when = aware_datetime(1970, 1, 3) - changed_after.save() - - sync_translations(self.db_project, self.project_sync_log, self.now, True) - with pytest.raises(ChangedEntityLocale.DoesNotExist): - changed1.refresh_from_db() - with pytest.raises(ChangedEntityLocale.DoesNotExist): - changed2.refresh_from_db() - changed_after.refresh_from_db() # Should not raise - - def test_no_commit(self): - """Don't call commit_changes if command.no_commit is True.""" - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - sync_translations( - self.db_project, self.project_sync_log, self.now, True, no_commit=True - ) - assert not self.mock_commit_changes.called - - def test_readonly_locales(self): - """Don't call commit_changes for locales in read-only mode.""" - project_locale = self.translated_locale.project_locale.get( - project=self.db_project, - ) - project_locale.readonly = True - project_locale.save() - - self.mock_pull_locale_repo_changes.return_value = [ - True, - { - self.repository.pk: Locale.objects.filter( - pk=self.translated_locale.pk, - ) - }, - ] - - sync_translations( - self.db_project, - self.project_sync_log, - self.now, - True, - no_commit=False, - ) - - assert not self.mock_commit_changes.called - - def test_remove_duplicate_approvals(self): - """ - Ensure that duplicate approvals are removed. - """ - # Trigger creation of new approved translation. 
- self.main_vcs_translation.strings[None] = "New Translated String" - self.main_vcs_translation.fuzzy = False - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - - # Translation approved after the sync started simulates the race - # where duplicate translations occur. - duplicate_translation = TranslationFactory.create( - entity=self.main_db_entity, - locale=self.translated_locale, - string="Other New Translated String", - approved=True, - approved_date=aware_datetime(1970, 1, 3), - ) - ChangedEntityLocale.objects.filter(entity=self.main_db_entity).delete() - - with patch("pontoon.sync.tasks.VCSProject", return_value=self.vcs_project): - sync_translations(self.db_project, self.project_sync_log, self.now, True) - - # Only one translation should be approved: the duplicate_translation. - assert self.main_db_entity.translation_set.filter(approved=True).count() == 1 - new_translation = self.main_db_entity.translation_set.get( - string="New Translated String" - ) - assert not new_translation.approved - assert new_translation.approved_date is None - - duplicate_translation.refresh_from_db() - assert duplicate_translation.approved - assert duplicate_translation.approved_date == aware_datetime(1970, 1, 3) - - def test_create_repository_log(self): - assert not RepositorySyncLog.objects.exists() - - repo = RepositoryFactory.create() - self.db_project.repositories.set([repo]) - self.db_project.save() - self.mock_pull_locale_repo_changes.return_value = [ - True, - {repo.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - - sync_translations(self.db_project, self.project_sync_log, self.now, True) - - log = RepositorySyncLog.objects.get(repository=repo.pk) - assert log.repository == repo - - def test_no_pretranslation(self): - """ - Ensure that pretranslation isn't called if pretranslation not enabled - or no new Entity, Locale or TranslatedResource is created. 
- """ - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - - sync_translations( - self.db_project, - self.project_sync_log, - self.now, - True, - [], - [], - [], - [self.main_db_entity], - ) - - # Pretranslation is not enabled - assert not self.mock_pretranslate.called - - self.db_project.pretranslation_enabled = True - self.db_project.save() - - with self.patch( - "pontoon.sync.tasks.update_translated_resources", return_value=False - ): - sync_translations(self.db_project, self.project_sync_log, self.now, True) - - # No new Entity, Locale or TranslatedResource - assert not self.mock_pretranslate.called - - def test_new_entities_pretranslation(self): - """ - Test if pretranslation is called for newly added entities. - """ - self.db_project.pretranslation_enabled = True - self.db_project.save() - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - all_locales = list(self.db_project.locales.values_list("pk", flat=True)) - - with self.patch( - "pontoon.sync.tasks.update_translated_resources", return_value=False - ): - sync_translations( - self.db_project, - self.project_sync_log, - self.now, - True, - [], - [], - [], - [self.main_db_entity], - ) - - assert self.mock_pretranslate.called - assert ( - self.mock_pretranslate.call_args[1]["entities"][0].pk - == self.main_db_entity.pk - ) - assert list(self.mock_pretranslate.call_args[1]["locales"]) == all_locales - - def test_new_translated_resource_pretranslation(self): - """ - Test if pretranslation is called for locales with newly added TranslatedResource. 
- """ - self.db_project.pretranslation_enabled = True - self.db_project.save() - self.mock_pull_locale_repo_changes.return_value = [ - True, - {self.repository.pk: Locale.objects.filter(pk=self.translated_locale.pk)}, - ] - - sync_translations( - self.db_project, - self.project_sync_log, - self.now, - True, - [], - [], - [], - [self.main_db_entity], - ) - - assert self.mock_pretranslate.called - assert self.mock_pretranslate.call_args[1]["locales"] == [ - self.translated_locale.pk - ] - - # Ensure that pretranslate is called only once for the locale. - assert self.mock_pretranslate.call_args[1].get("entities") is None - - -class UserError(Exception): - pass - - -class SyncExecutionTests(TestCase): - def test_serial_task(self): - """ - Test if sync will create lock in cache and release this after task is done. - """ - - @serial_task(100) - def test_task(self, call_subtask): - if call_subtask: - return subtask() - - def subtask(): - return test_task.delay() - - first_call = test_task.delay(call_subtask=True) - second_call = first_call.get() - - assert first_call.successful() - assert second_call.failed() - with pytest.raises(RuntimeError): - second_call.get() - - def test_release_lock_after_timeout(self): - """ - Tests if lock is released after specified timeout. - """ - with patch("pontoon.sync.core.cache") as mock_cache: - - @serial_task(3) - def timeout_task(self): - return 42 - - first_call = timeout_task.delay() - - assert first_call.successful() - assert first_call.get(), 42 - mock_cache.add.assert_called_with(ANY, ANY, timeout=3) - - def test_parametrized_serial_task(self): - """ - Serial task should be able to work simultaneously for different parameters. 
- """ - with patch("pontoon.sync.core.cache") as mock_cache: - - @serial_task(3, lock_key="param={0}") - def task_lock_key(self, param): - return param - - first_call = task_lock_key.delay(42) - second_call = task_lock_key.delay(24) - assert first_call.successful() - assert second_call.successful() - assert first_call.get() == 42 - assert second_call.get() == 24 - mock_cache.add.assert_any_call( - CONTAINS("task_lock_key[param=42]"), ANY, timeout=3 - ) - mock_cache.add.assert_any_call( - CONTAINS("task_lock_key[param=24]"), ANY, timeout=3 - ) - - def test_exception_during_sync(self): - """ - Any error during performing synchronization should release the lock. - """ - - @serial_task(100) - def exception_task(self): - raise UserError - - first_call = exception_task.delay() - second_call = exception_task.delay() - - assert first_call.failed() - assert second_call.failed() - with pytest.raises(UserError): - first_call.get() - with pytest.raises(UserError): - second_call.get() diff --git a/pontoon/sync/tests/test_translations_from_repo.py b/pontoon/sync/tests/test_translations_from_repo.py new file mode 100644 index 0000000000..8f803deaeb --- /dev/null +++ b/pontoon/sync/tests/test_translations_from_repo.py @@ -0,0 +1,215 @@ +from os import makedirs +from os.path import join +from tempfile import TemporaryDirectory +from textwrap import dedent +from unittest.mock import Mock + +import pytest + +from django.conf import settings +from django.utils import timezone + +from pontoon.actionlog.models import ActionLog +from pontoon.base.models import ( + Entity, + TranslatedResource, + Translation, + TranslationMemoryEntry, +) +from pontoon.base.tests import ( + EntityFactory, + LocaleFactory, + ProjectFactory, + RepositoryFactory, + ResourceFactory, + TranslatedResourceFactory, + TranslationFactory, +) +from pontoon.sync.core.checkout import Checkout, Checkouts +from pontoon.sync.core.paths import find_paths +from pontoon.sync.core.stats import update_stats +from 
pontoon.sync.core.translations_from_repo import sync_translations_from_repo +from pontoon.sync.tests.utils import build_file_tree + + +now = timezone.now() + + +@pytest.mark.django_db +def test_add_ftl_translation(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-add-ftl", + locales=[locale], + repositories=[repo], + total_strings=9, + ) + res = {} + for id in ["a", "b", "c"]: + res[id] = ResourceFactory.create( + project=project, path=f"{id}.ftl", format="ftl", total_strings=3 + ) + TranslatedResourceFactory.create( + locale=locale, resource=res[id], total_strings=3 + ) + for i in range(3): + key = f"key-{id}-{i}" + string = f"{key} = Message {id} {i}\n" + entity = EntityFactory.create(resource=res[id], string=string, key=key) + if id != "c" or i != 2: + TranslationFactory.create( + entity=entity, + locale=locale, + string=string.replace("Message", "Translation"), + active=True, + approved=True, + ) + TranslationFactory.create( + entity=Entity.objects.get(resource=res["c"], key="key-c-1"), + locale=locale, + string="key-c-1 = New translation c 1\n", + ) + + project.refresh_from_db() + assert project.total_strings == 9 + assert project.approved_strings == 8 + + # Filesystem setup + c_ftl = dedent( + """ + key-c-0 = Translation c 0 + key-c-1 = New translation c 1 + key-c-2 = New translation c 2 + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.ftl": "", "c.ftl": ""}, + "fr-Test": {"a.ftl": "", "b.ftl": "", "c.ftl": c_ftl}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[join("fr-Test", "c.ftl")], + removed=[], + ) + checkouts = Checkouts(mock_checkout, mock_checkout) + paths = find_paths(project, checkouts) + + # Test sync + 
removed_resources, updated_translations = sync_translations_from_repo( + project, locale_map, checkouts, paths, [], now + ) + assert (removed_resources, updated_translations) == (0, 2) + translations = Translation.objects.filter( + entity__resource=res["c"], locale=locale + ) + assert set(trans.entity.key for trans in translations) == { + "key-c-0", + "key-c-1", + "key-c-2", + } + tr_c2 = next(trans for trans in translations if trans.entity.key == "key-c-2") + assert not tr_c2.user + + # Test actions + assert { + (action.translation.string, action.action_type) + for action in ActionLog.objects.filter(translation__in=translations) + } == { + ("key-c-1 = Translation c 1\n", "translation:rejected"), + ("key-c-1 = New translation c 1\n", "translation:approved"), + ("key-c-2 = New translation c 2\n", "translation:created"), + } + + # Test stats + update_stats(project) + project.refresh_from_db() + assert project.total_strings == 9 + assert project.approved_strings == 9 + tm = TranslationMemoryEntry.objects.filter( + entity__resource=res["c"], translation__isnull=False + ).values_list("target", flat=True) + assert set(tm) == { + "Translation c 0", + "Translation c 1", + "New translation c 1", + "New translation c 2", + } + + +@pytest.mark.django_db +def test_remove_po_target_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test") + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-rm-po", locales=[locale], repositories=[repo] + ) + res = {} + for id in ["a", "b", "c"]: + res[id] = ResourceFactory.create( + project=project, path=f"{id}.po", format="po", total_strings=3 + ) + TranslatedResourceFactory.create(locale=locale, resource=res[id]) + for i in range(3): + key = f"key-{id}-{i}" + string = f"Message {id} {i}" + entity = EntityFactory.create(resource=res[id], string=string, key=key) + 
TranslationFactory.create( + entity=entity, + locale=locale, + string=string.replace("Message", "Translation"), + active=True, + approved=True, + ) + + # Filesystem setup + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.pot": "", "b.pot": "", "c.pot": ""}, + "fr-Test": {"a.po": "", "c.po": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[], + removed=[join("fr-Test", "b.po")], + ) + checkouts = Checkouts(mock_checkout, mock_checkout) + paths = find_paths(project, checkouts) + + # Test sync + removed_resources, updated_translations = sync_translations_from_repo( + project, locale_map, checkouts, paths, [], now + ) + assert (removed_resources, updated_translations) == (1, 0) + assert not TranslatedResource.objects.filter(locale=locale, resource=res["b"]) + assert not Translation.objects.filter(entity__resource=res["b"], locale=locale) + tm = TranslationMemoryEntry.objects.filter( + entity__resource=res["b"], translation__isnull=True + ) + assert len(tm) == 3 + + # Test stats + update_stats(project) + project.refresh_from_db() + assert (project.total_strings, project.approved_strings) == (6, 6) diff --git a/pontoon/sync/tests/test_translations_to_repo.py b/pontoon/sync/tests/test_translations_to_repo.py new file mode 100644 index 0000000000..18bc81703b --- /dev/null +++ b/pontoon/sync/tests/test_translations_to_repo.py @@ -0,0 +1,238 @@ +from os import makedirs +from os.path import exists, join +from tempfile import TemporaryDirectory +from textwrap import dedent +from unittest.mock import Mock + +import pytest + +from django.conf import settings +from django.utils import timezone + +from pontoon.base.models import ChangedEntityLocale +from pontoon.base.tests import ( + EntityFactory, + LocaleFactory, + ProjectFactory, + RepositoryFactory, + ResourceFactory, + TranslatedResourceFactory, + TranslationFactory, +) +from pontoon.sync.core.checkout import Checkout, Checkouts 
+from pontoon.sync.core.paths import find_paths +from pontoon.sync.core.translations_to_repo import sync_translations_to_repo +from pontoon.sync.tests.utils import build_file_tree + + +now = timezone.now() + + +@pytest.mark.django_db +def test_remove_resource(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test", total_strings=100) + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-rm-res", + locales=[locale], + repositories=[repo], + total_strings=10, + ) + res_a = ResourceFactory.create(project=project, path="a.ftl", format="ftl") + res_b = ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + TranslatedResourceFactory.create(locale=locale, resource=res_a) + TranslatedResourceFactory.create(locale=locale, resource=res_b) + TranslatedResourceFactory.create(locale=locale, resource=res_c) + + # Filesystem setup + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": ""}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": ""}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[], + removed=[join("en-US", "c.ftl")], + ) + checkouts = Checkouts(mock_checkout, mock_checkout) + paths = find_paths(project, checkouts) + + # Test + sync_translations_to_repo( + project, False, locale_map, checkouts, paths, [], set(), {"c.ftl"}, now + ) + assert exists(join(repo.checkout_path, "fr-Test", "b.po")) + assert not exists(join(repo.checkout_path, "fr-Test", "c.ftl")) + + +@pytest.mark.django_db +def test_remove_entity(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = LocaleFactory.create(code="fr-Test", total_strings=100) + locale_map = {locale.code: locale} + repo = 
RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-rm-ent", + locales=[locale], + repositories=[repo], + total_strings=10, + ) + res_a = ResourceFactory.create(project=project, path="a.ftl", format="ftl") + res_b = ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + TranslatedResourceFactory.create(locale=locale, resource=res_a) + TranslatedResourceFactory.create(locale=locale, resource=res_b) + TranslatedResourceFactory.create(locale=locale, resource=res_c, total_strings=3) + for i in range(3): + if i != 1: + entity = EntityFactory.create( + resource=res_c, key=f"key-{i}", string=f"key-{i} = Message {i}\n" + ) + TranslationFactory.create( + entity=entity, + locale=locale, + string=f"key-{i} = Translation {i}\n", + active=True, + approved=True, + ) + + # Filesystem setup + c_ftl_src = dedent( + """\ + key-0 = Message 0 + key-2 = Message 2 + """ + ) + c_ftl_tgt = dedent( + """\ + key-0 = Translation 0 + key-1 = Translation 1 + key-2 = Translation 2 + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": "", "c.ftl": c_ftl_src}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": c_ftl_tgt}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[join("en-US", "c.ftl")], + removed=[], + ) + checkouts = Checkouts(mock_checkout, mock_checkout) + paths = find_paths(project, checkouts) + + # Test + sync_translations_to_repo( + project, False, locale_map, checkouts, paths, [], {"c.ftl"}, set(), now + ) + with open(join(repo.checkout_path, "fr-Test", "c.ftl")) as file: + assert file.read() == dedent( + """\ + key-0 = Translation 0 + key-2 = Translation 2 + """ + ) + + +@pytest.mark.django_db +def test_add_translation(): + with TemporaryDirectory() as root: + # Database setup + settings.MEDIA_ROOT = root + locale = 
LocaleFactory.create(code="fr-Test", total_strings=100) + locale_map = {locale.code: locale} + repo = RepositoryFactory(url="http://example.com/repo") + project = ProjectFactory.create( + name="test-add-trans", + locales=[locale], + repositories=[repo], + total_strings=10, + ) + res_a = ResourceFactory.create(project=project, path="a.ftl", format="ftl") + res_b = ResourceFactory.create(project=project, path="b.po", format="po") + res_c = ResourceFactory.create(project=project, path="c.ftl", format="ftl") + TranslatedResourceFactory.create(locale=locale, resource=res_a) + TranslatedResourceFactory.create(locale=locale, resource=res_b) + TranslatedResourceFactory.create(locale=locale, resource=res_c, total_strings=3) + for i in range(3): + entity = EntityFactory.create( + resource=res_c, key=f"key-{i}", string=f"key-{i} = Message {i}\n" + ) + TranslationFactory.create( + entity=entity, + locale=locale, + string=f"key-{i} = Translation {i}\n", + active=True, + approved=True, + ) + + # Filesystem setup + c_ftl_src = dedent( + """\ + key-0 = Message 0 + key-1 = Message 1 + key-2 = Message 2 + """ + ) + c_ftl_tgt = dedent( + """\ + key-0 = Translation 0 + """ + ) + makedirs(repo.checkout_path) + build_file_tree( + repo.checkout_path, + { + "en-US": {"a.ftl": "", "b.pot": "", "c.ftl": c_ftl_src}, + "fr-Test": {"a.ftl": "", "b.po": "", "c.ftl": c_ftl_tgt}, + }, + ) + + # Paths setup + mock_checkout = Mock( + Checkout, + path=repo.checkout_path, + changed=[join("en-US", "c.ftl")], + removed=[], + ) + checkouts = Checkouts(mock_checkout, mock_checkout) + paths = find_paths(project, checkouts) + + # Test + db_changes = ChangedEntityLocale.objects.filter( + entity__resource__project=project + ) + assert len(db_changes) == 3 + sync_translations_to_repo( + project, False, locale_map, checkouts, paths, db_changes, set(), set(), now + ) + with open(join(repo.checkout_path, "fr-Test", "c.ftl")) as file: + assert file.read() == dedent( + """\ + key-0 = Translation 0 + key-1 = 
Translation 1 + key-2 = Translation 2 + """ + ) diff --git a/pontoon/sync/tests/test_utils.py b/pontoon/sync/tests/test_utils.py deleted file mode 100644 index 1100e9ae72..0000000000 --- a/pontoon/sync/tests/test_utils.py +++ /dev/null @@ -1,6 +0,0 @@ -from pontoon.sync.tests import LOCALE_SEPARATOR_TEST_PATH -from pontoon.sync.utils import uses_undercore_as_separator - - -def test_uses_undercore_as_separator(): - assert (uses_undercore_as_separator(LOCALE_SEPARATOR_TEST_PATH)) is True diff --git a/pontoon/sync/tests/test_vcs_config.py b/pontoon/sync/tests/test_vcs_config.py deleted file mode 100644 index b42c5eafec..0000000000 --- a/pontoon/sync/tests/test_vcs_config.py +++ /dev/null @@ -1,57 +0,0 @@ -import tempfile - -from http.client import HTTPException -from unittest.mock import patch - -from pontoon.base.tests import TestCase -from pontoon.sync.vcs.config import DownloadTOMLParser - - -class DownloadTOMLParserTests(TestCase): - def setUp(self): - self.requests_patcher = patch("pontoon.sync.vcs.config.requests.get") - self.requests_mock = self.requests_patcher.start() - self.temp_dir = tempfile.mkdtemp() - - def tearDown(self): - self.requests_patcher.stop() - - def test_config_file_not_found(self): - """ - When the project config file is not available, throw an error. 
- """ - self.requests_mock.return_value.raise_for_status.side_effect = HTTPException( - "not found" - ) - - with self.assertRaises(HTTPException): - parser = DownloadTOMLParser( - self.temp_dir, "https://example.com/", "l10n.toml" - ) - parser.parse() - - def test_remote_path(self): - parser = DownloadTOMLParser( - "", "https://example.com/without-locale-code/", "l10n.toml" - ) - self.assertEqual( - parser.get_remote_path("l10n.toml"), - "https://example.com/without-locale-code/l10n.toml", - ) - self.assertEqual( - parser.get_remote_path("subdir/l10n.toml"), - "https://example.com/without-locale-code/subdir/l10n.toml", - ) - - def test_local_path(self): - parser = DownloadTOMLParser(self.temp_dir, "", "aaa.toml") - self.assertEqual(parser.get_local_path("aaa.toml"), f"{self.temp_dir}/aaa.toml") - - def test_get_project_config(self): - parser = DownloadTOMLParser(self.temp_dir, "https://example.com/", "l10n.toml") - self.requests_mock.return_value.content = b"test-content" - project_config_path = parser.get_project_config("l10n.toml") - - self.assertTrue(self.requests_mock.called) - self.assertEqual(project_config_path, self.temp_dir + "/l10n.toml") - self.assertEqual(open(project_config_path).read(), "test-content") diff --git a/pontoon/sync/tests/test_vcs_models.py b/pontoon/sync/tests/test_vcs_models.py deleted file mode 100644 index f4d90f5476..0000000000 --- a/pontoon/sync/tests/test_vcs_models.py +++ /dev/null @@ -1,629 +0,0 @@ -import os - -from pathlib import Path -from unittest.mock import MagicMock, Mock, PropertyMock, patch - -from pontoon.base.models import ( - Locale, - Project, - Repository, -) -from pontoon.base.tests import ( - CONTAINS, - LocaleFactory, - ProjectFactory, - RepositoryFactory, - ResourceFactory, - TestCase, -) -from pontoon.sync.exceptions import ParseError -from pontoon.sync.tests import ( - PROJECT_CONFIG_CHECKOUT_PATH, - FakeCheckoutTestCase, - VCSEntityFactory, - VCSTranslationFactory, -) -from pontoon.sync.vcs.config import 
VCSConfiguration -from pontoon.sync.vcs.project import VCSProject -from pontoon.sync.vcs.resource import VCSResource - - -TEST_CHECKOUT_PATH = os.path.join( - os.path.dirname(__file__), "directory_detection_tests" -) - - -class VCSTestCase(TestCase): - """ - Setup fixtures that are shared between VCS tests. - """ - - def setUp(self): - self.get_project_config_patcher = patch( - "pontoon.sync.vcs.config.DownloadTOMLParser.get_project_config" - ) - self.get_project_config_mock = self.get_project_config_patcher.start() - self.get_project_config_mock.side_effect = lambda config_path: os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, config_path - ) - self.addCleanup(self.get_project_config_patcher.stop) - - -class VCSProjectTests(VCSTestCase): - def setUp(self): - # Force the checkout path to point to a test directory to make - # resource file loading pass during tests. - checkout_path_patch = patch.object( - Project, - "checkout_path", - new_callable=PropertyMock, - return_value=os.path.join(TEST_CHECKOUT_PATH, "no_resources_test"), - ) - self.mock_checkout_path = checkout_path_patch.start() - self.addCleanup(checkout_path_patch.stop) - - self.locale = LocaleFactory.create(code="XY") - self.project = ProjectFactory.create( - locales=[self.locale], - repositories__permalink="https://example.com/l10n/{locale_code}", - ) - self.vcs_project = VCSProject(self.project) - super().setUp() - - @patch.object(VCSProject, "source_directory_path", new_callable=PropertyMock) - def test_get_relevant_files_with_config(self, source_directory_path_mock): - """ - Return relative reference paths and locales of paths found in project configuration. 
- """ - source_directory_path_mock.return_value = "" - paths = ["locale/path/to/localizable_file.ftl"] - self.vcs_project.configuration = VCSConfiguration(self.vcs_project) - - # Return empty dict if no reference path found for any of the paths - with patch( - "pontoon.sync.vcs.config.VCSConfiguration.reference_path", - return_value=None, - ): - files = self.vcs_project.get_relevant_files_with_config(paths) - assert files == {} - - # Return empty dict if no reference path found for any of the paths - with patch( - "pontoon.sync.vcs.config.VCSConfiguration.reference_path", - return_value="reference/path/to/localizable_file.ftl", - ): - files = self.vcs_project.get_relevant_files_with_config(paths) - assert files == {"reference/path/to/localizable_file.ftl": [self.locale]} - - def test_get_relevant_files_without_config(self): - """ - Return relative paths and their locales if they start with locale repository paths. - """ - paths = [ - "locales/xy/path/to/localizable_file.ftl", - "some.random.file", - ".hidden_file", - ] - - locale_path_locales = { - "locales/ab": "AB", - "locales/cd": "CD", - "locales/xy": "XY", - } - - files = self.vcs_project.get_relevant_files_without_config( - paths, locale_path_locales - ) - - assert files == {"path/to/localizable_file.ftl": ["XY"]} - - def test_relative_resource_paths(self): - with patch.object( - VCSProject, - "source_directory_path", - new_callable=PropertyMock, - return_value="/root/", - ): - self.vcs_project.resource_paths_without_config = Mock( - return_value=["/root/foo.po", "/root/meh/bar.po"] - ) - - assert list(self.vcs_project.relative_resource_paths()) == [ - "foo.po", - "meh/bar.po", - ] - - def test_relative_resource_paths_pot(self): - """ - If a resource ends in .pot, replace the extension with .po since - relative paths are used within non-source locales that do not - have .pot files. 
- """ - with patch.object( - VCSProject, - "source_directory_path", - new_callable=PropertyMock, - return_value="/root/", - ): - self.vcs_project.resource_paths_without_config = Mock( - return_value=["/root/foo.pot", "/root/meh/bar.pot"] - ) - - assert list(self.vcs_project.relative_resource_paths()) == [ - "foo.po", - "meh/bar.po", - ] - - def test_source_directory_with_config(self): - """ - If project configuration provided, use source repository checkout path - as source directory path. - """ - self.vcs_project.configuration = Mock(return_value=[True]) - - assert ( - self.vcs_project.source_directory_path - == self.vcs_project.db_project.source_repository.checkout_path - ) - - def test_source_directory_path_no_resource(self): - """ - When searching for source directories, do not match directories that - do not contain resource files. - """ - checkout_path = os.path.join(TEST_CHECKOUT_PATH, "no_resources_test") - self.mock_checkout_path.return_value = checkout_path - - assert self.vcs_project.source_directory_path == os.path.join( - checkout_path, "real_resources", "templates" - ) - - def test_source_directory_scoring_templates(self): - """ - When searching for source directories, prefer directories named - `templates` over all others. - """ - checkout_path = os.path.join(TEST_CHECKOUT_PATH, "scoring_templates_test") - self.mock_checkout_path.return_value = checkout_path - - assert self.vcs_project.source_directory_path == os.path.join( - checkout_path, "templates" - ) - - def test_source_directory_scoring_en_US(self): - """ - When searching for source directories, prefer directories named - `en-US` over others besides `templates`. 
- """ - checkout_path = os.path.join(TEST_CHECKOUT_PATH, "scoring_en_US_test") - self.mock_checkout_path.return_value = checkout_path - - assert self.vcs_project.source_directory_path == os.path.join( - checkout_path, "en-US" - ) - - def test_source_directory_scoring_source_files(self): - """ - When searching for source directories, prefer directories with - source-only formats over all others. - """ - checkout_path = os.path.join(TEST_CHECKOUT_PATH, "scoring_source_files_test") - self.mock_checkout_path.return_value = checkout_path - - assert self.vcs_project.source_directory_path == os.path.join( - checkout_path, "en" - ) # en has pot files in it - - def test_resources_parse_error(self): - """ - If VCSResource() raises a ParseError while loading, log an error - and skip the resource. - """ - self.vcs_project.relative_resource_paths = Mock( - return_value=["failure", "success"] - ) - - # Fail only if the path is failure so we can test the ignore. - def vcs_resource_constructor(project, path, locales=None): - if path == "failure": - raise ParseError("error message") - else: - return "successful resource" - - changed_vcs_resources = {"success": [], "failure": []} - with ( - patch("pontoon.sync.vcs.project.VCSResource") as MockVCSResource, - patch("pontoon.sync.vcs.project.log") as mock_log, - patch.object( - VCSProject, - "changed_files", - new_callable=PropertyMock, - return_value=changed_vcs_resources, - ), - ): - MockVCSResource.side_effect = vcs_resource_constructor - - assert self.vcs_project.resources == {"success": "successful resource"} - mock_log.error.assert_called_with(CONTAINS("failure", "error message")) - - @patch.object(Repository, "checkout_path", new_callable=PropertyMock) - def test_resource_paths_with_config(self, checkout_path_mock): - """ - If project configuration provided, use it to collect absolute paths to all - source resources within the source repository checkout path. 
- """ - checkout_path_mock.return_value = PROJECT_CONFIG_CHECKOUT_PATH - self.vcs_project.db_project.configuration_file = "l10n.toml" - self.vcs_project.configuration = VCSConfiguration(self.vcs_project) - - assert sorted(list(self.vcs_project.resource_paths_with_config())) == sorted( - [ - os.path.join(PROJECT_CONFIG_CHECKOUT_PATH, "values/amo.pot"), - os.path.join(PROJECT_CONFIG_CHECKOUT_PATH, "values/strings.properties"), - os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, "values/strings_child.properties" - ), - os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, - "values/strings_reality.properties", - ), - ] - ) - - @patch.object(VCSProject, "source_directory_path", new_callable=PropertyMock) - def test_resource_paths_without_config_region_properties( - self, source_directory_path_mock - ): - """ - If a project has a repository_url in pontoon.base.MOZILLA_REPOS, - resource_paths_without_config should ignore files named - "region.properties". - """ - source_directory_path_mock.return_value = "/root" - url = "https://moz.example.com" - self.project.repositories.all().delete() - self.project.repositories.add(RepositoryFactory.create(url=url)) - - with ( - patch("pontoon.sync.vcs.project.os", wraps=os) as mock_os, - patch("pontoon.sync.vcs.project.MOZILLA_REPOS", [url]), - ): - mock_os.walk.return_value = [ - ("/root", [], ["foo.pot", "region.properties"]) - ] - - assert list(self.vcs_project.resource_paths_without_config()) == [ - os.path.join("/root", "foo.pot") - ] - - @patch.object(VCSProject, "source_directory_path", new_callable=PropertyMock) - def test_resource_paths_without_config_exclude_hidden( - self, source_directory_path_mock - ): - """ - We should filter out resources that are contained in the hidden paths. 
- """ - source_directory_path_mock.return_value = "/root" - hidden_paths = ( - ("/root/.hidden_folder/templates", [], ("bar.pot",)), - ("/root/templates", [], ("foo.pot",)), - ) - with patch( - "pontoon.sync.vcs.project.os.walk", - wraps=os, - return_value=hidden_paths, - ): - assert list(self.vcs_project.resource_paths_without_config()) == [ - "/root/templates/foo.pot" - ] - - -class VCSConfigurationTests(VCSTestCase): - toml = "l10n.toml" - - def setUp(self): - super().setUp() - self.locale, _ = Locale.objects.get_or_create(code="fr") - - self.repository = RepositoryFactory() - self.db_project = ProjectFactory.create( - repositories=[self.repository], - ) - - checkout_path_patch = patch.object( - Repository, - "checkout_path", - new_callable=PropertyMock, - return_value=PROJECT_CONFIG_CHECKOUT_PATH, - ) - self.mock_checkout_path = checkout_path_patch.start() - self.addCleanup(checkout_path_patch.stop) - - self.resource_amo = ResourceFactory.create( - project=self.db_project, - path="values/amo.pot", - ) - self.resource_strings = ResourceFactory.create( - project=self.db_project, - path="values/strings.properties", - ) - self.resource_strings_reality = ResourceFactory.create( - project=self.db_project, - path="values/strings_reality.properties", - ) - self.resource_strings_child = ResourceFactory.create( - project=self.db_project, - path="values/strings_child.properties", - ) - - # Make sure VCSConfiguration instance is initialized - self.db_project.configuration_file = self.toml - self.db_project.source_repository.permalink_prefix = "https://example.com/" - self.vcs_project = VCSProject(self.db_project, locales=[self.locale]) - - def test_add_locale(self): - config = self.vcs_project.configuration.parsed_configuration - locale_code = "new-locale-code" - - assert locale_code not in config.all_locales - - self.vcs_project.configuration.add_locale(locale_code) - - assert locale_code in config.locales - - def test_get_or_set_project_files_reference(self): - 
self.vcs_project.configuration.add_locale = Mock() - locale_code = None - - assert ( - self.vcs_project.configuration.get_or_set_project_files( - locale_code, - ).locale - == locale_code - ) - - assert not self.vcs_project.configuration.add_locale.called - - def test_get_or_set_project_files_l10n(self): - self.vcs_project.configuration.add_locale = Mock() - locale_code = self.locale.code - - assert ( - self.vcs_project.configuration.get_or_set_project_files( - locale_code, - ).locale - == locale_code - ) - - assert not self.vcs_project.configuration.add_locale.called - - def test_get_or_set_project_files_new_locale(self): - self.vcs_project.configuration.add_locale = Mock() - locale_code = "new-locale-code" - - assert ( - self.vcs_project.configuration.get_or_set_project_files( - locale_code, - ).locale - == locale_code - ) - - assert self.vcs_project.configuration.add_locale.called - - def test_l10n_path(self): - absolute_resource_path = os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, - "values/amo.pot", - ) - - l10n_path = os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, - "values-fr/amo.po", - ) - - assert ( - self.vcs_project.configuration.l10n_path( - self.locale, - absolute_resource_path, - ) - == l10n_path - ) - - def test_reference_path(self): - absolute_l10n_path = os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, - "values-fr/amo.po", - ) - - reference_path = os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, - "values/amo.pot", - ) - - assert ( - self.vcs_project.configuration.reference_path( - self.locale, - absolute_l10n_path, - ) - == reference_path - ) - - def test_locale_resources(self): - assert sorted( - self.vcs_project.configuration.locale_resources(self.locale), - key=lambda r: r.path, - ) == [ - self.resource_amo, - self.resource_strings, - self.resource_strings_child, - self.resource_strings_reality, - ] - - -class GrandFatheredVCSConfigurationTest(VCSConfigurationTests): - """Testing with deep includes and excludes""" - - toml = "grandfather.toml" - - def 
test_locale_resources(self): - # no resource_strings, excluded for `fr` - assert sorted( - self.vcs_project.configuration.locale_resources(self.locale), - key=lambda r: r.path, - ) == [ - self.resource_amo, - # self.resource_strings, - self.resource_strings_child, - self.resource_strings_reality, - ] - - -def setUpResource(self): - self.repository = RepositoryFactory() - self.db_project = ProjectFactory.create( - repositories=[self.repository], - ) - - checkout_path_patch = patch.object( - Repository, - "checkout_path", - new_callable=PropertyMock, - return_value=PROJECT_CONFIG_CHECKOUT_PATH, - ) - self.mock_checkout_path = checkout_path_patch.start() - self.addCleanup(checkout_path_patch.stop) - - # Make sure VCSConfiguration instance is initialized - self.db_project.configuration_file = "l10n.toml" - - self.db_project.source_repository.permalink_prefix = "https://example.com/" - self.vcs_project = VCSProject(self.db_project, locales=[self.locale]) - - -class VCSConfigurationFullLocaleTests(VCSTestCase): - def setUp(self): - self.locale, _ = Locale.objects.get_or_create(code="fr") - setUpResource(self) - super().setUp() - - def test_vcs_resource(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource(self.vcs_project, "values/strings.properties", [self.locale]) - assert r.files[self.locale].path == os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, "values-fr/strings.properties" - ) - - def test_vcs_resource_path(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource( - self.vcs_project, "values/strings_reality.properties", [self.locale] - ) - assert r.files[self.locale].path == os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, "values-fr/strings_reality.properties" - ) - - def test_vcs_resource_child(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource( - self.vcs_project, "values/strings_child.properties", [self.locale] - ) - assert r.files[self.locale].path == os.path.join( 
- PROJECT_CONFIG_CHECKOUT_PATH, "values-fr/strings_child.properties" - ) - - -class VCSConfigurationPartialLocaleTests(VCSTestCase): - def setUp(self): - self.locale, _ = Locale.objects.get_or_create(code="sl") - setUpResource(self) - super().setUp() - - def test_vcs_resource(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource(self.vcs_project, "values/strings.properties", [self.locale]) - assert r.files[self.locale].path == os.path.join( - PROJECT_CONFIG_CHECKOUT_PATH, "values-sl/strings.properties" - ) - - def test_vcs_resource_path(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource( - self.vcs_project, "values/strings_reality.properties", [self.locale] - ) - assert r.files == {} - - def test_vcs_resource_child(self): - self.vcs_project.configuration.add_locale(self.locale.code) - r = VCSResource( - self.vcs_project, "values/strings_child.properties", [self.locale] - ) - assert r.files == {} - - -class VCSEntityTests(VCSTestCase): - def test_has_translation_for(self): - """ - Return True if a translation exists for the given locale, even - if the translation is empty/falsey. - """ - empty_translation = VCSTranslationFactory(strings={}) - full_translation = VCSTranslationFactory(strings={None: "TRANSLATED"}) - entity = VCSEntityFactory() - entity.translations = {"empty": empty_translation, "full": full_translation} - - assert not entity.has_translation_for("missing") - assert entity.has_translation_for("empty") - assert entity.has_translation_for("full") - - -class VCSChangedConfigFilesTests(FakeCheckoutTestCase): - """ - Tests the algorithm that detects changes of Project Config files. 
- """ - - def test_no_config_changes(self): - changed_source_files = {"file1.po": [], "test.toml": []} - - with ( - patch.object( - self.vcs_project, "configuration" - ) as changed_config_files_mock, - patch.object( - self.vcs_project, - "changed_source_files", - return_value=changed_source_files, - ) as changed_source_files_mock, - ): - changed_config_files_mock.parsed_configuration.configs.__iter__.return_value = set() - changed_source_files_mock.__getitem__.return_value = changed_source_files - self.assertSetEqual(self.vcs_project.changed_config_files, set()) - - def test_changed_config_files(self): - config_file_mock = MagicMock() - config_file_mock.path = str( - Path(self.vcs_project.source_directory_path).joinpath( - Path("test-l10n.toml") - ) - ) - changed_config_files = [config_file_mock] - changed_source_files = { - "file1.po": [], - "test-l10n.toml": [], - } - - with ( - patch.object( - self.vcs_project, "configuration" - ) as changed_config_files_mock, - patch.object( - self.vcs_project, - "changed_source_files", - return_value=changed_source_files, - ) as changed_source_files_mock, - ): - changed_config_files_mock.parsed_configuration.configs.__iter__.return_value = changed_config_files - changed_source_files_mock.__getitem__.return_value = changed_source_files - - self.assertSetEqual( - self.vcs_project.changed_config_files, {"test-l10n.toml"} - ) diff --git a/pontoon/sync/tests/utils.py b/pontoon/sync/tests/utils.py new file mode 100644 index 0000000000..b7a0a4e0eb --- /dev/null +++ b/pontoon/sync/tests/utils.py @@ -0,0 +1,23 @@ +from os import mkdir +from os.path import join +from typing import Dict, Union + + +FileTree = Dict[str, Union[str, "FileTree"]] +"""Strings are files, dicts are directories.""" + + +def build_file_tree(root: str, tree: FileTree) -> None: + """ + Fill out `root` with `tree` contents: + Strings are files, dicts are directories. 
+ """ + for name, value in tree.items(): + path = join(root, name) + if isinstance(value, str): + with open(path, "x") as file: + if value: + file.write(value) + else: + mkdir(path) + build_file_tree(path, value) diff --git a/pontoon/sync/utils.py b/pontoon/sync/utils.py index 8c6a2745e9..a0924a9eaf 100644 --- a/pontoon/sync/utils.py +++ b/pontoon/sync/utils.py @@ -1,184 +1,109 @@ -import errno -import os - -from pontoon.base.models import Resource -from pontoon.base.utils import extension_in, first - - -def is_hidden(path: str) -> bool: - """ - Return true if path contains hidden directory. - """ - for p in path.split(os.sep): - if p.startswith("."): - return True - return False - - -def is_resource(filename: str) -> bool: - """ - Return True if the filename's extension is a supported Resource - format. - """ - return extension_in(filename, Resource.ALLOWED_EXTENSIONS) - - -def is_source_resource(filename: str) -> bool: - """ - Return True if the filename's extension is a source-only Resource - format. - """ - return extension_in(filename, Resource.SOURCE_EXTENSIONS) - - -def is_asymmetric_resource(filename: str) -> bool: - """ - Return True if the filename's extension is an asymmetric Resource - format. - """ - return extension_in(filename, Resource.ASYMMETRIC_FORMATS) - - -def get_parent_directory(path: str) -> str: - """ - Get parent directory of the path - """ - return os.path.abspath(os.path.join(path, os.pardir)) - - -def uses_undercore_as_separator(directory: str) -> bool: - """ - Return True if the names of folders in a directory contain more '_' than '-'. 
- """ - only_folders = [] - subdirs = os.listdir(directory) - - for i in subdirs: - if os.path.isdir(os.path.join(directory, i)): - only_folders.append(i) - - return "".join(only_folders).count("_") > "".join(only_folders).count("-") - - -def directory_contains_resources(directory_path: str, source_only=False) -> bool: - """ - Return True if the given directory contains at least one - supported resource file (checked via file extension), or False - otherwise. - - :param source_only: - If True, only check for source-only formats. - """ - resource_check = is_source_resource if source_only else is_resource - for root, dirnames, filenames in os.walk(directory_path): - # first() avoids checking past the first matching resource. - if first(filenames, resource_check) is not None: - return True - return False - - -def locale_directory_path( - checkout_path: str, locale_code: str, parent_directories: list[str] -) -> str: - """ - Path to the directory where strings for the given locale are - stored. 
- """ - - # Check paths that use underscore as locale/country code separator - locale_code_variants = [locale_code, locale_code.replace("-", "_")] - - # Optimization for directories with a lot of paths: if parent_directories - # is provided, we simply join it with locale_code and check if path exists - possible_paths = [ - path - for path in ( - os.path.join(parent_directory, locale) - for locale in locale_code_variants - for parent_directory in parent_directories +import re + +from io import BytesIO +from os.path import basename, exists, join, relpath +from tempfile import TemporaryDirectory +from zipfile import ZipFile + +from django.core.files import File +from django.utils import timezone + +from pontoon.base.models import ChangedEntityLocale, Locale, Project, User +from pontoon.base.models.repository import Repository +from pontoon.sync.core.checkout import checkout_repos +from pontoon.sync.core.paths import UploadPaths, find_paths +from pontoon.sync.core.stats import update_stats +from pontoon.sync.core.translations_from_repo import find_db_updates, write_db_updates +from pontoon.sync.core.translations_to_repo import update_changed_resources + + +# FIXME This is a temporary hack, to be replaced by 04/2025 with proper downloads. +def translations_target_url( + project: Project, locale: Locale, resource_path: str +) -> str | None: + """The target repository URL for a resource, for direct download.""" + + if project.repositories.count() > 1: + # HACK: Let's assume that no config is used, and the target repo root is the right base. 
+ target_repo: Repository = project.repositories.get(source_repo=False) + rel_path = f"{locale.code}/{resource_path}" + else: + checkouts = checkout_repos(project, shallow=True) + target_repo = checkouts.target.repo + paths = find_paths(project, checkouts) + target, _ = paths.target(resource_path) + if not target: + return None + abs_path = paths.format_target_path(target, locale.code) + rel_path = relpath(abs_path, checkouts.target.path).replace("\\", "/") + + github = re.search(r"\bgithub\.com[:/]([^/]+)/([^/]+)\.git$", target_repo.url) + if github: + org, repo = github.groups() + ref = f"refs/heads/{target_repo.branch}" if target_repo.branch else "HEAD" + return f"https://raw.githubusercontent.com/{org}/{repo}/{ref}/{rel_path}" + + gitlab = re.search(r"gitlab\.com[:/]([^/]+)/([^/]+)\.git$", target_repo.url) + if gitlab: + org, repo = gitlab.groups() + ref = target_repo.branch or "HEAD" + return f"https://gitlab.com/{org}/{repo}/-/raw/{ref}/{rel_path}?inline=false" + + if target_repo.permalink_prefix: + url = target_repo.permalink_prefix.format(locale_code=locale.code) + return f"{url}{'' if url.endswith('/') else '/'}{rel_path}" + + # Default to bare repo link + return re.sub(r"^.*?(://|@)", "https://", target_repo.url, count=1) + + +# FIXME Currently not in use, to be refactored for proper download support +def download_translations_zip( + project: Project, locale: Locale +) -> tuple[bytes, str] | tuple[None, None]: + checkouts = checkout_repos(project, shallow=True) + paths = find_paths(project, checkouts) + db_changes = ChangedEntityLocale.objects.filter( + entity__resource__project=project, locale=locale + ).select_related("entity__resource", "locale") + update_changed_resources(project, paths, {}, [], db_changes, set(), timezone.now()) + + bytes_io = BytesIO() + zipfile = ZipFile(bytes_io, "w") + for _, tgt_path in paths.all(): + filename = paths.format_target_path(tgt_path, locale.code) + if exists(filename): + arcname = relpath(filename, 
checkouts.target.path) + zipfile.write(filename, arcname) + zipfile.close() + + return bytes_io.getvalue(), f"{project.slug}.zip" + + +def import_uploaded_file( + project: Project, locale: Locale, res_path: str, upload: File, user: User +): + """Update translations in the database from an uploaded file.""" + + with TemporaryDirectory() as root: + file_path = join(root, basename(res_path)) + with open(file_path, "wb") as file: + for chunk in upload.chunks(): + file.write(chunk) + paths = UploadPaths(res_path, locale.code, file_path) + updates = find_db_updates( + project, {locale.code: locale}, [file_path], paths, [] ) - if os.path.exists(path) - ] or [ - os.path.join(root, locale) - for locale in locale_code_variants - for root, dirnames, filenames in os.walk(checkout_path) - if locale in dirnames - ] - - for possible_path in possible_paths: - if directory_contains_resources(possible_path): - return possible_path - - # If locale directory empty (asymmetric formats) - if possible_paths: - return possible_paths[0] - - raise OSError(f"Directory for locale `{locale_code or 'source'}` not found") - - -def locale_to_source_path(path: str) -> str: - """ - Return source resource path for the given locale resource path. - Source files for .po files are actually .pot. - """ - return path + "t" if path.endswith("po") else path - - -def source_to_locale_path(path: str) -> str: - """ - Return locale resource path for the given source resource path. - Locale files for .pot files are actually .po. - """ - return path[:-1] if path.endswith("pot") else path - - -def escape_apostrophes(value: str) -> str: - """ - Apostrophes (straight single quotes) have special meaning in Android strings.xml files, - so they need to be escaped using a preceding backslash. 
- - Learn more: - https://developer.android.com/guide/topics/resources/string-resource.html#escaping_quotes - """ - return value.replace("'", "\\'") - - -def unescape_apostrophes(value: str) -> str: - return value.replace("\\'", "'") - - -def escape_quotes(value: str) -> str: - """ - DTD files can use single or double quotes for identifying strings, - so " and ' are the safe bet that will work in both cases. - """ - value = value.replace('"', "\\"") - value = value.replace("'", "\\'") - - return value - - -def unescape_quotes(value: str) -> str: - value = value.replace("\\"", '"') - value = value.replace("\\u0022", '"') # Bug 1390111 - value = value.replace('\\"', '"') - - value = value.replace("\\'", "'") - value = value.replace("\\u0027", "'") # Bug 1390111 - value = value.replace("\\'", "'") - - return value - - -def create_parent_directory(path: str) -> None: - """ - Create parent directory of the given path if it doesn't exist yet. - """ - try: - os.makedirs(os.path.dirname(path)) - except OSError as e: - # Directory already exists - if e.errno == errno.EEXIST: - pass - else: - raise + if updates: + now = timezone.now() + write_db_updates(project, updates, user, now) + update_stats(project) + ChangedEntityLocale.objects.bulk_create( + ( + ChangedEntityLocale(entity_id=entity_id, locale_id=locale_id, when=now) + for entity_id, locale_id in updates + ), + ignore_conflicts=True, + ) + else: + raise Exception("Upload failed.") diff --git a/pontoon/sync/vcs/config.py b/pontoon/sync/vcs/config.py deleted file mode 100644 index 0fcf95b89c..0000000000 --- a/pontoon/sync/vcs/config.py +++ /dev/null @@ -1,190 +0,0 @@ -from os.path import join -from pathlib import Path -from urllib.parse import urljoin, urlparse - -import requests - -from compare_locales.paths import ProjectFiles, TOMLParser - -from django.utils.functional import cached_property - - -class DownloadTOMLParser(TOMLParser): - """ - This wrapper is a workaround for the lack of the shared and persistent 
filesystem - on Heroku workers. - Related: https://bugzilla.mozilla.org/show_bug.cgi?id=1530988 - """ - - def __init__(self, checkout_path, permalink_prefix, configuration_file): - self.checkout_path = join(checkout_path, "") - self.permalink_prefix = permalink_prefix - self.config_path = urlparse(permalink_prefix).path - self.config_file = configuration_file - - def get_local_path(self, path): - """Return the directory in which the config file should be stored.""" - local_path = path.replace(self.config_path, "") - - return join(self.checkout_path, local_path) - - def get_remote_path(self, path): - """Construct the link to the remote resource based on the local path.""" - remote_config_path = path.replace(self.checkout_path, "") - - return urljoin(self.permalink_prefix, remote_config_path) - - def get_project_config(self, path): - """Download the project config file and return its local path.""" - local_path = Path(self.get_local_path(path)) - local_path.parent.mkdir(parents=True, exist_ok=True) - - with local_path.open("wb") as f: - remote_path = self.get_remote_path(path) - config_file = requests.get(remote_path) - config_file.raise_for_status() - f.write(config_file.content) - return str(local_path) - - def parse(self, path=None, env=None, ignore_missing_includes=True): - """Download the config file before it gets parsed.""" - return super().parse( - self.get_project_config(path or self.config_file), - env, - ignore_missing_includes, - ) - - -class VCSConfiguration: - """ - Container for the project configuration, provided by the optional - configuration file. - - For more information, see: - https://moz-l10n-config.readthedocs.io/en/latest/fileformat.html. 
- """ - - def __init__(self, vcs_project): - self.vcs_project = vcs_project - self.configuration_file = vcs_project.db_project.configuration_file - self.project_files = {} - - @cached_property - def l10n_base(self): - """ - If project configuration provided, files could be stored in multiple - directories, so we just use the translation repository checkout path - """ - return self.vcs_project.db_project.translation_repositories()[0].checkout_path - - @cached_property - def parsed_configuration(self): - """Return parsed project configuration file.""" - if self.vcs_project.db_project.source_repository.permalink_prefix: - """If we have a permalink we download the configuration file""" - return DownloadTOMLParser( - self.vcs_project.db_project.source_repository.checkout_path, - self.vcs_project.db_project.source_repository.permalink_prefix, - self.configuration_file, - ).parse(env={"l10n_base": self.l10n_base}) - else: - """If we don't have a permalink we use the configuration file from the checkout path""" - return TOMLParser().parse( - join( - self.vcs_project.db_project.source_repository.checkout_path, - self.configuration_file, - ), - env={"l10n_base": self.l10n_base}, - ignore_missing_includes=True, - ) - - def add_locale(self, locale_code): - """ - Add new locale to project configuration. - """ - locales = self.parsed_configuration.locales or [] - locales.append(locale_code) - self.parsed_configuration.set_locales(locales) - - """ - TODO: For now we don't make changes to the configuration file to - avoid committing it to the VCS. The pytoml serializer messes with the - file layout (indents and newlines) pretty badly. 
We should fix the - serializer and replace the content of this method with the following - code: - - # Update configuration file - with open(self.configuration_path, 'r+b') as f: - data = pytoml.load(f) - data['locales'].append(locale_code) - f.seek(0) - f.write(pytoml.dumps(data, sort_keys=True)) - f.truncate() - - # Invalidate cached parsed configuration - del self.__dict__['parsed_configuration'] - - # Commit configuration file to VCS - commit_message = 'Update configuration file' - commit_author = User( - first_name=settings.VCS_SYNC_NAME, - email=settings.VCS_SYNC_EMAIL, - ) - repo = self.vcs_project.db_project.source_repository - repo.commit(commit_message, commit_author, repo.checkout_path) - """ - - def get_or_set_project_files(self, locale_code): - """ - Get or set project files for the given locale code. This approach - allows us to cache the files for later use. - - Also, make sure that the requested locale_code is available in the - configuration file. - """ - if ( - locale_code is not None - and locale_code not in self.parsed_configuration.all_locales - ): - self.add_locale(locale_code) - - return self.project_files.setdefault( - locale_code, - ProjectFiles(locale_code, [self.parsed_configuration]), - ) - - def l10n_path(self, locale, reference_path): - """ - Return l10n path for the given locale and reference path. - """ - project_files = self.get_or_set_project_files(locale.code) - - m = project_files.match(reference_path) - return m[0] if m is not None else None - - def reference_path(self, locale, l10n_path): - """ - Return reference path for the given locale and l10n path. - """ - project_files = self.get_or_set_project_files(locale.code) - - m = project_files.match(l10n_path) - return m[1] if m is not None else None - - def locale_resources(self, locale): - """ - Return a list of Resource instances, which need to be enabled for the - given locale. 
- """ - resources = [] - project_files = self.get_or_set_project_files(locale.code) - - for resource in self.vcs_project.db_project.resources.all(): - absolute_resource_path = join( - self.vcs_project.source_directory_path, resource.path - ) - - if project_files.match(absolute_resource_path): - resources.append(resource) - - return resources diff --git a/pontoon/sync/vcs/project.py b/pontoon/sync/vcs/project.py deleted file mode 100644 index c87caadb37..0000000000 --- a/pontoon/sync/vcs/project.py +++ /dev/null @@ -1,540 +0,0 @@ -import logging -import os -import shutil - -from itertools import chain - -from django.utils.functional import cached_property - -from pontoon.base import MOZILLA_REPOS -from pontoon.sync.exceptions import ParseError -from pontoon.sync.repositories import get_changed_files -from pontoon.sync.utils import ( - directory_contains_resources, - get_parent_directory, - is_asymmetric_resource, - is_hidden, - is_resource, - locale_directory_path, - source_to_locale_path, - uses_undercore_as_separator, -) -from pontoon.sync.vcs.config import VCSConfiguration -from pontoon.sync.vcs.resource import VCSResource - - -log = logging.getLogger(__name__) - - -class MissingSourceRepository(Exception): - """ - Raised when project can't find the repository - which contains source files. - """ - - -class MissingSourceDirectoryError(Exception): - """Raised when sync can't find the source directory for the locales.""" - - -class MissingLocaleDirectoryError(IOError): - """Raised when sync can't find the locale directory.""" - - -class VCSProject: - """ - Container for project data that is stored on the filesystem and - pulled from a remote VCS. 
- """ - - SOURCE_DIR_SCORES = { - "templates": 3, - "en-US": 2, - "en-us": 2, - "en_US": 2, - "en_us": 2, - "en": 1, - } - SOURCE_DIR_NAMES = SOURCE_DIR_SCORES.keys() - - def __init__( - self, - db_project, - now=None, - locales=None, - repo_locales=None, - added_paths=None, - changed_paths=None, - force=False, - ): - """ - Load resource paths from the given db_project and parse them - for translation data. - - :param Project db_project: - Project model instance for the project we're going to be - reading files for. - :param datetime.datetime now: - Sync start time. - :param list locales: - List of Locale model instances for the locales that we want - to parse. Defaults to parsing resources for all enabled - locales on the project. - :param dict repo_locales: - A dict of repository PKs and their currently checked out locales - (not neccessarily matching the ones stored in the DB). - :param list added_paths: - List of added source file paths - :param list changed_paths: - List of changed source file paths - :param bool force: - Scans all resources in repository - :param VCSConfiguration configuration: - Project configuration, provided by the optional configuration file. 
- """ - self.db_project = db_project - self.now = now - self.locales = locales if locales is not None else db_project.locales.all() - self.repo_locales = repo_locales - self.added_paths = added_paths or [] - self.changed_paths = changed_paths or [] - self.force = force - self.synced_locales = set() - - self.configuration = None - if db_project.configuration_file: - self.configuration = VCSConfiguration(self) - - @cached_property - def changed_files(self): - if self.force or ( - self.db_project.configuration_file and self.changed_config_files - ): - # All files are marked as changed - return None - - if self.locales: - return self.changed_locales_files - else: - return self.changed_source_files[0] - - @cached_property - def changed_source_files(self): - """ - Returns a tuple of changed and removed source files in the project: - (changed_files, removed_files) - """ - source_resources_repo = self.db_project.source_repository - - if not source_resources_repo: - raise MissingSourceRepository(self.db_project) - - source_directory = self.source_directory_path - last_revision = source_resources_repo.get_last_synced_revisions() - - modified_files, removed_files = get_changed_files( - source_resources_repo.type, source_directory, last_revision - ) - - # Unify filesystem and data model file extensions - if not self.configuration: - modified_files = map(source_to_locale_path, modified_files) - removed_files = map(source_to_locale_path, removed_files) - - if source_resources_repo.source_repo or not last_revision: - - def get_path(path): - return (path, []) - - else: - relative_source_path = source_directory[ - len(source_resources_repo.checkout_path) : - ].lstrip(os.sep) - - def get_path(path): - return (path[len(relative_source_path) :].lstrip(os.sep), []) - - return dict(map(get_path, modified_files)), dict(map(get_path, removed_files)) - - @cached_property - def changed_locales_files(self): - """ - Map of changed files and locales they were changed for. 
- """ - files = {} - - # VCS changes - repos = self.db_project.translation_repositories() - if self.repo_locales: - repos = repos.filter(pk__in=self.repo_locales.keys()) - - for repo in repos: - if repo.multi_locale: - locales = ( - self.repo_locales[repo.pk] - if self.repo_locales - else self.db_project.locales.all() - ) - for locale in locales: - changed_files = get_changed_files( - repo.type, - repo.locale_checkout_path(locale), - repo.get_last_synced_revisions(locale.code), - )[0] - - for path in changed_files: - files.setdefault(path, []).append(locale) - else: - changed_files = get_changed_files( - repo.type, repo.checkout_path, repo.get_last_synced_revisions() - )[0] - - log.info( - "Changed files in {} repository, all: {}".format( - self.db_project, changed_files - ) - ) - - # Include only relevant (localizable) files - if self.configuration: - files = self.get_relevant_files_with_config(changed_files) - else: - files = self.get_relevant_files_without_config( - changed_files, self.locale_path_locales(repo.checkout_path) - ) - - log.info( - "Changed files in {} repository, relevant for enabled locales: {}".format( - self.db_project, files - ) - ) - - # DB changes - vcs = files - db = self.db_project.changed_resources(self.now) - for path in set(list(vcs.keys()) + list(db.keys())): - if path in vcs and path in db: - vcs[path] = set(list(vcs[path]) + list(db[path])) - - else: - vcs[path] = vcs[path] if path in vcs else db[path] - - return files - - @cached_property - def changed_config_files(self): - """ - A set of the changed project config files. - """ - config_files = { - pc.path.replace(os.path.join(self.source_directory_path, ""), "") - for pc in self.configuration.parsed_configuration.configs - } - changed_files = set(self.changed_source_files[0]) - return changed_files.intersection(config_files) - - def get_relevant_files_with_config(self, paths): - """ - Check if given paths represent localizable files using project configuration. 
- Return a dict of relative reference paths of such paths and corresponding Locale - objects. - """ - files = {} - - for locale in self.db_project.locales.all(): - for path in paths: - absolute_path = os.path.join(self.source_directory_path, path) - reference_path = self.configuration.reference_path( - locale, absolute_path - ) - - if reference_path: - relative_reference_path = reference_path[ - len(self.source_directory_path) : - ].lstrip(os.sep) - files.setdefault(relative_reference_path, []).append(locale) - - return files - - def get_relevant_files_without_config(self, paths, locale_path_locales): - """ - Check if given paths represent localizable files by matching them against locale - repository paths. Return a dict of relative reference paths of such paths and - corresponding Locale objects. - """ - files = {} - locale_paths = locale_path_locales.keys() - - for path in paths: - if is_hidden(path): - continue - - for locale_path in locale_paths: - if path.startswith(locale_path): - locale = locale_path_locales[locale_path] - path = path[len(locale_path) :].lstrip(os.sep) - files.setdefault(path, []).append(locale) - break - - return files - - def locale_path_locales(self, repo_checkout_path): - """ - A map of relative locale directory paths and their respective locales. - """ - locale_path_locales = {} - - for locale in self.db_project.locales.all(): - locale_directory = self.locale_directory_paths[locale.code] - path = locale_directory[len(repo_checkout_path) :].lstrip(os.sep) - path = os.path.join(path, "") # Ensure the path ends with os.sep - locale_path_locales[path] = locale - - return locale_path_locales - - @cached_property - def locale_directory_paths(self): - """ - A map of locale codes and their absolute directory paths. - Create locale directory, if not in repository yet. 
- """ - locale_directory_paths = {} - parent_directories = set() - - for locale in self.locales: - try: - if self.configuration: - locale_directory_paths[locale.code] = self.configuration.l10n_base - else: - locale_directory_paths[locale.code] = locale_directory_path( - self.checkout_path, - locale.code, - parent_directories, - ) - parent_directory = get_parent_directory( - locale_directory_paths[locale.code] - ) - - except OSError: - if not self.db_project.has_multi_locale_repositories: - source_directory = self.source_directory_path - parent_directory = get_parent_directory(source_directory) - - locale_code = locale.code - if uses_undercore_as_separator(parent_directory): - locale_code = locale_code.replace("-", "_") - - locale_directory = os.path.join(parent_directory, locale_code) - - # For asymmetric formats, create empty folder - if is_asymmetric_resource(next(self.relative_resource_paths())): - os.makedirs(locale_directory) - - # For other formats, copy resources from source directory - else: - shutil.copytree(source_directory, locale_directory) - - for root, dirnames, filenames in os.walk(locale_directory): - for filename in filenames: - path = os.path.join(root, filename) - if is_resource(filename): - os.rename(path, source_to_locale_path(path)) - else: - os.remove(path) - - locale_directory_paths[locale.code] = locale_directory - - else: - raise MissingLocaleDirectoryError( - f"Directory for locale `{locale.code}` not found" - ) - - parent_directories.add(parent_directory) - - return locale_directory_paths - - @cached_property - def resources(self): - """ - Lazy-loaded mapping of relative paths -> VCSResources that need to be synced: - * changed in repository - * changed in Pontoon DB - * corresponding source file added - * corresponding source file changed - * all paths relevant for newly enabled (unsynced) locales - - Waiting until first access both avoids unnecessary file reads - and allows tests that don't need to touch the resources to run - with 
less mocking. - """ - resources = {} - - log.info( - "Changed files in {} repository and Pontoon, relevant for enabled locales: {}".format( - self.db_project, self.changed_files - ) - ) - - for path in self.relative_resource_paths(): - # Syncing translations - if self.locales: - # Copy list instead of cloning - locales = list(self.db_project.unsynced_locales) - - if self.changed_files is not None and ( - (not self.changed_files or path not in self.changed_files) - and path not in self.added_paths - and path not in self.changed_paths - ): - if not locales: - log.debug(f"Skipping unchanged file: {path}") - continue - - else: - if ( - self.changed_files is None - or path in self.added_paths - or path in self.changed_paths - ): - locales += self.locales - else: - locales += self.changed_files[path] - - # Syncing resources - else: - if self.changed_files is not None and path not in self.changed_files: - log.debug(f"Skipping unchanged resource file: {path}") - continue - locales = [] - - locales = {loc for loc in locales if loc in self.locales} - self.synced_locales.update(locales) - - log.debug( - "Detected resource file {} for {}".format( - path, ",".join([loc.code for loc in locales]) or "source" - ) - ) - - try: - resources[path] = VCSResource(self, path, locales=locales) - except ParseError as err: - log.error( - "Skipping resource {path} due to ParseError: {err}".format( - path=path, err=err - ) - ) - - log.info( - "Relative paths in {} that need to be synced: {}".format( - self.db_project, resources.keys() - ) - ) - - return resources - - @property - def entities(self): - return chain.from_iterable( - resource.entities.values() for resource in self.resources.values() - ) - - @property - def checkout_path(self): - return self.db_project.checkout_path - - @cached_property - def source_directory_path(self): - """ - Path to the directory where source strings are stored. 
- - Paths are identified using a scoring system; more likely - directory names get higher scores, as do directories with - formats that only used for source strings. - """ - source_repository = self.db_project.source_repository - - # If project configuration provided, files could be stored in multiple - # directories, so we just use the source repository checkout path - if self.configuration: - return source_repository.checkout_path - - # If source repository explicitly marked - if source_repository.source_repo: - return source_repository.checkout_path - - possible_sources = [] - for root, dirnames, filenames in os.walk(self.checkout_path): - for dirname in dirnames: - if dirname in self.SOURCE_DIR_NAMES: - score = self.SOURCE_DIR_SCORES[dirname] - - # Ensure the matched directory contains resources. - directory_path = os.path.join(root, dirname) - if directory_contains_resources(directory_path): - # Extra points for source resources! - if directory_contains_resources( - directory_path, source_only=True - ): - score += 3 - - possible_sources.append((directory_path, score)) - - if possible_sources: - return max(possible_sources, key=lambda s: s[1])[0] - else: - raise MissingSourceDirectoryError( - f"No source directory found for project {self.db_project.slug}" - ) - - def relative_resource_paths(self): - """ - List of all source resource paths, relative to source_directory_path. - """ - if self.configuration: - paths = self.resource_paths_with_config() - else: - paths = self.resource_paths_without_config() - - for path in paths: - if not self.configuration: - path = source_to_locale_path(path) - yield os.path.relpath(path, self.source_directory_path) - - def resource_paths_with_config(self): - """ - List of absolute paths for all supported source resources - as specified through project configuration. 
- """ - path = self.source_directory_path - project_files = self.configuration.get_or_set_project_files(None) - - for root, dirnames, filenames in os.walk(path): - if is_hidden(root): - continue - - for filename in filenames: - absolute_path = os.path.join(root, filename) - if project_files.match(absolute_path): - yield absolute_path - - def resource_paths_without_config(self): - """ - List of absolute paths for all supported source resources - found within the given path. - """ - path = self.source_directory_path - - for root, dirnames, filenames in os.walk(path): - if is_hidden(root): - continue - - # Ignore certain files in Mozilla repositories. - if self.db_project.repository_url in MOZILLA_REPOS: - filenames = [ - f for f in filenames if not f.endswith("region.properties") - ] - - for filename in filenames: - if is_resource(filename): - yield os.path.join(root, filename) diff --git a/pontoon/sync/vcs/resource.py b/pontoon/sync/vcs/resource.py deleted file mode 100644 index 1b39df5b7e..0000000000 --- a/pontoon/sync/vcs/resource.py +++ /dev/null @@ -1,150 +0,0 @@ -import logging - -from os.path import join - -from pontoon.sync.exceptions import ParseError -from pontoon.sync.utils import locale_to_source_path - - -log = logging.getLogger(__name__) - - -class VCSEntity: - """ - An Entity is a single string to be translated, and a VCSEntity - stores the translations for an entity from several locales. 
- """ - - def __init__( - self, - resource, - key, - string, - source, - comments, - group_comments=None, - resource_comments=None, - context="", - string_plural="", - order=0, - ): - self.resource = resource - self.key = key - self.string = string - self.string_plural = string_plural - self.source = source - self.comments = comments - self.group_comments = group_comments or [] - self.resource_comments = resource_comments or [] - self.context = context - self.order = order - self.translations = {} - - def has_translation_for(self, locale_code): - """Return True if a translation exists for the given locale.""" - return locale_code in self.translations - - -class VCSResource: - """Represents a single resource across multiple locales.""" - - def __init__(self, vcs_project, path, locales=None): - """ - Load the resource file for each enabled locale and store its - translations in VCSEntity instances. - """ - from pontoon.base.models import Locale - from pontoon.sync import formats # Avoid circular import. 
- - self.vcs_project = vcs_project - self.path = path - self.locales = locales or [] - self.files = {} - self.entities = {} - - # Create entities using resources from the source directory, - source_resource_path = join(vcs_project.source_directory_path, self.path) - source_resource_path = locale_to_source_path(source_resource_path) - source_resource_file = formats.parse( - source_resource_path, locale=Locale.objects.get(code="en-US") - ) - - for index, translation in enumerate(source_resource_file.translations): - vcs_entity = VCSEntity( - resource=self, - key=translation.key, - context=translation.context, - string=translation.source_string, - string_plural=translation.source_string_plural, - comments=translation.comments, - group_comments=( - translation.group_comments - if hasattr(translation, "group_comments") - else None - ), - resource_comments=( - translation.resource_comments - if hasattr(translation, "resource_comments") - else None - ), - source=translation.source, - order=translation.order or index, - ) - self.entities[vcs_entity.key] = vcs_entity - - # Fill in translations from the locale resources. 
- for locale in locales: - locale_directory = self.vcs_project.locale_directory_paths[locale.code] - - if self.vcs_project.configuration: - # Some resources might not be available for this locale - resource_path = self.vcs_project.configuration.l10n_path( - locale, - source_resource_path, - ) - if resource_path is None: - continue - else: - resource_path = join(locale_directory, self.path) - - log.debug("Parsing resource file: %s", resource_path) - - try: - resource_file = formats.parse( - resource_path, source_resource_path, locale - ) - - # File doesn't exist or is invalid: log it and move on - except (OSError, ParseError) as err: - log.error( - "Skipping resource {path} due to {type}: {err}".format( - path=path, type=type(err).__name__, err=err - ) - ) - continue - - self.files[locale] = resource_file - - log.debug("Discovered %s translations.", len(resource_file.translations)) - - for translation in resource_file.translations: - try: - self.entities[translation.key].translations[locale.code] = ( - translation - ) - except KeyError: - # If the source is missing an entity, we consider it - # deleted and don't add it. - pass - - def save(self, locale=None): - """ - Save changes made to any of the translations in this resource - back to the filesystem for all locales. 
- """ - if locale: - self.files[locale].save(locale) - - else: - for locale, resource_file in self.files.items(): - resource_file.save(locale) diff --git a/requirements/default.in b/requirements/default.in index 7abb1c2010..f8601712f6 100644 --- a/requirements/default.in +++ b/requirements/default.in @@ -40,9 +40,10 @@ jsonfield==3.1.0 jsonschema==4.23.0 lxml==5.3.0 markupsafe==2.0.1 +moz.l10n[xml]==0.5.6 newrelic==9.6.0 openai==1.47.1 -polib==1.0.6 +polib==1.2.0 psycopg2==2.9.6 PyJWT==2.9.0 python-dateutil==2.9.0 @@ -51,7 +52,7 @@ rapidfuzz==3.9.7 raygun4py==4.3.0 sacrebleu==2.4.3 sacremoses==0.1.1 -translate-toolkit==3.3.2 +translate-toolkit==3.14.1 whitenoise==5.2.0 # Dependencies loaded from outside pypi. diff --git a/requirements/default.txt b/requirements/default.txt index d6cb76338c..ed4a5aef69 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -357,7 +357,13 @@ django-pipeline==3.0.0 \ fluent-syntax==0.19.0 \ --hash=sha256:920326d7f46864b9758f0044e9968e3112198bc826acee16ddd8f11d359004fd \ --hash=sha256:b352b3475fac6c6ed5f06527921f432aac073d764445508ee5218aeccc7cc5c4 - # via compare-locales + # via + # compare-locales + # moz-l10n +gitignorant==0.3.1 \ + --hash=sha256:d2b96ffe90eaf4fa2e1d68ad4e3f807213c8ec1e54a943a9e7105a9f7aaa4232 \ + --hash=sha256:eb51b3421761e41f594758ef1f93aa438f94856da8beea44ccd043f9c2373442 + # via moz-l10n google-api-core[grpc]==2.20.0 \ --hash=sha256:ef0591ef03c30bb83f79b3d0575c3f31219001fc9c5cf37024d08310aeffed8a \ --hash=sha256:f74dff1889ba291a4b76c5079df0711810e2d9da81abfdc99957bc961c1eb28f @@ -490,6 +496,11 @@ idna==3.10 \ # anyio # httpx # requests +iniparse==0.5 \ + --hash=sha256:88ca60473b1637055a937933d48840be1b1b6835f381a6158ef118a532583675 \ + --hash=sha256:932e5239d526e7acb504017bb707be67019ac428a6932368e6851691093aa842 \ + --hash=sha256:db6ef1d8a02395448e0e7b17ac0aa28b8d338b632bbd1ffca08c02ddae32cf97 + # via moz-l10n jinja2==3.1.4 \ 
--hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d @@ -724,6 +735,7 @@ lxml==5.3.0 \ --hash=sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8 # via # -r requirements/default.in + # moz-l10n # sacrebleu # translate-toolkit markupsafe==2.0.1 \ @@ -799,6 +811,10 @@ markupsafe==2.0.1 \ # via # -r requirements/default.in # jinja2 +moz-l10n[xml]==0.5.6 \ + --hash=sha256:07b4d88df540e5facc2c9483b44cf33da3ea5c2ce45e5ccb72418680c62e42bb \ + --hash=sha256:13dbf9fc56b38ef78ccfd1e188308912bf9125bb4898e0de2ee35b65be5ea25c + # via -r requirements/default.in newrelic==9.6.0 \ --hash=sha256:01c0eb630bb18261241a37aa0a70cb6f706079a1f58f59f2bb64f26fda54ffc5 \ --hash=sha256:09dad0db993402e166e37d99302c2ad5588b4ff1e5b814819540ca5ec2bd3cea \ @@ -897,10 +913,12 @@ packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via gunicorn -polib==1.0.6 \ - --hash=sha256:20d2a0d589a692c11df549bd7cda83c665eef2a83e017b843fecdf956edbad74 \ - --hash=sha256:b1ea141d58ed5e48aed2674f7c894dfb83f639c3286d7b32b2e19fa032a5b400 - # via -r requirements/default.in +polib==1.2.0 \ + --hash=sha256:1c77ee1b81feb31df9bca258cbc58db1bbb32d10214b173882452c73af06d62d \ + --hash=sha256:f3ef94aefed6e183e342a8a269ae1fc4742ba193186ad76f175938621dbfc26b + # via + # -r requirements/default.in + # moz-l10n portalocker==2.10.1 \ --hash=sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf \ --hash=sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f @@ -1449,6 +1467,7 @@ six==1.16.0 \ # apscheduler # bleach # compare-locales + # iniparse # promise # python-binary-memcached # python-dateutil @@ -1489,8 +1508,9 @@ tqdm==4.66.5 \ # via # openai # sacremoses -translate-toolkit==3.3.2 \ - 
--hash=sha256:0795bd3c8668213199550ae4ed8938874083139ec1f8c473dcca1524a206b108 +translate-toolkit==3.14.1 \ + --hash=sha256:2148c437c529d4eaf89c5a3bd5690376eabee97c3c39b7d4824001a7cf333e86 \ + --hash=sha256:74dd963f770ec1d18e44895d8a9f86d47a0d73b270b22a69a5652f30ae2dca79 # via -r requirements/default.in typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ @@ -1529,7 +1549,9 @@ vine==5.1.0 \ wcwidth==0.2.13 \ --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 - # via prompt-toolkit + # via + # prompt-toolkit + # translate-toolkit webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923