Commit

Merge branch 'master' into TP2000-1327--upgrade-python-3.12
mattjamc committed Jul 23, 2024
2 parents 032b498 + 5624528 commit 3abadd6
Showing 15 changed files with 373 additions and 111 deletions.
3 changes: 2 additions & 1 deletion common/inspect_tap_tasks.py
@@ -59,9 +59,10 @@ def clean_tasks(self, tasks, task_status="", task_name="") -> List[Dict]:
 
         return tasks_cleaned
 
-    def current_rule_checks(self, task_name="") -> List[CeleryTask]:
+    def current_tasks(self, task_name="") -> List[CeleryTask]:
         """Return the list of tasks queued or started, ready to display in the
         view."""
+
         inspect = app.control.inspect()
         if not inspect:
             return []
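Note: the renamed `current_tasks()` is built on Celery's broadcast inspection API. A minimal sketch of how that API is typically queried (worker and task names in the output depend on what is running; the merged-dict handling is illustrative):

```python
# Sketch only: assumes a configured Celery app, here the project's
# `common.celery.app`.
from common.celery import app

inspect = app.control.inspect()

# Both calls return a mapping of worker name -> list of task-info dicts,
# or None when no workers respond to the broadcast.
active = inspect.active() or {}
reserved = inspect.reserved() or {}

for state, by_worker in (("active", active), ("reserved", reserved)):
    for worker, tasks in by_worker.items():
        for task in tasks:
            print(state, worker, task.get("name"), task.get("id"))
```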
46 changes: 46 additions & 0 deletions common/util.py
@@ -2,7 +2,9 @@
 
 from __future__ import annotations
 
+import os
 import re
+import typing
 from datetime import date
 from datetime import datetime
 from datetime import timedelta
@@ -43,6 +45,7 @@
 from django.db.models.functions.text import Upper
 from django.db.transaction import atomic
 from django.template import loader
+from django.utils import timezone
 from lxml import etree
 from psycopg.types.range import DateRange
 from psycopg.types.range import TimestampRange
@@ -599,3 +602,46 @@ def format_date_string(date_string: str, short_format=False) -> str:
         return date_parser.parse(date_string).strftime(settings.DATE_FORMAT)
     except:
         return ""
+
+
+def log_timing(logger_function: typing.Callable):
+    """
+    Decorator function to log start and end times of a decorated function.
+
+    When decorating a function, `logger_function` must be passed in to the
+    decorator to ensure the correct logger instance and function are applied.
+    `logger_function` may be any one of the logging output functions, but is
+    likely to be either `debug` or `info`.
+
+    Example:
+    ```
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+    @log_timing(logger_function=logger.info)
+    def my_function():
+        ...
+    ```
+    """
+
+    @wrapt.decorator
+    def wrapper(wrapped, instance, args, kwargs):
+        start_time = timezone.localtime()
+        logger_function(
+            f"Entering the function {wrapped.__name__}() on process "
+            f"pid={os.getpid()} at {start_time.isoformat()}",
+        )
+
+        result = wrapped(*args, **kwargs)
+
+        end_time = timezone.localtime()
+        elapsed_time = end_time - start_time
+        logger_function(
+            f"Exited the function {wrapped.__name__}() on "
+            f"process pid={os.getpid()} at {end_time.isoformat()} after "
+            f"an elapsed time of {elapsed_time}.",
+        )
+
+        return result
+
+    return wrapper
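A usage sketch of the new decorator, expanding on the docstring example (it assumes a configured Django environment, since `timezone.localtime()` needs Django settings; `slow_task` is made up):

```python
import logging
import time

from common.util import log_timing  # the decorator added above

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@log_timing(logger_function=logger.info)
def slow_task():
    time.sleep(1)

slow_task()
# Logs two lines built from the f-strings in the decorator, e.g.:
#   Entering the function slow_task() on process pid=1234 at 2024-07-23T10:00:00+01:00
#   Exited the function slow_task() on process pid=1234 at 2024-07-23T10:00:01+01:00 after an elapsed time of 0:00:01.000123.
```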
6 changes: 2 additions & 4 deletions common/views.py
@@ -47,7 +47,7 @@
 from commodities.models import GoodsNomenclature
 from common.business_rules import BusinessRule
 from common.business_rules import BusinessRuleViolation
-from common.celery import app
+from common.celery import app as celery_app
 from common.forms import HomeSearchForm
 from common.models import TrackedModel
 from common.models import Transaction
@@ -65,8 +65,6 @@
 from workbaskets.models import WorkflowStatus
 from workbaskets.views.mixins import WithCurrentWorkBasket
 
-from .celery import app as celery_app
-
 
 class HomeView(LoginRequiredMixin, FormView):
     template_name = "common/homepage.jinja"
@@ -350,7 +348,7 @@ class AppInfoView(
     DATETIME_FORMAT = "%d %b %Y, %H:%M"
 
     def active_tasks(self) -> Dict:
-        inspect = app.control.inspect()
+        inspect = celery_app.control.inspect()
         if not inspect:
             return {}
 
6 changes: 3 additions & 3 deletions conftest.py
@@ -1119,13 +1119,13 @@ def hmrc_storage(s3):
 
 @pytest.fixture
 def sqlite_storage(s3, s3_bucket_names):
-    """Patch SQLiteStorage with moto so that nothing is really uploaded to
+    """Patch SQLiteS3VFSStorage with moto so that nothing is really uploaded to
     s3."""
-    from exporter.storages import SQLiteStorage
+    from exporter.storages import SQLiteS3VFSStorage
 
     storage = make_storage_mock(
         s3,
-        SQLiteStorage,
+        SQLiteS3VFSStorage,
         bucket_name=settings.SQLITE_STORAGE_BUCKET_NAME,
     )
    assert storage.endpoint_url is settings.SQLITE_S3_ENDPOINT_URL
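A sketch of how a test might consume the renamed fixture (the test itself is hypothetical; the `endpoint_url` attribute is the one the fixture asserts against, and `settings` is the standard pytest-django fixture):

```python
# Hypothetical test consuming the sqlite_storage fixture above; moto
# intercepts all S3 traffic, so nothing is really uploaded.
def test_sqlite_storage_is_mocked(sqlite_storage, settings):
    assert sqlite_storage.endpoint_url == settings.SQLITE_S3_ENDPOINT_URL
```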
29 changes: 24 additions & 5 deletions exporter/management/commands/dump_sqlite.py
@@ -11,20 +11,39 @@
 
 
 class Command(BaseCommand):
+    help = (
+        "Create a snapshot of the application database to a file in SQLite "
+        "format. Snapshot file names take the form <transaction-order>.db, "
+        "where <transaction-order> is the value of the last published "
+        "transaction's order attribute. Care should be taken to ensure that "
+        "there is sufficient local file system storage to accommodate the "
+        "SQLite file - if you choose to target remote S3 storage, then a "
+        "temporary local copy of the file will be created and cleaned up."
+    )
+
     def add_arguments(self, parser: CommandParser) -> None:
         parser.add_argument(
-            "--immediately",
+            "--asynchronous",
             action="store_const",
-            help="Run the task in this process now rather than queueing it up",
+            help="Queue the snapshot task to run in an asynchronous process.",
             const=True,
             default=False,
         )
+        parser.add_argument(
+            "--save-local",
+            help=(
+                "Save the SQLite snapshot to the local file system under the "
+                "(existing) directory given by DIRECTORY_PATH."
+            ),
+            dest="DIRECTORY_PATH",
+        )
         return super().add_arguments(parser)
 
     def handle(self, *args: Any, **options: Any) -> Optional[str]:
         logger.info(f"Triggering tariff database export to SQLite")
 
-        if options["immediately"]:
-            export_and_upload_sqlite()
+        local_path = options["DIRECTORY_PATH"]
+        if options["asynchronous"]:
+            export_and_upload_sqlite.delay(local_path)
         else:
-            export_and_upload_sqlite.delay()
+            export_and_upload_sqlite(local_path)
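Hedged examples of invoking the reworked command through Django's management API (the directory path is illustrative):

```python
from django.core.management import call_command

# Run the export synchronously in this process, saving the snapshot under
# an existing local directory:
call_command("dump_sqlite", "--save-local", "/tmp/sqlite-snapshots")

# Or queue the export onto a Celery worker instead:
call_command("dump_sqlite", "--asynchronous")
```

Note the flag's meaning is now inverted relative to the old `--immediately`: synchronous execution is the default, and `--asynchronous` opts in to queueing.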
33 changes: 21 additions & 12 deletions exporter/sqlite/__init__.py
@@ -41,32 +41,41 @@
 }
 
 
-def make_export_plan(sqlite: runner.Runner) -> plan.Plan:
-    names = (
+def make_export_plan(sqlite_runner: runner.Runner) -> plan.Plan:
+    app_names = (
         name.split(".")[0]
         for name in settings.DOMAIN_APPS
         if name not in settings.SQLITE_EXCLUDED_APPS
     )
-    all_models = chain(*[apps.get_app_config(name).get_models() for name in names])
+    all_models = chain(*[apps.get_app_config(name).get_models() for name in app_names])
     models_by_table = {model._meta.db_table: model for model in all_models}
 
     import_script = plan.Plan()
-    for table, sql in sqlite.tables:
+    for table, create_table_statement in sqlite_runner.tables:
         model = models_by_table.get(table)
         if model is None or model.__name__ in SKIPPED_MODELS:
             continue
 
-        columns = list(sqlite.read_column_order(model._meta.db_table))
-        import_script.add_schema(sql)
+        columns = list(sqlite_runner.read_column_order(model._meta.db_table))
+        import_script.add_schema(create_table_statement)
         import_script.add_data(model, columns)
 
     return import_script
 
 
 def make_export(connection: apsw.Connection):
-    with NamedTemporaryFile() as db_name:
-        sqlite = runner.Runner.make_tamato_database(Path(db_name.name))
-        plan = make_export_plan(sqlite)
-
-        export = runner.Runner(connection)
-        export.run_operations(plan.operations)
+    with NamedTemporaryFile() as temp_sqlite_db:
+        # Create Runner instance with its SQLite file name pointing at a path on
+        # the local file system. This is only required temporarily in order to
+        # create an in-memory plan that can be run against a target database
+        # object.
+        plan_runner = runner.Runner.make_tamato_database(
+            Path(temp_sqlite_db.name),
+        )
+        plan = make_export_plan(plan_runner)
+        # make_tamato_database() creates a Connection instance that needs
+        # closing once an in-memory plan has been created from it.
+        plan_runner.database.close()
+
+        export_runner = runner.Runner(connection)
+        export_runner.run_operations(plan.operations)
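A sketch of driving the export end to end against a local file (the target path is illustrative; `make_export` and `apsw.Connection` are as used in the diff above):

```python
import apsw

from exporter.sqlite import make_export

# Open (or create) the target SQLite file, populate it from the generated
# plan, then close it.
connection = apsw.Connection("/tmp/tariff-snapshot.db")
try:
    make_export(connection)
finally:
    connection.close()
```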
2 changes: 2 additions & 0 deletions exporter/sqlite/plan.py
@@ -100,9 +100,11 @@ def operations(self) -> Iterable[Operation]:
         ]
 
     def add_schema(self, sql: str):
+        """Add SQL schema (table) creation statements to this Plan instance."""
         self._operations.append((sql, [[]]))
 
     def add_data(self, model: Type[Model], columns: Iterable[str]):
+        """Add data insert statements to this Plan instance."""
         queryset = model.objects
         output_columns = []
         for column in columns:
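The two methods pair up when building a plan by hand, roughly as follows (a sketch: the model import, DDL string, and column list are illustrative placeholders, not the real generated values):

```python
from exporter.sqlite.plan import Plan
from measures.models import Measure  # illustrative model choice

import_script = Plan()
# add_schema() records a CREATE TABLE statement; add_data() records the
# INSERTs for a model. Both are replayed later by Runner.run_operations().
import_script.add_schema('CREATE TABLE "measures_measure" (...)')  # illustrative DDL
import_script.add_data(Measure, ["trackedmodel_ptr_id", "sid"])  # illustrative columns
```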
53 changes: 33 additions & 20 deletions exporter/sqlite/runner.py
@@ -40,7 +40,7 @@ def normalise_loglevel(cls, loglevel):
         return loglevel
 
     @classmethod
-    def manage(cls, db: Path, *args: str):
+    def manage(cls, sqlite_file: Path, *args: str):
         """
         Runs a Django management command on the SQLite database.
@@ -56,7 +56,7 @@ def manage(cls, db: Path, *args: str):
             sqlite_env["CELERY_LOG_LEVEL"],
         )
 
-        sqlite_env["DATABASE_URL"] = f"sqlite:///{str(db)}"
+        sqlite_env["DATABASE_URL"] = f"sqlite:///{str(sqlite_file)}"
         # Required to make sure the postgres default isn't set as the DB_URL
         if sqlite_env.get("VCAP_SERVICES"):
             vcap_env = json.loads(sqlite_env["VCAP_SERVICES"])
@@ -71,28 +71,36 @@
         )
 
     @classmethod
-    def make_tamato_database(cls, db: Path) -> "Runner":
-        """
-        Generate a new and empty SQLite database with the TaMaTo schema.
-
-        Because SQLite uses different fields to PostgreSQL, first missing
-        migrations are generated to bring in the different style of validity
-        fields. However, these should not generally stick around and be applied
-        to Postgres so they are removed after being applied.
-        """
+    def make_tamato_database(cls, sqlite_file: Path) -> "Runner":
+        """Generate a new and empty SQLite database with the TaMaTo schema
+        derived from Tamato's models - by performing 'makemigrations' followed
+        by 'migrate' on the SQLite file located at `sqlite_file`."""
         try:
-            cls.manage(db, "makemigrations", "--name", "sqlite_export")
-            cls.manage(db, "migrate")
-            assert db.exists()
-            return cls(apsw.Connection(str(db)))
+            # Because SQLite uses different fields to PostgreSQL, missing
+            # migrations are first generated to bring in the different style of
+            # validity fields. However, these should not be applied to Postgres
+            # and so should be removed (in the `finally` block) after they have
+            # been applied (when running `migrate`).
+            cls.manage(sqlite_file, "makemigrations", "--name", "sqlite_export")
+            cls.manage(sqlite_file, "migrate")
+            assert sqlite_file.exists()
+            return cls(apsw.Connection(str(sqlite_file)))
         finally:
             for file in Path(settings.BASE_DIR).rglob(
                 "**/migrations/*sqlite_export.py",
             ):
                 file.unlink()
 
     def read_schema(self, type: str) -> Iterator[Tuple[str, str]]:
+        """
+        Generator yielding a tuple of 'name' and 'sql' column values from
+        SQLite's "schema table", 'sqlite_schema'.
+
+        The `type` param filters rows that have a matching 'type' column value,
+        which may be any one of: 'table', 'index', 'view', or 'trigger'.
+
+        See https://www.sqlite.org/schematab.html for further details.
+        """
         cursor = self.database.cursor()
         cursor.execute(
             f"""
@@ -110,16 +118,21 @@ def read_schema(self, type: str) -> Iterator[Tuple[str, str]]:
 
     @property
     def tables(self) -> Iterator[Tuple[str, str]]:
+        """Generator yielding a tuple of each SQLite table object's 'name' and
+        the SQL `CREATE TABLE` statement that can be used to create the
+        table."""
         yield from self.read_schema("table")
 
     @property
     def indexes(self) -> Iterator[Tuple[str, str]]:
+        """Generator yielding a tuple of each SQLite table index object's name
+        and the SQL `CREATE INDEX` statement that can be used to create it."""
         yield from self.read_schema("index")
 
     def read_column_order(self, table: str) -> Iterator[str]:
         """
-        Returns the name of the columns in the order they are defined in an
-        SQLite database.
+        Returns the name of `table`'s columns in the order they are defined in
+        an SQLite database.
 
         This is necessary because the Django migrations do not generate the
         columns in the order they are defined on the model, and there's no other
@@ -131,8 +144,8 @@ def read_column_order(self, table: str) -> Iterator[str]:
             yield column[1]
 
     def run_operations(self, operations: Iterable[Operation]):
-        """Runs the supplied sequence of operations against the SQLite
-        database."""
+        """Runs each operation in `operations` against the `database` member
+        attribute (a connection object to an SQLite database file)."""
         cursor = self.database.cursor()
         for operation in operations:
             logger.debug("%s: %s", self.database, operation[0])
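For reference, the queries these helpers wrap can be reproduced directly. A sketch using the standard library `sqlite3` module rather than apsw (file and table names are illustrative):

```python
import sqlite3

conn = sqlite3.connect("/tmp/tariff-snapshot.db")  # illustrative path

# read_schema()-style query: each object's name and creating SQL, filtered
# by type ('table', 'index', 'view' or 'trigger'). sqlite_master is the
# long-standing alias of sqlite_schema.
for name, sql in conn.execute(
    "SELECT name, sql FROM sqlite_master WHERE type = ?",
    ("table",),
):
    print(name, sql)

# read_column_order()-style query: PRAGMA table_info() lists columns in
# their defined order; row[1] is the column name, matching `column[1]` above.
for row in conn.execute("PRAGMA table_info(measures_measure)"):  # illustrative table
    print(row[1])

conn.close()
```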