diff --git a/docs/user/settings.rst b/docs/user/settings.rst index 50be715f..43b9c888 100644 --- a/docs/user/settings.rst +++ b/docs/user/settings.rst @@ -106,7 +106,7 @@ without having to specify the international prefix. ``OPENWISP_USERS_EXPORT_USERS_COMMAND_CONFIG`` ---------------------------------------------- -============ ============================= +============ ========================================================================= **type**: ``dict`` **default**: .. code-block:: python @@ -126,17 +126,112 @@ without having to specify the international prefix. "location", "notes", "language", - "organizations", + { + "name": "organizations", + "callable": openwisp_users.settings.export_organizations, + }, ], "select_related": [], + "prefetch_related": [], } -============ ============================= +============ ========================================================================= -This setting can be used to configure the exported fields for the -:ref:`export_users` command. +.. note:: -The ``select_related`` property can be used to optimize the database -query. + The ``callable`` value must be a Python callable (function or method), + not a string path. Ensure the function is imported in your settings + file before referencing it. + +This setting configures the fields exported by the :ref:`export_users` +management command. + +Field definitions +~~~~~~~~~~~~~~~~~ + +Each entry in ``fields`` can be either: + +- A string, representing a direct attribute of the user model: + + .. code-block:: python + + "email" + +- A dictionary, allowing advanced customization: + + .. code-block:: python + + { + "name": "organizations", + "callable": my_custom_function, + } + +The following keys are supported in field dictionaries: + +- ``name`` (str): the field name used as the fallback CSV column header. +- ``callable`` (callable, optional): a function that takes the user + instance as input and returns the value to be exported. 
+- ``fields`` (list of str, optional): a list of attributes to extract from + a related object or queryset. +- ``header`` (str, optional): a custom CSV column header. When provided, + it overrides the default header generation. +- ``header_fields`` (list of str, optional): a list of subfield names to + display in the header as ``name (field1, field2)``. If not provided, + falls back to ``fields``. + +Priority order: + +- If ``callable`` is provided, it is used. +- Else if ``fields`` is provided, the related object(s) are serialized. +- Otherwise, the value is resolved using ``name``. + +Related objects +~~~~~~~~~~~~~~~ + +You can export fields from related models in two ways: + +- **Dot notation** (for ``ForeignKey`` or ``OneToOne``): + + .. code-block:: python + + "profile.phone_number" + +- **Structured extraction using ``fields``**: + + .. code-block:: python + + { + "name": "groups", + "fields": ["name"], + } + +If the attribute resolves to a queryset (e.g. reverse ``ForeignKey`` or +``ManyToMany``), multiple values are serialized into a single CSV cell. + +Query optimization +~~~~~~~~~~~~~~~~~~ + +- ``select_related`` can be used for ``ForeignKey`` and ``OneToOne`` + relations. +- ``prefetch_related`` can be used for reverse relations and + ``ManyToMany`` fields. + +Example: + +.. code-block:: python + + { + "fields": [ + "username", + "email", + "profile.phone_number", + { + "name": "groups", + "fields": ["name"], + }, + ], + "select_related": ["profile"], + "prefetch_related": ["groups"], + } .. 
_openwisp_users_user_password_expiration: diff --git a/openwisp_users/management/commands/export_users.py b/openwisp_users/management/commands/export_users.py index 8690e6c6..489f948e 100644 --- a/openwisp_users/management/commands/export_users.py +++ b/openwisp_users/management/commands/export_users.py @@ -2,78 +2,196 @@ from django.contrib.auth import get_user_model from django.core.exceptions import ObjectDoesNotExist -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError +from django.db.models.manager import BaseManager +from django.db.models.query import QuerySet +from django.utils.translation import gettext_lazy as _ from ... import settings as app_settings User = get_user_model() +def normalize_field(field): + """Normalize a string or dict field definition to a dict.""" + if isinstance(field, dict): + return field + return {"name": field} + + class Command(BaseCommand): - help = "Exports user data to a CSV file" + help = _("Exports user data to a CSV file") + + @staticmethod + def _normalize_value(value): + """Convert None to empty string, otherwise stringify the value.""" + return "" if value is None else str(value) + + @staticmethod + def _get_header(field): + normalized = normalize_field(field) + header = normalized.get("header") + if header: + return header + header_fields = normalized.get("header_fields") or normalized.get("fields") + if header_fields: + return f"{normalized['name']} ({', '.join(header_fields)})" + return normalized["name"] def add_arguments(self, parser): parser.add_argument( "--exclude-fields", dest="exclude_fields", default="", - help="Comma-separated list of fields to exclude from export", + help=_("Comma-separated list of fields to exclude from export"), ) parser.add_argument( "--filename", dest="filename", default="openwisp_exported_users.csv", - help=( + help=_( "Filename for the exported CSV, defaults to" ' "openwisp_exported_users.csv"' ), ) def handle(self, 
*args, **options): - fields = app_settings.EXPORT_USERS_COMMAND_CONFIG.get("fields", []).copy() + raw_fields = app_settings.EXPORT_USERS_COMMAND_CONFIG.get("fields", []) # Get the fields to be excluded from the command-line argument - exclude_fields = options.get("exclude_fields").split(",") - # Remove excluded fields from the export fields - fields = [field for field in fields if field not in exclude_fields] - # Fetch all user data in a single query using select_related for related models - queryset = User.objects.select_related( - *app_settings.EXPORT_USERS_COMMAND_CONFIG.get("select_related", []), - ).order_by("date_joined") + exclude_fields = [ + t.strip() for t in options.get("exclude_fields").split(",") if t.strip() + ] + # Remove excluded fields from the export fields (match on the field name) + fields = [ + field + for field in raw_fields + if normalize_field(field)["name"] not in exclude_fields + ] + # Fetch all user data using select_related and prefetch_related + queryset = ( + User.objects.select_related( + *app_settings.EXPORT_USERS_COMMAND_CONFIG.get("select_related", []), + ) + .prefetch_related( + *app_settings.EXPORT_USERS_COMMAND_CONFIG.get("prefetch_related", []), + ) + .order_by("date_joined") + ) # Prepare a CSV writer filename = options.get("filename") - csv_file = open(filename, "w", newline="") - csv_writer = csv.writer(csv_file) - - # Write header row - csv_writer.writerow(fields) - - # Write data rows - for user in queryset.iterator(): - data_row = [] - for field in fields: - # Extract the value from related models - if "." 
in field:
-                    related_model, related_field = field.split(".")
-                    try:
-                        related_value = getattr(
-                            getattr(user, related_model), related_field
-                        )
-                    except ObjectDoesNotExist:
-                        data_row.append("")
-                    else:
-                        data_row.append(related_value)
-                elif field == "organizations":
-                    organizations = []
-                    for org_id, user_perm in user.organizations_dict.items():
-                        organizations.append(f'({org_id},{user_perm["is_admin"]})')
-                    data_row.append("\n".join(organizations))
-                else:
-                    data_row.append(getattr(user, field))
-            csv_writer.writerow(data_row)
-
-        # Close the CSV file
-        csv_file.close()
+        with open(filename, "w", newline="", encoding="utf-8") as csv_file:
+            csv_writer = csv.writer(csv_file)
+            # Write header row using the name of each field
+            csv_writer.writerow([self._get_header(f) for f in fields])
+
+            # Write data rows
+            for user in queryset.iterator(chunk_size=1000):
+                row = []
+                for field in fields:
+                    val = self._get_field_value(user, field)
+                    row.append(val)
+                csv_writer.writerow(row)
 
         self.stdout.write(
-            self.style.SUCCESS(f"User data exported successfully to {filename}!")
+            self.style.SUCCESS(
+                _("User data exported successfully to {filename}!").format(
+                    filename=filename
+                )
+            )
         )
+
+    def _format_rows(self, rows):
+        """Format rows into a cell string.
+
+        Single subfield, single value → val
+        Single subfield, multiple values → (val1,val2,...)
+        Multiple subfields → tuple-per-row format: ((v1,v2)\n(v3,v4))
+        """
+        if not rows:
+            return ""
+        if len(rows[0]) == 1:
+            values = ",".join(row[0] for row in rows)
+            return f"({values})" if len(rows) > 1 else values
+        return "\n".join("(" + ",".join(row) + ")" for row in rows)
+
+    def serialize_related(self, manager, subfields):
+        """Serialize a RelatedManager queryset using the given subfields."""
+        rows = []
+        # We use manager.all() instead of manager.iterator() to utilize the
+        # prefetch_related queryset cache. The iterator() method would bypass the cache
+        # and cause additional queries.
+ for obj in manager.all(): + # convert None -> empty string to avoid 'None' literals in CSV + row = [] + for f in subfields: + val = self._get_nested_attr(obj, f) + row.append(self._normalize_value(val)) + rows.append(row) + return self._format_rows(rows) + + def _get_nested_attr(self, obj, attr_path): + """Resolve a dotted attribute path on an object. + + Returns the resolved value or None when an intermediate attribute + is missing or raises ObjectDoesNotExist/AttributeError. When a + related manager/queryset is encountered (detected via QuerySet + instance or presence of `all()`), the remaining path is resolved + for each item and results are combined. + """ + if not attr_path: + return obj + parts = attr_path.split(".") + current = obj + for i, part in enumerate(parts): + try: + current = getattr(current, part) + except (ObjectDoesNotExist, AttributeError): + # missing attribute or intermediate raises -> None sentinel + return None + # Detect querysets/related managers robustly. + if (isinstance(current, (QuerySet, BaseManager))) and (i < len(parts) - 1): + remaining_path = ".".join(parts[i + 1 :]) + # We use current.all() instead of current.iterator() to utilize + # the prefetch_related queryset cache. The iterator() method + # would bypass the cache and cause additional queries. + rows = [] + for item in current.all(): + v = self._get_nested_attr(item, remaining_path) + rows.append([self._normalize_value(v)]) + return self._format_rows(rows) + return current + + def _get_field_value(self, user, field): + normalized = normalize_field(field) + name = normalized["name"] + callable_fn = normalized.get("callable") + subfields = normalized.get("fields") + # Priority: callable > fields > name + if callable_fn is not None: + try: + val = callable_fn(user) + except CommandError: + # Allow CommandError raised by callable to propagate unchanged. 
+ raise + except Exception as e: + func_name = getattr(callable_fn, "__name__", repr(callable_fn)) + raise CommandError( + _( + "Error calling function {func_name!r} for field '{name}': {e}" + ).format(func_name=func_name, name=name, e=e) + ) + return self._normalize_value(val) + if subfields is not None: + attr = self._get_nested_attr(user, name) + if attr is None: + return "" + if isinstance(attr, (QuerySet, BaseManager)): + return self.serialize_related(attr, subfields) + row = [ + self._normalize_value(self._get_nested_attr(attr, f)) for f in subfields + ] + return self._format_rows([row]) + val = self._get_nested_attr(user, name) + if isinstance(val, (QuerySet, BaseManager)): + return "" + return self._normalize_value(val) diff --git a/openwisp_users/settings.py b/openwisp_users/settings.py index 82a3ccf4..73531737 100644 --- a/openwisp_users/settings.py +++ b/openwisp_users/settings.py @@ -1,3 +1,4 @@ +import swapper from django.conf import settings from openwisp_utils.utils import default_or_test @@ -13,26 +14,50 @@ AUTH_BACKEND_AUTO_PREFIXES = getattr( settings, "OPENWISP_USERS_AUTH_BACKEND_AUTO_PREFIXES", tuple() ) -EXPORT_USERS_COMMAND_CONFIG = { - "fields": [ - "id", - "username", - "email", - "password", - "first_name", - "last_name", - "is_staff", - "is_active", - "date_joined", - "phone_number", - "birth_date", - "location", - "notes", - "language", - "organizations", - ], - "select_related": [], -} +_OPENWISP_USERS_APP_LABEL = swapper.split( + swapper.get_model_name("openwisp_users", "OrganizationUser") +)[0] + + +def export_organizations(user): + """ + Export organizations using prefetch_related data when available. 
+ """ + orgs = getattr(user, f"{_OPENWISP_USERS_APP_LABEL}_organizationuser").all() + if not orgs: + return "" + return "\n".join(f"({org.organization_id},{org.is_admin})" for org in orgs) + + +EXPORT_USERS_COMMAND_CONFIG = getattr( + settings, + "OPENWISP_USERS_EXPORT_USERS_COMMAND_CONFIG", + { + "fields": [ + "id", + "username", + "email", + "password", + "first_name", + "last_name", + "is_staff", + "is_active", + "date_joined", + "phone_number", + "birth_date", + "location", + "notes", + "language", + { + "name": "organizations", + "header_fields": ["organization_id", "is_admin"], + "callable": export_organizations, + }, + ], + "select_related": [], + "prefetch_related": [f"{_OPENWISP_USERS_APP_LABEL}_organizationuser"], + }, +) USER_PASSWORD_EXPIRATION = getattr( settings, "OPENWISP_USERS_USER_PASSWORD_EXPIRATION", 0 ) diff --git a/openwisp_users/tests/test_commands.py b/openwisp_users/tests/test_commands.py index 69b4131f..9da25982 100644 --- a/openwisp_users/tests/test_commands.py +++ b/openwisp_users/tests/test_commands.py @@ -1,15 +1,17 @@ import csv +import re from io import StringIO from unittest.mock import patch from django.core.files.temp import NamedTemporaryFile -from django.core.management import call_command +from django.core.management import CommandError, call_command from django.test import TestCase from rest_framework.authtoken.models import Token from openwisp_utils.tests import capture_stdout from .. 
import settings as app_settings +from ..management.commands.export_users import Command from .utils import TestOrganizationMixin @@ -25,7 +27,8 @@ def tearDown(self): def test_export_users(self): org1 = self._create_org(name="org1") org2 = self._create_org(name="org2") - user = self._create_user() + # create a user with a non-ASCII first_name to validate UTF-8 export + user = self._create_user(first_name="Téstér") operator = self._create_operator() admin = self._create_admin() self._create_org_user(organization=org1, user=user, is_admin=True) @@ -39,16 +42,34 @@ def test_export_users(self): stdout.getvalue(), ) - # Read the content of the temporary file - with open(self.temp_file.name, "r") as temp_file: + # Read the content of the temporary file (explicit encoding) + with open(self.temp_file.name, "r", encoding="utf-8") as temp_file: csv_reader = csv.reader(temp_file) csv_data = list(csv_reader) # 3 user and 1 header self.assertEqual(len(csv_data), 4) - self.assertEqual( - csv_data[0], app_settings.EXPORT_USERS_COMMAND_CONFIG["fields"] - ) + # Expected headers are the keys produced by normalize_field for the + # default EXPORT_USERS_COMMAND_CONFIG. These are stable values and + # asserted explicitly to avoid mirroring production code in the test. 
+ expected_headers = [ + "id", + "username", + "email", + "password", + "first_name", + "last_name", + "is_staff", + "is_active", + "date_joined", + "phone_number", + "birth_date", + "location", + "notes", + "language", + "organizations (organization_id, is_admin)", + ] + self.assertEqual(csv_data[0], expected_headers) # Ensuring ordering self.assertEqual(csv_data[1][0], str(user.id)) self.assertEqual(csv_data[2][0], str(operator.id)) @@ -57,6 +78,8 @@ def test_export_users(self): self.assertEqual(csv_data[1][-1], f"({org1.id},True)\n({org2.id},False)") self.assertEqual(csv_data[2][-1], f"({org2.id},False)") self.assertEqual(csv_data[3][-1], "") + # Validate non-ASCII value exported correctly (first_name index 4) + self.assertEqual(csv_data[1][4], "Téstér") @capture_stdout() def test_exclude_fields(self): @@ -64,11 +87,27 @@ def test_exclude_fields(self): call_command( "export_users", filename=self.temp_file.name, + # Exclude all fields except "id" exclude_fields=",".join( - app_settings.EXPORT_USERS_COMMAND_CONFIG["fields"][1:] + [ + "username", + "email", + "password", + "first_name", + "last_name", + "is_staff", + "is_active", + "date_joined", + "phone_number", + "birth_date", + "location", + "notes", + "language", + "organizations", + ] ), ) - with open(self.temp_file.name, "r") as temp_file: + with open(self.temp_file.name, "r", encoding="utf-8") as temp_file: csv_reader = csv.reader(temp_file) csv_data = list(csv_reader) @@ -79,13 +118,17 @@ def test_exclude_fields(self): @patch.object( app_settings, "EXPORT_USERS_COMMAND_CONFIG", - {"fields": ["id", "auth_token.key"]}, + { + "fields": ["id", "auth_token.key"], + "select_related": ["auth_token"], + "prefetch_related": [], + }, ) def test_related_fields(self): user = self._create_user() token = Token.objects.create(user=user) stdout = StringIO() - with self.assertNumQueries(2): + with self.assertNumQueries(1): call_command("export_users", filename=self.temp_file.name, stdout=stdout) self.assertIn( f"User data 
exported successfully to {self.temp_file.name}", @@ -93,14 +136,231 @@ def test_related_fields(self): ) # Read the content of the temporary file - with open(self.temp_file.name, "r") as temp_file: + with open(self.temp_file.name, "r", encoding="utf-8") as temp_file: csv_reader = csv.reader(temp_file) csv_data = list(csv_reader) - # 3 user and 1 header + # 1 user and 1 header self.assertEqual(len(csv_data), 2) - self.assertEqual( - csv_data[0], app_settings.EXPORT_USERS_COMMAND_CONFIG["fields"] - ) + # When fields are ["id", "auth_token.key"] the expected headers + # are the literal keys used to identify columns in the CSV. + expected_headers = ["id", "auth_token.key"] + self.assertEqual(csv_data[0], expected_headers) self.assertEqual(csv_data[1][0], str(user.id)) self.assertEqual(csv_data[1][1], str(token.key)) + + @patch.object( + app_settings, + "EXPORT_USERS_COMMAND_CONFIG", + { + "fields": ["id", "auth_token.key"], + "select_related": ["auth_token"], + "prefetch_related": [], + }, + ) + @capture_stdout() + def test_related_fields_no_n_plus_1(self): + """Query count must not grow with additional users.""" + user1 = self._create_user() + self._create_operator() + token1 = Token.objects.create(user=user1) + # user2 intentionally has no token to cover the ObjectDoesNotExist path + with self.assertNumQueries(1): + call_command("export_users", filename=self.temp_file.name) + with open(self.temp_file.name, "r", encoding="utf-8") as temp_file: + csv_reader = csv.reader(temp_file) + csv_data = list(csv_reader) + # 2 users and 1 header + self.assertEqual(len(csv_data), 3) + self.assertEqual(csv_data[1][1], str(token1.key)) + self.assertEqual(csv_data[2][1], "") + + def test_callable_error_handling(self): + with self.subTest("Callable that raises non-CommandError"): + + def _broken_callable(user): + raise ValueError("test error") + + config = { + "fields": ["id", {"name": "broken", "callable": _broken_callable}], + "select_related": [], + "prefetch_related": [], + } + 
self._create_user() + stderr = StringIO() + with ( + patch.object(app_settings, "EXPORT_USERS_COMMAND_CONFIG", config), + self.assertRaises(CommandError) as context, + ): + # the command wraps callable errors in CommandError with callable name + call_command( + "export_users", filename=self.temp_file.name, stderr=stderr + ) + self.assertIn("Error calling function '", str(context.exception)) + + with self.subTest("Callable raises CommandError"): + + def _broken_command_error_callable(user): + raise CommandError("original msg") + + config2 = { + "fields": [ + "id", + {"name": "broken", "callable": _broken_command_error_callable}, + ], + "select_related": [], + "prefetch_related": [], + } + stderr = StringIO() + with ( + patch.object(app_settings, "EXPORT_USERS_COMMAND_CONFIG", config2), + self.assertRaises(CommandError) as context2, + ): + call_command( + "export_users", filename=self.temp_file.name, stderr=stderr + ) + self.assertIn("original msg", str(context2.exception)) + + @patch.object( + app_settings, + "EXPORT_USERS_COMMAND_CONFIG", + { + "fields": [ + "id", + # single related object with single subfield + {"name": "auth_token", "fields": ["key"]}, + # single related object with multiple subfields + {"name": "auth_token", "fields": ["key", "created"]}, + # nullable field + {"name": "birth_date", "fields": ["year"]}, + # manager with single subfield + { + "name": "openwisp_users_organizationuser", + "fields": ["organization_id"], + }, + # manager with multiple subfields + { + "name": "openwisp_users_organizationuser", + "fields": ["organization_id", "is_admin"], + }, + # dot-notation on a manager + "openwisp_users_organizationuser.organization_id", + ], + "select_related": ["auth_token"], + "prefetch_related": ["openwisp_users_organizationuser"], + }, + ) + @capture_stdout() + def test_subfields_dict_field(self): + org = self._create_org(name="org1") + org2 = self._create_org(name="org2") + user1 = self._create_user(birth_date=None) + user2 = 
self._create_operator(birth_date=None) + token = Token.objects.create(user=user1) + self._create_org_user(organization=org, user=user1, is_admin=True) + self._create_org_user(organization=org2, user=user1, is_admin=False) + # user2 has no token (covers ObjectDoesNotExist) + # and no org membership (covers empty manager) + call_command("export_users", filename=self.temp_file.name) + with open(self.temp_file.name, "r", encoding="utf-8") as temp_file: + csv_reader = csv.reader(temp_file) + csv_data = list(csv_reader) + # 2 users + 1 header + self.assertEqual(len(csv_data), 3) + expected_headers = [ + "id", + "auth_token (key)", + "auth_token (key, created)", + "birth_date (year)", + "openwisp_users_organizationuser (organization_id)", + "openwisp_users_organizationuser (organization_id, is_admin)", + "openwisp_users_organizationuser.organization_id", + ] + self.assertEqual(csv_data[0], expected_headers) + # user1: token present, birth_date None, one org membership + self.assertEqual(csv_data[1][0], str(user1.id)) + # subfields, single obj, single subfield → no wrapping + self.assertEqual(csv_data[1][1], token.key) + # single-obj multi-subfield: wrapped shape ((key,created)) + self.assertRegex( + csv_data[1][2], + rf"^\({re.escape(token.key)},", + ) + # birth_date is None + self.assertEqual(csv_data[1][3], "") + # single-subfield manager, single object → no wrapping + self.assertEqual(csv_data[1][4], f"({org.id},{org2.id})") + # multi-subfield manager + self.assertEqual(csv_data[1][5], f"({org.id},True)\n({org2.id},False)") + # dot-notation manager + self.assertEqual(csv_data[1][6], f"({org.id},{org2.id})") + # user2: no token, no org membership + self.assertEqual(csv_data[2][0], str(user2.id)) + # ObjectDoesNotExist auth_token + self.assertEqual(csv_data[2][1], "") + # ObjectDoesNotExist auth_token multi-subfield + self.assertEqual(csv_data[2][2], "") + # birth_date is None + self.assertEqual(csv_data[2][3], "") + # empty manager + self.assertEqual(csv_data[2][4], "") 
+ self.assertEqual(csv_data[2][5], "") + # empty dot-notation manager + self.assertEqual(csv_data[2][6], "") + + def test_dot_notation_objectdoesnotexist_on_sub_attr(self): + """Returns empty string when sub-attribute access raises ObjectDoesNotExist.""" + from django.core.exceptions import ObjectDoesNotExist + + class FakeIntermediate: + @property + def sub_field(self): + raise ObjectDoesNotExist + + class FakeUser: + pk = 1 + intermediate = FakeIntermediate() + + result = Command()._get_field_value(FakeUser(), "intermediate.sub_field") + self.assertEqual(result, "") + + def test_plain_relation_field_returns_empty_string(self): + org = self._create_org(name="org1") + user = self._create_user() + self._create_org_user(organization=org, user=user, is_admin=True) + result = Command()._get_field_value(user, "openwisp_users_organizationuser") + self.assertEqual(result, "") + + @capture_stdout() + def test_custom_header(self): + def _get_is_active(user): + return "yes" if user.is_active else "no" + + config = { + "fields": [ + "id", + { + "name": "active_status", + "header": "Active?", + "callable": _get_is_active, + }, + { + "name": "custom_orgs", + "header_fields": ["org_id", "is_admin"], + "callable": _get_is_active, + }, + ], + "select_related": [], + "prefetch_related": [], + } + self._create_user(is_active=True) + with patch.object(app_settings, "EXPORT_USERS_COMMAND_CONFIG", config): + call_command("export_users", filename=self.temp_file.name) + with open(self.temp_file.name, "r", encoding="utf-8") as f: + csv_reader = csv.reader(f) + csv_data = list(csv_reader) + self.assertEqual( + csv_data[0], ["id", "Active?", "custom_orgs (org_id, is_admin)"] + ) + self.assertEqual(csv_data[1][1], "yes") + self.assertEqual(csv_data[1][2], "yes")