From b53bf16b3d3cf883791d91f80e7a97ebe9d2dc1c Mon Sep 17 00:00:00 2001
From: Steve Lamb <slamb@azavea.com>
Date: Tue, 16 Dec 2014 11:56:18 -0500
Subject: [PATCH] Allow custom serialization instead of localize

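Fixes #63 on GitHub.
Attempts to address #49 on GitHub.

Replaces the call to Django's localize with per-field serializer
functions supplied via the new field_serializer_map keyword argument.
This also adds support for formatting datetimes.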
---
 README.rst                                    |  3 +-
 djqscsv/djqscsv.py                            | 28 ++++++++++++-----
 setup.py                                      |  2 +-
 .../djqscsv_tests/tests/test_utilities.py     | 31 +++++++++++++++++--
 4 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index 720a07c..99a0fac 100644
--- a/README.rst
+++ b/README.rst
@@ -73,9 +73,10 @@ views.py::
 keyword arguments
 -----------------
 
-This module exports two functions that write CSVs, ``render_to_csv_response`` and ``write_csv``. Both of these functions require their own positional arguments. In addition, they both take three optional keyword arguments:
+This module exports two functions that write CSVs, ``render_to_csv_response`` and ``write_csv``. Both of these functions require their own positional arguments. In addition, they both take the following optional keyword arguments:
 
 - ``field_header_map`` - (default: ``None``) A dictionary mapping names of model fields to column header names. If specified, the csv writer will use these column headers. Otherwise, it will use defer to other parameters for rendering column names.
+- ``field_serializer_map`` - (default: ``{}``) A dictionary mapping names of model fields to functions that serialize their values to text. For example, ``{'created': (lambda x: x.strftime('%Y/%m/%d')) }`` will serialize a datetime field called ``created`` as ``YYYY/MM/DD``. Fields without an entry in the map use the default serialization.
 - ``use_verbose_names`` - (default: ``True``) A boolean determining whether to use the django field's ``verbose_name``, or to use it's regular field name as a column header. Note that if a given field is found in the ``field_header_map``, this value will take precendence.
 - ``field_order`` - (default: ``None``) A list of fields to determine the sort order. This list need not be complete: any fields not specified will follow those in the list with the order they would have otherwise used.
 
diff --git a/djqscsv/djqscsv.py b/djqscsv/djqscsv.py
index bedc703..95470a5 100644
--- a/djqscsv/djqscsv.py
+++ b/djqscsv/djqscsv.py
@@ -2,7 +2,6 @@
 import datetime
 
 from django.core.exceptions import ValidationError
-from django.templatetags.l10n import localize
 from django.utils.text import slugify
 from django.http import HttpResponse
 
@@ -20,6 +19,7 @@
 # Keyword arguments that will be used by this module
 # the rest will be passed along to the csv writer
 DJQSCSV_KWARGS = {'field_header_map': None,
+                  'field_serializer_map': None,
                   'use_verbose_names': True,
                   'field_order': None}
 
@@ -59,6 +59,7 @@ def write_csv(queryset, file_obj, **kwargs):
 
     # process keyword arguments to pull out the ones used by this function
     field_header_map = kwargs.get('field_header_map', {})
+    field_serializer_map = kwargs.get('field_serializer_map', {})
     use_verbose_names = kwargs.get('use_verbose_names', True)
     field_order = kwargs.get('field_order', None)
 
@@ -121,7 +122,7 @@ def write_csv(queryset, file_obj, **kwargs):
     writer.writerow(merged_header_map)
 
     for record in values_qs:
-        record = _sanitize_unicode_record(record)
+        record = _sanitize_unicode_record(field_serializer_map, record)
         writer.writerow(record)
 
 
@@ -154,20 +155,31 @@ def _validate_and_clean_filename(filename):
     return filename
 
 
-def _sanitize_unicode_record(record):
+def _sanitize_unicode_record(field_serializer_map, record):
 
-    def _sanitize_value(value):
+    def _serialize_value(value):
+        # provide default serializer for the case when
+        # non text values get sent without a serializer
+        if isinstance(value, datetime.datetime):
+            return value.isoformat()
+        else:
+            return unicode(value)
+
+    def _sanitize_text(value):
+        # make sure every text value is of type 'str', coercing unicode
         if isinstance(value, unicode):
             return value.encode("utf-8")
-        elif isinstance(value, datetime.datetime):
-            return value.isoformat().encode("utf-8")
+        elif isinstance(value, str):
+            return value
         else:
-            return localize(value)
+            return str(value).encode("utf-8")
 
     obj = {}
     for key, val in six.iteritems(record):
         if val is not None:
-            obj[_sanitize_value(key)] = _sanitize_value(val)
+            serializer = field_serializer_map.get(key, _serialize_value)
+            newval = serializer(val)
+            obj[_sanitize_text(key)] = _sanitize_text(newval)
 
     return obj
 
diff --git a/setup.py b/setup.py
index 3253191..7688aa1 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='django-queryset-csv',
-    version='0.2.10',
+    version='0.3.0',
     description='A simple python module for writing querysets to csv',
     long_description=open('README.rst').read(),
     author=author,
diff --git a/test_app/djqscsv_tests/tests/test_utilities.py b/test_app/djqscsv_tests/tests/test_utilities.py
index 954eb83..3135f22 100644
--- a/test_app/djqscsv_tests/tests/test_utilities.py
+++ b/test_app/djqscsv_tests/tests/test_utilities.py
@@ -43,7 +43,7 @@ class SanitizeUnicodeRecordTests(TestCase):
     def test_sanitize(self):
         record = {'name': 'Tenar',
                   'nickname': u'\ufeffThe White Lady of Gont'}
-        sanitized = djqscsv._sanitize_unicode_record(record)
+        sanitized = djqscsv._sanitize_unicode_record({}, record)
         self.assertEqual(sanitized,
                          {'name': 'Tenar',
                           'nickname': '\xef\xbb\xbfThe White Lady of Gont'})
@@ -51,11 +51,38 @@ def test_sanitize(self):
     def test_sanitize_date(self):
         record = {'name': 'Tenar',
                   'created': datetime.datetime(1, 1, 1)}
-        sanitized = djqscsv._sanitize_unicode_record(record)
+        sanitized = djqscsv._sanitize_unicode_record({}, record)
         self.assertEqual(sanitized,
                          {'name': 'Tenar',
                           'created': '0001-01-01T00:00:00'})
 
+    def test_sanitize_date_with_non_string_formatter(self):
+        """
+        This test is only to make sure an edge case provides a sane
+        default and works as expected. It is not recommended to follow
+        this practice.
+        """
+        record = {'name': 'Tenar'}
+        serializer = {'name': lambda d: len(d) }
+        sanitized = djqscsv._sanitize_unicode_record(serializer, record)
+        self.assertEqual(sanitized, {'name': '5'})
+
+    def test_sanitize_date_with_formatter(self):
+        record = {'name': 'Tenar',
+                  'created': datetime.datetime(1973, 5, 13)}
+        serializer = {'created': lambda d: d.strftime('%Y-%m-%d') }
+        sanitized = djqscsv._sanitize_unicode_record(serializer, record)
+        self.assertEqual(sanitized,
+                         {'name': 'Tenar',
+                          'created': '1973-05-13'})
+
+    def test_sanitize_date_with_bad_formatter(self):
+        record = {'name': 'Tenar',
+                  'created': datetime.datetime(1973, 5, 13)}
+        formatter = lambda d: d.day
+        with self.assertRaises(AttributeError):
+            djqscsv._sanitize_unicode_record(formatter, record)
+
 
 class AppendDatestampTests(TestCase):