Skip to content

Commit 59cd017

Browse files
Merge pull request #6 from dabapps/streaming
Streaming
2 parents 42162c7 + 309d58a commit 59cd017

File tree

3 files changed

+111
-18
lines changed

3 files changed

+111
-18
lines changed

README.md

+9-1
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,22 @@ Here, our `fetch_records` is just spitting the data straight to the headers for
5656

5757
Note that we specify the type of the header for `headers`. This allows the typechecker to find out quickly if any of your headers are accessing data incorrectly.
5858

59-
Now, we can use it. We have two methods for getting data out - `to_list` and `as_response`. `to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want) whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views
59+
Now, we can use it. We have several methods for getting data out:
60+
61+
`to_list` will convert the data to a list of lists of strings (allowing you to pass it to whatever other CSV handling options you want):
6062

6163
```python
6264
LlamaExporter(my_llamas).to_list()
65+
```
66+
67+
whereas `as_response` will turn it into a prepared HttpResponse for returning from one of your views:
6368

69+
```python
6470
LlamaExporter.as_response('my_llamas')
6571
```
6672

73+
If your CSV is large and takes a long time to generate, you should use a generator or stream the response. `to_iter` and `as_streamed_response` are the generator counterparts to the above methods, working in exactly the same way, just returning a generator and a `StreamingHttpResponse` respectively. By default, `to_list` calls `to_iter`, so if you need to do anything custom, it's best to do it in `to_iter`.
74+
6775
You can also provide an ordering to the headers, if you want. Simply assign a list of strings to `header_order` and when the data is unpacked, those headers whose labels match these will be placed in that order.
6876

6977
```python

csv_wrangler/exporter.py

+42-17
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from abc import ABCMeta, abstractmethod
22
import csv
3-
from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic
3+
from typing import List, Any, NamedTuple, Callable, Dict, TypeVar, Generic, Generator
44
from typing import Optional # noqa
55
from functools import reduce
6-
from django.http import HttpResponse
6+
from django.http import HttpResponse, StreamingHttpResponse
77

88

99
T = TypeVar('T')
@@ -18,6 +18,17 @@ def __init__(self, label: str, callback: Callable[[T], str]) -> None:
1818
self.callback = callback
1919

2020

21+
class Echo:
22+
"""
23+
An object that implements just the write method of the file-like
24+
interface.
25+
https://docs.djangoproject.com/en/1.10/howto/outputting-csv/
26+
"""
27+
def write(self, value):
28+
"""Write the value by returning it, instead of storing in a buffer."""
29+
return value
30+
31+
2132
class BaseExporter(metaclass=ABCMeta):
2233
"""
2334
The root exporter class
@@ -26,16 +37,28 @@ class BaseExporter(metaclass=ABCMeta):
2637
header_order = None # type: Optional[List[str]]
2738

2839
@abstractmethod
29-
def to_list(self) -> List[List[str]]: # pragma: no cover
40+
def to_iter(self) -> Generator[List[str], None, None]: # pragma: no cover
3041
pass
3142

43+
def to_list(self) -> List[List[str]]:
44+
return list(self.to_iter())
45+
3246
def as_response(self, filename: str='export') -> HttpResponse:
3347
response = HttpResponse(content_type='text/csv')
3448
response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename)
3549
writer = csv.writer(response)
3650
[writer.writerow(row) for row in self.to_list()]
3751
return response
3852

53+
def as_streamed_response(self, filename: str='export') -> StreamingHttpResponse:
54+
writer = csv.writer(Echo())
55+
response = StreamingHttpResponse(
56+
(writer.writerow(row) for row in self.to_iter()),
57+
content_type='text/csv'
58+
)
59+
response['Content-Disposition'] = 'attachment; filename="{}.csv"'.format(filename)
60+
return response
61+
3962

4063
class Exporter(Generic[T], BaseExporter, metaclass=ABCMeta):
4164

@@ -63,14 +86,12 @@ def sort_headers(self, headers: List[Header[T]]) -> List[Header[T]]:
6386
self.header_order.index(header.label) if header.label in self.header_order else len(self.header_order)
6487
)
6588

66-
def to_list(self) -> List[List[str]]:
89+
def to_iter(self) -> Generator[List[str], None, None]:
6790
records = self.fetch_records()
6891
headers = self.get_sorted_headers()
69-
lines = [
70-
[header.callback(record) for header in headers]
71-
for record in records
72-
]
73-
return [self.get_header_labels()] + lines
92+
yield self.get_header_labels()
93+
for record in records:
94+
yield [header.callback(record) for header in headers]
7495

7596

7697
class MultiExporter(BaseExporter):
@@ -84,6 +105,12 @@ def to_list(self) -> List[List[str]]:
84105
exportings = [exporter.to_list() for exporter in self.exporters]
85106
return reduce(lambda memo, exporting: exporting if memo == [] else memo + [[]] + exporting, exportings, [])
86107

108+
def to_iter(self) -> Generator[List[str], None, None]:
109+
for exporter in self.exporters:
110+
yield from exporter.to_iter()
111+
if exporter != self.exporters[-1]:
112+
yield []
113+
87114

88115
SimpleHeader = NamedTuple('Header', [('label', str), ('callback', Callable[[Any, str], str])])
89116

@@ -109,14 +136,12 @@ def get_csv_headers(self) -> List[SimpleHeader]:
109136
def get_csv_header_labels(self) -> List[str]:
110137
return [header.label for header in self.get_csv_headers()]
111138

112-
def to_list(self) -> List[List[str]]:
139+
def to_iter(self) -> Generator[List[str], None, None]:
113140
records = self.fetch_records()
114141
headers = self.get_csv_headers()
115-
lines = [
116-
[header.callback(record, header.label) for header in headers]
117-
for record in records
118-
]
119-
return [self.get_csv_header_labels()] + lines
142+
yield self.get_csv_header_labels()
143+
for record in records:
144+
yield [header.callback(record, header.label) for header in headers]
120145

121146

122147
class PassthroughExporter(BaseExporter):
@@ -126,5 +151,5 @@ class PassthroughExporter(BaseExporter):
126151
def __init__(self, data: List[List[str]]) -> None:
127152
self.data = data
128153

129-
def to_list(self) -> List[List[str]]:
130-
return self.data
154+
def to_iter(self) -> Generator[List[str], None, None]:
155+
yield from self.data

csv_wrangler/test_exporter.py

+60
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import NamedTuple
33
from typing import List, Any
44
from csv_wrangler.exporter import Exporter, Header, MultiExporter, SimpleExporter, PassthroughExporter
5+
from django.http import StreamingHttpResponse, HttpResponse
56

67

78
DummyData = NamedTuple('DummyData', [('a', str), ('b', int), ('c', float)])
@@ -48,6 +49,16 @@ def test_to_list(self) -> None:
4849
self.assertEqual(results[2], ['b', '2', '2.0'])
4950
self.assertEqual(results[3], ['c', '3', '3.0'])
5051

52+
def test_to_iter(self) -> None:
53+
results = self.exporter.to_iter()
54+
self.assertEqual(next(results), ['a', 'b', 'c'])
55+
self.assertEqual(next(results), ['a', '1', '1.0'])
56+
self.assertEqual(next(results), ['b', '2', '2.0'])
57+
self.assertEqual(next(results), ['c', '3', '3.0'])
58+
59+
def test_to_iter_is_same_as_list(self) -> None:
60+
self.assertListEqual(list(self.exporter.to_iter()), self.exporter.to_list())
61+
5162
def test_ordering(self) -> None:
5263
self.exporter.header_order = ['c', 'b', 'a']
5364
results = self.exporter.to_list()
@@ -67,6 +78,7 @@ def test_partial_ordering(self) -> None:
6778
def test_as_response(self) -> None:
6879
filename = 'hello'
6980
results = self.exporter.as_response(filename)
81+
self.assertIsInstance(results, HttpResponse)
7082
self.assertEqual(results['content-type'], 'text/csv')
7183
self.assertEqual(results['Content-Disposition'], 'attachment; filename="{}.csv"'.format(filename))
7284
self.assertEqual(str(results.content, 'utf-8'), '\r\n'.join([
@@ -75,6 +87,18 @@ def test_as_response(self) -> None:
7587
in self.exporter.to_list()
7688
]) + '\r\n')
7789

90+
def test_as_streamed_response(self) -> None:
91+
filename = 'hello'
92+
results = self.exporter.as_streamed_response(filename)
93+
self.assertIsInstance(results, StreamingHttpResponse)
94+
self.assertEqual(results['content-type'], 'text/csv')
95+
self.assertEqual(results['Content-Disposition'], 'attachment; filename="hello.csv"')
96+
self.assertEqual(results.getvalue().decode(), '\r\n'.join([
97+
','.join(row)
98+
for row
99+
in self.exporter.to_list()
100+
]) + '\r\n')
101+
78102

79103
class MultiExporterTestCase(TestCase):
80104

@@ -97,6 +121,21 @@ def test_multiple_exporters(self) -> None:
97121
self.assertEqual(results[6], ['llama'])
98122
self.assertEqual(results[7], ['drama'])
99123

124+
def test_multiple_exporters_to_iter(self) -> None:
125+
multi_exporter = MultiExporter([
126+
self.exporter,
127+
self.exporter_2
128+
])
129+
results = multi_exporter.to_iter()
130+
self.assertEqual(next(results), ['a', 'b', 'c'])
131+
self.assertEqual(next(results), ['a', '1', '1.0'])
132+
self.assertEqual(next(results), ['b', '2', '2.0'])
133+
self.assertEqual(next(results), ['c', '3', '3.0'])
134+
self.assertEqual(next(results), [])
135+
self.assertEqual(next(results), ['dummy'])
136+
self.assertEqual(next(results), ['llama'])
137+
self.assertEqual(next(results), ['drama'])
138+
100139

101140
class SimpleExporterTestCase(TestCase):
102141

@@ -110,6 +149,16 @@ def test_simple_exporter(self) -> None:
110149
self.assertEqual(results[0], ['a', 'b', 'c'])
111150
self.assertEqual(results[1], ['5', '', '15'])
112151

152+
def test_simple_exporter_to_iter(self) -> None:
153+
exporter = SimpleExporter(['a', 'b', 'c'], [{
154+
'a': 5,
155+
'b': None,
156+
'c': 15
157+
}])
158+
results = exporter.to_iter()
159+
self.assertEqual(next(results), ['a', 'b', 'c'])
160+
self.assertEqual(next(results), ['5', '', '15'])
161+
113162

114163
class PassthroughExporterTestCase(TestCase):
115164

@@ -124,6 +173,17 @@ def test_passthrough_to_list(self) -> None:
124173
self.assertEqual(results[1], ['1', '2', '3'])
125174
self.assertEqual(results[2], ['2', '3', '4'])
126175

176+
def test_passthrough_to_iter(self) -> None:
177+
exporter = PassthroughExporter([
178+
['a', 'b', 'c'],
179+
['1', '2', '3'],
180+
['2', '3', '4'],
181+
])
182+
results = exporter.to_iter()
183+
self.assertEqual(next(results), ['a', 'b', 'c'])
184+
self.assertEqual(next(results), ['1', '2', '3'])
185+
self.assertEqual(next(results), ['2', '3', '4'])
186+
127187
def test_malformed_passthrough(self) -> None:
128188
exporter = PassthroughExporter([
129189
['a', 'b', 'c'],

0 commit comments

Comments
 (0)