Skip to content

Commit abb0545

Browse files
committed
fix Flake8 complaints
- Fix whitespace, indentation, unused variables, and a duplicate function
- Refactor to extract identical function definitions into a shared helper
1 parent 2864197 commit abb0545

File tree

7 files changed

+32
-52
lines changed

7 files changed

+32
-52
lines changed

.flake8

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ max-line-length=127
1717

1818
# List ignore rules one per line.
1919
ignore =
20+
C901
21+
E501
2022
W503

ckanext/xloader/jobs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ def xloader_data_into_datastore_(input, job_dict, logger):
182182
if hasattr(h, "datastore_rw_resource_url_types"):
183183
datastore_rw_resource_url_types = h.datastore_rw_resource_url_types()
184184
else:
185-
#fallback for 2.10.x or older.
185+
# fallback for 2.10.x or older.
186186
datastore_rw_resource_url_types = ['datastore']
187-
187+
188188
if resource.get('url_type') in datastore_rw_resource_url_types:
189189
logger.info('Ignoring resource - R/W DataStore resources are '
190190
'managed with the Datastore API')
@@ -267,7 +267,7 @@ def tabulator_load():
267267
logger.warning('Load using COPY failed: %s', e)
268268
logger.info('Trying again with tabulator')
269269
tabulator_load()
270-
except JobTimeoutException as e:
270+
except JobTimeoutException:
271271
try:
272272
tmp_file.close()
273273
except FileNotFoundError:
@@ -393,7 +393,7 @@ def _download_resource_data(resource, data, api_key, logger):
393393
raise HTTPError(
394394
message=err_message, status_code=None,
395395
request_url=url, response=None)
396-
except JobTimeoutException as e:
396+
except JobTimeoutException:
397397
tmp_file.close()
398398
logger.warning('Job timed out after %ss', RETRIED_JOB_TIMEOUT)
399399
raise JobError('Job timed out after {}s'.format(RETRIED_JOB_TIMEOUT))

ckanext/xloader/loader.py

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
204204
if f['id'] in existing_info:
205205
f['info'] = existing_info[f['id']]
206206
f['strip_extra_white'] = existing_info[f['id']].get('strip_extra_white') if 'strip_extra_white' in existing_info[f['id']] \
207-
else existing_fields_by_headers[f['id']].get('strip_extra_white', True)
207+
else existing_fields_by_headers[f['id']].get('strip_extra_white', True)
208208

209209
'''
210210
Delete or truncate existing datastore table before proceeding,
@@ -222,39 +222,32 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
222222
fields = [
223223
{'id': header_name,
224224
'type': 'text',
225-
'strip_extra_white': True,}
225+
'strip_extra_white': True}
226226
for header_name in headers]
227227

228228
logger.info('Fields: %s', fields)
229229

230+
def _make_whitespace_stripping_iter(super_iter):
231+
def strip_white_space_iter():
232+
for row in super_iter():
233+
if len(row) == len(fields):
234+
for _index, _cell in enumerate(row):
235+
# only strip white space if strip_extra_white is True
236+
if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
237+
row[_index] = _cell.strip()
238+
yield row
239+
return strip_white_space_iter
240+
230241
save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
231242
try:
232243
with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
233244
skip_rows=skip_rows) as stream:
234-
super_iter = stream.iter
235-
def strip_white_space_iter():
236-
for row in super_iter():
237-
if len(row) == len(fields):
238-
for _index, _cell in enumerate(row):
239-
# only strip white space if strip_extra_white is True
240-
if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
241-
row[_index] = _cell.strip()
242-
yield row
243-
stream.iter = strip_white_space_iter
245+
stream.iter = _make_whitespace_stripping_iter(stream.iter)
244246
stream.save(**save_args)
245247
except (EncodingError, UnicodeDecodeError):
246248
with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
247249
skip_rows=skip_rows) as stream:
248-
super_iter = stream.iter
249-
def strip_white_space_iter():
250-
for row in super_iter():
251-
if len(row) == len(fields):
252-
for _index, _cell in enumerate(row):
253-
# only strip white space if strip_extra_white is True
254-
if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
255-
row[_index] = _cell.strip()
256-
yield row
257-
stream.iter = strip_white_space_iter
250+
stream.iter = _make_whitespace_stripping_iter(stream.iter)
258251
stream.save(**save_args)
259252
csv_filepath = f_write.name
260253

@@ -284,9 +277,8 @@ def strip_white_space_iter():
284277
raise LoaderError('Could not create the database table: {}'
285278
.format(e))
286279

287-
288-
# datstore_active is switched on by datastore_create - TODO temporarily
289-
# disable it until the load is complete
280+
# datastore_active is switched on by datastore_create
281+
# TODO temporarily disable it until the load is complete
290282

291283
with engine.begin() as conn:
292284
_disable_fulltext_trigger(conn, resource_id)

ckanext/xloader/plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ def receive_validation_report(self, validation_report):
9191
res_dict = toolkit.get_action('resource_show')({'ignore_auth': True},
9292
{'id': validation_report.get('resource_id')})
9393
if (toolkit.asbool(toolkit.config.get('ckanext.xloader.validation.enforce_schema', True))
94-
or res_dict.get('schema', None)) and validation_report.get('status') != 'success':
95-
# A schema is present, or required to be present
96-
return
94+
or res_dict.get('schema', None)) and validation_report.get('status') != 'success':
95+
# A schema is present, or required to be present
96+
return
9797
# if validation is running in async mode, it is running from the redis workers.
9898
# thus we need to do sync=True to have Xloader put the job at the front of the queue.
9999
sync = toolkit.asbool(toolkit.config.get(u'ckanext.validation.run_on_update_async', True))

ckanext/xloader/tests/test_jobs.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@ def get_response(download_url, headers):
2727
resp.headers = headers
2828
return resp
2929

30+
3031
def get_large_response(download_url, headers):
3132
"""Mock jobs.get_response() method to fake a large file."""
3233
resp = Response()
3334
resp.raw = io.BytesIO(_TEST_FILE_CONTENT.encode())
3435
resp.headers = {'content-length': 2000000000}
3536
return resp
3637

38+
3739
def get_large_data_response(download_url, headers):
3840
"""Mock jobs.get_response() method."""
3941
resp = Response()
@@ -42,6 +44,7 @@ def get_large_data_response(download_url, headers):
4244
resp.headers = headers
4345
return resp
4446

47+
4548
def _get_temp_files(dir='/tmp'):
4649
return [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]
4750

ckanext/xloader/tests/test_loader.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,23 +1177,6 @@ def test_simple_large_file(self, Session):
11771177
u"text",
11781178
]
11791179

1180-
def test_simple_large_file(self, Session):
1181-
csv_filepath = get_sample_filepath("simple-large.csv")
1182-
resource = factories.Resource()
1183-
resource_id = resource['id']
1184-
loader.load_table(
1185-
csv_filepath,
1186-
resource_id=resource_id,
1187-
mimetype="text/csv",
1188-
logger=logger,
1189-
)
1190-
assert self._get_column_types(Session, resource_id) == [
1191-
u"int4",
1192-
u"tsvector",
1193-
u"numeric",
1194-
u"text",
1195-
]
1196-
11971180
def test_with_mixed_types(self, Session):
11981181
csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv")
11991182
resource = factories.Resource()

ckanext/xloader/tests/test_plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_require_validation(self, monkeypatch):
8181

8282
# TODO: test IPipeValidation
8383
assert not func.called # because of the validation_status not being `success`
84-
func.called = None # reset
84+
func.called = None # reset
8585

8686
helpers.call_action(
8787
"resource_update",
@@ -118,7 +118,7 @@ def test_enforce_validation_schema(self, monkeypatch):
118118

119119
# TODO: test IPipeValidation
120120
assert not func.called # because of the schema being empty
121-
func.called = None # reset
121+
func.called = None # reset
122122

123123
helpers.call_action(
124124
"resource_update",
@@ -132,7 +132,7 @@ def test_enforce_validation_schema(self, monkeypatch):
132132

133133
# TODO: test IPipeValidation
134134
assert not func.called # because of the validation_status not being `success` and there is a schema
135-
func.called = None # reset
135+
func.called = None # reset
136136

137137
helpers.call_action(
138138
"resource_update",

0 commit comments

Comments (0)