Skip to content

Commit 92e40c7

Browse files
authored
Merge pull request #31 from q-verse/umar/qverse/add_support_for_utf-16_format
EDE-559 add support for utf 16 format
2 parents c098eea + 77a17dc commit 92e40c7

File tree

3 files changed

+79
-12
lines changed

3 files changed

+79
-12
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Contains helper functions for Qverse registration application.
3+
"""
4+
from csv import reader, Sniffer
5+
import io
6+
import logging
7+
8+
LOGGER = logging.getLogger(__name__)
9+
10+
11+
def get_file_header_row(file_content, encoding):
    """
    Returns fields of header row of the file.

    Arguments:
        file_content (bytes): Raw, still-encoded file content that has been read
            from the file (a byte string, i.e. ``str`` on Python 2)
        encoding (str): File encoding format e.g: utf-8, utf-16

    Returns:
        header_row (list): Lower-cased, whitespace-stripped fields of the header
            row of CSV file

    Raises:
        csv.Error: Propagated from ``Sniffer.sniff`` when no delimiter can be
            determined from the first line of the content.
    """
    # Bytes that are not valid in `encoding` are silently dropped ('ignore'),
    # so a wrong encoding shows up as a garbled header, not a decode error.
    decoded_file = file_content.decode(encoding, 'ignore')
    io_string = io.StringIO(decoded_file)

    # Only the first line is used to guess the delimiter.
    dialect = Sniffer().sniff(io_string.readline())

    # Rewind so that the CSV reader starts at the header line again.
    io_string.seek(0)

    # The builtin next() works on both Python 2 and Python 3 iterators,
    # whereas the `.next()` method only exists on Python 2.
    header_row = next(reader(io_string, delimiter=dialect.delimiter))
    return [heading.lower().strip() for heading in header_row]
28+
29+
30+
def get_file_encoding(file_path):
    """
    Returns the file encoding format.

    The whole file is read as utf-8; if that fails with a decoding error the
    file is assumed to be utf-16 (the utf-16 guess is not verified).

    Arguments:
        file_path (str): Path of the file whose encoding format will be returned

    Returns:
        encoding (str): 'utf-8' if the file decodes cleanly as utf-8, otherwise
            'utf-16'. Returns None if the file could not be opened or read
            (IOError), in which case the error is logged.
    """
    try:
        # The context manager closes the handle on every exit path, including
        # an IOError raised while reading.
        with io.open(file_path, 'r', encoding='utf-8') as input_file:
            try:
                input_file.read()
            except UnicodeDecodeError:
                return 'utf-16'
        return 'utf-8'

    except IOError as error:
        LOGGER.exception('(%s) --- %s', error.filename, error.strerror)
        return None

openedx/features/qverse_features/registration/signals.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Signals for qverse registration application.
33
"""
4+
import io
45
import logging
56
import re
67
from csv import DictReader, DictWriter, Error, Sniffer
@@ -19,6 +20,7 @@
1920
SURNAME_MAX_LENGTH, FIRST_NAME_MAX_LENGTH,
2021
MOBILE_NUMBER_MAX_LENGTH, OTHER_NAME_MAX_LENGTH,
2122
MAX_LEVEL_CHOICES, MAX_PROGRAMME_CHOICES)
23+
from openedx.features.qverse_features.registration.helpers import get_file_encoding
2224
from openedx.features.qverse_features.registration.tasks import send_bulk_mail_to_newly_created_students
2325
from student.models import UserProfile
2426

@@ -51,8 +53,15 @@ def create_users_from_csv_file(sender, instance, created, **kwargs):
5153
csv_file = None
5254
dialect = None
5355
try:
54-
csv_file = open(instance.admission_file.path, 'r')
55-
dialect = Sniffer().sniff(csv_file.readline())
56+
encoding = get_file_encoding(instance.admission_file.path)
57+
csv_file = io.open(instance.admission_file.path, 'r', encoding=encoding)
58+
try:
59+
dialect = Sniffer().sniff(csv_file.readline())
60+
except Error:
61+
LOGGER.exception('Could not determine delimiter in the file.')
62+
csv_file.close()
63+
return
64+
5665
csv_file.seek(0)
5766
except IOError as error:
5867
LOGGER.exception('({}) --- {}'.format(error.filename, error.strerror))
@@ -66,6 +75,7 @@ def create_users_from_csv_file(sender, instance, created, **kwargs):
6675
reader = (dict((k.strip().lower(), v.strip() if v else v) for k, v in row.items()) for row in dict_reader)
6776
output_file_rows = []
6877
users_with_updated_emails = set()
78+
6979
try:
7080
CsvRowValidator.prepare_csv_row_validator()
7181
for row in reader:

openedx/features/qverse_features/registration/validators.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""
22
Validation utils for qverse registration application.
33
"""
4-
from csv import reader, Sniffer
5-
import io
4+
from csv import Error
65

76
from django.core.exceptions import ValidationError
87

8+
from openedx.features.qverse_features.registration.helpers import get_file_header_row
9+
910

1011
def validate_admission_file(file):
1112
"""
@@ -22,18 +23,19 @@ def validate_admission_file(file):
2223
'regno', 'firstname', 'surname', 'othername', 'levelid',
2324
'programmeid', 'departmentid', 'mobile', 'email'
2425
]
25-
decoded_file = file.read().decode('utf-8')
26-
io_string = io.StringIO(decoded_file)
2726
header_row = []
2827
try:
29-
dialect = Sniffer().sniff(io_string.readline())
30-
io_string.seek(0)
31-
header_row = reader(io_string, delimiter=dialect.delimiter).next()
32-
header_row = [heading.lower().strip() for heading in header_row]
33-
except StopIteration:
28+
file_content = file.read()
29+
30+
try:
31+
header_row = get_file_header_row(file_content, 'utf-8')
32+
except Error:
33+
header_row = get_file_header_row(file_content, 'utf-16')
34+
35+
except Exception:
3436
raise ValidationError('', code='invalid')
3537

36-
if not all(field_name in header_row for field_name in FIELD_NAMES):
38+
if not all([field_name in header_row for field_name in FIELD_NAMES]):
3739
raise ValidationError('', code='invalid')
3840

3941
if 'error' in header_row:

0 commit comments

Comments
 (0)