forked from Lever-age/data-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
68 lines (30 loc) · 1.41 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python
# coding: utf-8
import probablepeople
# probablepeople component labels whose values are abbreviations; clean_name()
# strips '.' characters from the ends of these fields when they are present.
people_fields_to_strip_periods = [
    # Initials and suffixes of the first tagged person
    'FirstInitial',
    'MiddleInitial',
    'LastInitial',
    'SuffixGenerational',
    'SuffixOther',
    # The same components carrying the 'Second' prefix
    'SecondFirstInitial',
    'SecondMiddleInitial',
    'SecondLastInitial',
    'SecondSuffixGenerational',
    'SecondSuffixOther',
    # Corporation legal-type labels
    'CorporationLegalType',
    'SecondCorporationLegalType',
]
def clean_name(full_name):
    """
    Tag a free-text name with the probablepeople library and return its parts.

    The input is upper-cased and every space is replaced with ', ' before the
    string is handed to probablepeople.tag(). The tagged components are copied
    into a plain dict, and two extra keys are added:

        'name_type'      the type label returned by probablepeople.tag()
        'original_name'  the upper-cased, comma-separated string that was
                         tagged (not the caller's raw input)

    For every key listed in people_fields_to_strip_periods that is present in
    the result, leading and trailing '.' characters are removed from the value.

    Returns:
        dict of tagged name components on success.
        On any failure (for example a non-string argument, or an error raised
        by probablepeople.tag) the error is printed and the exception INSTANCE
        is returned rather than raised, so callers must check the result type.
    """
    try:
        # NOTE(review): this turns every space into ', ' (e.g. 'JOHN SMITH' ->
        # 'JOHN, SMITH'); confirm this is the intended pre-processing.
        original_name = full_name.upper().replace(' ',', ')

        # Make edits to original_name before calling probablepeople
        tagged_parts, name_type = probablepeople.tag(original_name)

        name_dict = dict(tagged_parts)
        name_dict['name_type'] = name_type
        name_dict['original_name'] = original_name

        # Drop '.' from both ends of abbreviation-style fields.
        for field in people_fields_to_strip_periods:
            if field in name_dict:
                name_dict[field] = name_dict[field].strip('.')

        return name_dict
    except Exception as e:
        # Best-effort contract kept for existing callers: report the problem and
        # hand the exception back instead of propagating it.
        # Single-argument print(...) emits identical output on Python 2 and 3.
        print('probablepeople ERROR:')
        print(e)
        return e