Commit e124f7c
use dataframes to manipulate the csvs
1 parent: c51822a
Showing 5 changed files with 80 additions and 210 deletions.
@@ -3,4 +3,5 @@ __pycache__/
 database_data.py
 *.csv
+.vscode
@@ -8,3 +8,5 @@ psycopg2==2.7.5
 pylint==2.1.1
 six==1.11.0
 wrapt==1.10.11
+pandas==0.23.4
+numpy==1.15.1
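pandas and numpy are the new dependencies that the rewritten deliverytimes.py further down relies on. As a quick illustration (a minimal sketch, not part of the commit), the tab-separated exports can be loaded with the same "(null)"-as-missing convention the new code uses:

import pandas as pd

# Illustrative only: read one of the tab-separated exports, mapping the
# literal string "(null)" to NaN the same way deliverytimes.py does.
df = pd.read_csv("routeplans_test.csv", sep="\t", header=0, na_values=["(null)"])
print(df.shape)   # (rows, columns) actually read
print(df.dtypes)  # column types inferred by pandas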
@@ -0,0 +1,6 @@
+if [ ! -d "venv" ]; then
+  virtualenv venv -p python3
+fi
+source venv/bin/activate
+pip install -r requirements.txt
+python deliverytimes.py
@@ -1,161 +1,57 @@
-import csv
-from datetime import datetime
-
-
-def create_csv(routeplans_dict, stoplocations_dict):
-    with open('estimated_delivery_times.csv', 'w', newline='') as csvfile:
-        fieldnames = ['routeplanid', 'routeid', 'driverid', 'stoplocationid', 'deliverystatus', 'duration', 'estimated_duration']
-        writer = csv.DictWriter(csvfile, fieldnames = fieldnames)
-
-        writer.writeheader()
-
-        for routeplanid, routes in routeplans_dict.items():
-            for routeid, drivers in routes.items():
-                for driverid, stoplocs in drivers.items():
-                    for stoplocationid, durations in stoplocs.items():
-                        deliverystatus = '(null)'
-                        if stoplocationid in stoplocations_dict[routeplanid][routeid]:
-                            deliverystatus = stoplocations_dict[routeplanid][routeid][stoplocationid]["deliverystatus"]
-                        writer.writerow({
-                            'routeplanid': routeplanid,
-                            'routeid': routeid,
-                            'driverid': driverid,
-                            'stoplocationid': stoplocationid,
-                            'deliverystatus': deliverystatus,
-                            'duration': durations["duration"],
-                            'estimated_duration': durations["estimated_duration"] if "estimated_duration" in durations else 0
-                        })
+import pandas as pd
+
+
+def create_csv(routeplans, stoplocations, driverpositions, estimated_durations):
+    df = routeplans.merge(stoplocations[['stoplocationid', 'deliverystatus']], on='stoplocationid', how='left')
+    df = df.merge(estimated_durations[['routeplanid', 'estimated_duration']], on='routeplanid', how='left')
+    print(df)
+    df.to_csv('estimated_delivery_times.csv', index = False)
 
 
 def remove_tzinfo(date_string):
     return date_string.split("+")[0]
 
 
 def remove_microseconds(date_string):
     return date_string.split(".")[0]
 
 
-# def routeplans():
-#     routeplans = {}
-
-#     with open("routeplans.csv", "r") as f:
-#         reader = csv.reader(f, delimiter="\t")
-#         for i, line in enumerate(reader):
-#             if i == 0: # skip headers
-#                 continue
-#             if line[2] == '(null)': # no driver
-#                 continue
-#             routeplanid = int(line[0])
-#             routeid = int(line[1])
-#             driverid = int(line[2])
-#             stoplocationid = int(line[3])
-#             duration = int(line[4]) if line[4] != '(null)' else 0
-
-#             if routeplanid in routeplans:
-#                 routes = routeplans[routeplanid]
-
-#                 if routeid in routes:
-#                     drivers = routes[routeid]
-
-#                     if driverid in drivers:
-#                         stoplocations = drivers[driverid]
-
-#                         stoplocations[stoplocationid] = {"duration": duration}
-#                     else:
-#                         drivers[driverid] = {stoplocationid: {"duration": duration}}
-#                 else:
-#                     routes[routeid] = {
-#                         driverid: {
-#                             stoplocationid: {
-#                                 "duration": duration
-#                             }
-#                         }
-#                     }
-#             else:
-#                 routeplans[routeplanid] = {
-#                     routeid: {
-#                         driverid: {
-#                             stoplocationid: {
-#                                 "duration": duration
-#                             }
-#                         }
-#                     }
-#                 }
-
-#     return routeplans
-
-
 def routeplans():
-    routeplans = []
-
-    with open("routeplans_test.csv", "r") as f:
-        reader = csv.reader(f, delimiter="\t")
-        for i, line in enumerate(reader):
-            if i == 0: # skip headers
-                continue
-            if line[2] == '(null)': # no driver
-                continue
-
-            routeplans.append(line)
-
-    return routeplans
+    csv = "routeplans_test.csv"
+    routeplans = pd.read_csv(csv, sep='\t', header=0, na_values=["(null)"])
+    print('%s read shape: %s' % (csv, routeplans.shape))
+    routeplans.dropna(subset=["driverid"], inplace=True) # ignoring routes without driver
+    print(routeplans.ftypes)
+    return routeplans
+
+
+def get_routeids(stoplocations):
+    return stoplocations.groupby(['routeid', 'stoplocationid'])
 
 
 def stoplocations():
-    stoplocations = {}
-
-    with open("stoplocations_test.csv", "r") as f:
-        reader = csv.reader(f, delimiter="\t")
-        for i, line in enumerate(reader):
-            if i == 0:
-                continue
-            if line[3] == '(null)' or line[4] == '(null)':
-                continue
-            routeplanid = int(line[0])
-            routeid = int(line[1])
-            stoplocationid = int(line[2])
-            data = {
-                "position": (float(line[3]), float(line[4])),
-                "deliverystatus": int(line[5]),
-                "deliverystatustimestamp": datetime.fromisoformat(line[6] if line[6] != '(null)' else 0)
-            }
-
-            if routeplanid in stoplocations:
-                routes = stoplocations[routeplanid]
-
-                if routeid in routes:
-                    stoplocs = routes[routeid]
-                    stoplocs[stoplocationid] = data # Assumption: only unique stoplocationids
-                else:
-                    routes[routeid] = {stoplocationid: data}
-            else:
-                stoplocations[routeplanid] = {routeid: {stoplocationid: data}}
+    csv = "stoplocations_test.csv"
+    stoplocations = pd.read_csv(csv, sep='\t', header=0, na_values=["(null)"])
+    print('%s read shape: %s' % (csv, stoplocations.shape))
+    stoplocations.dropna(subset=["latitude", "longitude"], inplace=True) # ignoring locations without latitude or longitude
+    stoplocations['position'] = stoplocations.apply(lambda x: (x['latitude'], x['longitude']), axis=1)
+    stoplocations['deliverystatustimestamp'] = pd.to_datetime(stoplocations['deliverystatustimestamp'].apply(lambda x: remove_microseconds(remove_tzinfo(x))))
+    print(stoplocations.ftypes)
+    return stoplocations
 
 
-def driverpositions():
-    driverpositions = {}
-
-    with open("driverpositions_test.csv", "r") as f:
-        reader = csv.reader(f, delimiter="\t") # use "," when using Terje's file
-        for i, line in enumerate(reader):
-            if i == 0: # skip headers
-                continue
-            routeid = int(line[0])
-            driverid = int(line[1])
-            data = {
-                "position": (float(line[2]), float(line[3])),
-                "logtime": datetime.fromisoformat(remove_microseconds(remove_tzinfo(line[4]))),
-                "speed": float(line[6]) if line[6] else 0.0
-            }
-
-            if routeid in driverpositions:
-                drivers = driverpositions[routeid]
-
-                if driverid in drivers:
-                    drivers[driverid].append(data)
-                else:
-                    drivers[driverid] = [data]
-            else:
-                driverpositions[routeid] = {driverid: [data]}
-
-    # sort
-    for routeid, driver in driverpositions.items():
-        for driverid, position_data in driver.items():
-            driverpositions[routeid][driverid] = sorted(position_data, key=lambda position_datum: position_datum["logtime"])
+def get_drivers(driverpositions):
+    return driverpositions.groupby(['routeid', 'driverid'])
+
+
+def driverpositions():
+    csv = "driverpositions_test.csv"
+    driverpositions = pd.read_csv(csv, sep='\t', header=0, na_values=["(null)"]) # use "," when using Terje's file
+    print('%s read shape: %s' % (csv, driverpositions.shape))
+    driverpositions.dropna(subset=["latitude", "longitude"], inplace=True) # ignoring positions without latitude or longitude
+    driverpositions['position'] = driverpositions.apply(lambda x: (x['latitude'], x['longitude']), axis=1)
+    driverpositions['logtime'] = pd.to_datetime(driverpositions['logtime'].apply(lambda x: remove_microseconds(remove_tzinfo(x))))
+    driverpositions['speed'] = driverpositions['speed'].fillna(value=0)
+    driverpositions = driverpositions.sort_values(by=['routeid', 'driverid', 'logtime'])
+    print(driverpositions.ftypes)
+    return driverpositions