Skip to content

Commit

Permalink
use dataframes to read and manipulate the csvs
Browse files Browse the repository at this point in the history
  • Loading branch information
belinhacbr committed Sep 25, 2018
1 parent c51822a commit e124f7c
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 210 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ __pycache__/

database_data.py

*.csv
*.csv
.vscode
97 changes: 31 additions & 66 deletions deliverytimes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from geopy.distance import geodesic
import pandas as pd

from datetime import datetime, timedelta
import time

Expand All @@ -10,21 +12,25 @@
driverpositions = driverpositions()
stoplocations = stoplocations()

AVERAGE_WALKING_SPEED = 5 # km/h
R = 50 # geofence radius in meters (compared against geodesic(...).meters); the comment used to say 30 meters while the value is 50 -- TODO confirm which is intended
AVERAGE_WALKING_SPEED = 5 # km/h
R = 50 # geofence radius in meters (compared against geodesic(...).meters); the comment used to say 30 meters while the value is 50 -- TODO confirm which is intended


def driver_is_inside_location_area(driver_position, location_position):
    """Tell whether the driver is within the stop location's radius.

    Both positions are handed unchanged to ``geodesic``; the resulting
    distance in meters is compared (inclusively) with the module-level
    radius ``R``.
    """
    distance_in_meters = geodesic(driver_position, location_position).meters
    return distance_in_meters <= R


def estimated_duration(timestamps_list):
    """Return the longest stay, in whole seconds, among the recorded visits.

    Args:
        timestamps_list: sequence of ``(enter, leave)`` pairs whose
            difference ``leave - enter`` is a timedelta-like object.

    Returns:
        0 when ``timestamps_list`` is empty (or otherwise falsy); else the
        largest ``leave - enter`` duration, truncated to an ``int`` number
        of seconds.
    """
    if not timestamps_list:
        return 0

    # Call total_seconds() on the difference itself rather than through the
    # timedelta class, so any timedelta-like result is handled uniformly.
    return max(
        int((leave - enter).total_seconds())
        for enter, leave in timestamps_list
    )


def driver_is_moving_at_walking_speed(speed):
    """Tell whether ``speed`` is below the walking-speed threshold.

    The threshold is ``AVERAGE_WALKING_SPEED`` plus a fixed buffer of 2,
    and the comparison is strict.
    """
    walking_speed_limit = AVERAGE_WALKING_SPEED + 2  # buffer
    return speed < walking_speed_limit


def enter_and_leave_timestamps(driver_positions, stoplocation_position, deliverystatustimestamp):
enter_timestamp = 0
Expand All @@ -35,7 +41,7 @@ def enter_and_leave_timestamps(driver_positions, stoplocation_position, delivery
# -- What does "best one" mean?
timestamps_list = []

for e in driver_positions:
for idx, e in driver_positions.iterrows():
if enter_timestamp == 0:
# Optimizing enter_timestamp
# Idea: Use speed data to shrink R, decreasing the area.
Expand All @@ -47,7 +53,7 @@ def enter_and_leave_timestamps(driver_positions, stoplocation_position, delivery
timestamps_list.append((enter_timestamp, leave_timestamp))
enter_timestamp = 0
leave_timestamp = 0

# Optimizing leave_timestamp
# Use deliverystatustimestamp if driver registered delivery before leaving the area
# Comment: Some drivers might register delivery before actually delivering. E.g., a driver might drive to
Expand All @@ -57,73 +63,32 @@ def enter_and_leave_timestamps(driver_positions, stoplocation_position, delivery

return timestamps_list

# def fill_in_delivery_time_estimates():
# for routeplanid, routes in routeplans.items():
# for routeid, driver in routes.items():
# for driverid, stoplocs in driver.items():
# for stoplocationid, duration in stoplocs.items():
# stoplocation = {}

# try:
# # some stoplocationids are not in stoplocations
# stoplocation = stoplocations[routeplanid][routeid][stoplocationid]
# driver_positions = driverpositions[routeid][driverid]
# except:
# continue

# deliverystatustimestamp = stoplocations[routeplanid][routeid][stoplocationid]["deliverystatustimestamp"]

# timestamps = enter_and_leave_timestamps(
# driver_positions,
# stoplocation["position"],
# deliverystatustimestamp
# )
estimated_durations_data = []

# routeplans[routeplanid][routeid][driverid][stoplocationid]["estimated_duration"] = estimated_duration(timestamps)

estimated_durations = []

def fill_in_delivery_time_estimates():
    """Estimate the time spent at the stop of every route-plan row.

    For each row of the module-level ``routeplans`` frame, looks up the
    matching stop location and the positions logged by that row's driver on
    that row's route, derives the enter/leave timestamps, and appends
    ``[routeplanid, estimated_duration]`` to ``estimated_durations_data``.
    Rows with no matching stop location or no driver positions are skipped.

    NOTE(review): results are keyed by ``routeplanid`` only; if a route plan
    can contain several stops, a later merge on that key would repeat rows --
    confirm against the data.
    """
    for _, routeplan in routeplans.iterrows():
        # some stoplocationids are not in stoplocations
        stoplocation = stoplocations[
            stoplocations['stoplocationid'] == routeplan['stoplocationid']
        ]

        # Build one combined mask: chaining two boolean selections applies
        # the second mask to an already-filtered frame, which forces pandas
        # to re-align it (and warn).
        same_route = driverpositions['routeid'] == routeplan['routeid']
        same_driver = driverpositions['driverid'] == routeplan['driverid']
        driver_positions = driverpositions[same_route & same_driver]

        # Check the filtered positions, not the whole driverpositions table.
        if stoplocation.empty or driver_positions.empty:
            continue

        # If several stop locations match, the first one is used.
        stoplocation_position = stoplocation["position"].iloc[0]
        deliverystatustimestamp = stoplocation["deliverystatustimestamp"].iloc[0]

        timestamps = enter_and_leave_timestamps(
            driver_positions,
            stoplocation_position,
            deliverystatustimestamp
        )
        estimated_durations_data.append(
            [routeplan['routeplanid'], estimated_duration(timestamps)]
        )

# for routeplanid, routes in routeplans.items():
# for routeid, driver in routes.items():
# for driverid, stoplocs in driver.items():
# for stoplocationid, durations in stoplocs.items():
# if len(durations) > 1:
# print(durations)

print(estimated_durations)
# Script entry point: compute the estimates, turn the collected
# [routeplanid, estimated_duration] rows into a DataFrame, and write the CSV.
if __name__ == '__main__':
print('starting...')
fill_in_delivery_time_estimates()
estimated_durations = pd.DataFrame(estimated_durations_data, columns=['routeplanid', 'estimated_duration'])
create_csv(routeplans, stoplocations, driverpositions, estimated_durations)

# Report elapsed time. start_time is defined outside the visible part of the
# file (presumably time.time() taken at startup -- confirm).
print("--- %s seconds ---" % (time.time() - start_time))
print("--- %s seconds ---" % (time.time() - start_time))
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ psycopg2==2.7.5
pylint==2.1.1
six==1.11.0
wrapt==1.10.11
pandas==0.23.4
numpy==1.15.1
6 changes: 6 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Create the local virtualenv if it does not exist yet, install the pinned
# dependencies into it, and run the delivery-time estimation script.
#
# `source` is not available in plain POSIX sh, hence the bash shebang.

# Stop at the first failing step so the script never runs against a missing
# or half-installed environment.
set -e

if [ ! -d "venv" ]; then
    virtualenv venv -p python3
fi
source venv/bin/activate
pip install -r requirements.txt
python deliverytimes.py
182 changes: 39 additions & 143 deletions utilities.py
Original file line number Diff line number Diff line change
@@ -1,161 +1,57 @@
import csv
from datetime import datetime

def create_csv(routeplans_dict, stoplocations_dict):
with open('estimated_delivery_times.csv', 'w', newline='') as csvfile:
fieldnames = ['routeplanid', 'routeid', 'driverid', 'stoplocationid', 'deliverystatus', 'duration', 'estimated_duration']
writer = csv.DictWriter(csvfile, fieldnames = fieldnames)

writer.writeheader()

for routeplanid, routes in routeplans_dict.items():
for routeid, drivers in routes.items():
for driverid, stoplocs in drivers.items():
for stoplocationid, durations in stoplocs.items():
deliverystatus = '(null)'
if stoplocationid in stoplocations_dict[routeplanid][routeid]:
deliverystatus = stoplocations_dict[routeplanid][routeid][stoplocationid]["deliverystatus"]
writer.writerow({
'routeplanid': routeplanid,
'routeid': routeid,
'driverid': driverid,
'stoplocationid': stoplocationid,
'deliverystatus': deliverystatus,
'duration': durations["duration"],
'estimated_duration': durations["estimated_duration"] if "estimated_duration" in durations else 0
})
import pandas as pd


def create_csv(routeplans, stoplocations, driverpositions, estimated_durations):
    """Combine route plans with status and estimates, then save them as CSV.

    The route plans are left-joined with the delivery status of each stop
    (on ``stoplocationid``) and then with the estimated durations (on
    ``routeplanid``). The result is printed and written, without its index,
    to 'estimated_delivery_times.csv'. ``driverpositions`` is accepted but
    not used here.
    """
    status_by_stop = stoplocations[['stoplocationid', 'deliverystatus']]
    with_status = routeplans.merge(status_by_stop, on='stoplocationid', how='left')

    estimate_by_plan = estimated_durations[['routeplanid', 'estimated_duration']]
    report = with_status.merge(estimate_by_plan, on='routeplanid', how='left')

    print(report)
    report.to_csv('estimated_delivery_times.csv', index=False)


def remove_tzinfo(date_string):
    """Return the part of ``date_string`` that precedes the first "+".

    A string without "+" is returned unchanged.
    """
    before_offset, _, _ = date_string.partition("+")
    return before_offset


def remove_microseconds(date_string):
    """Return the part of ``date_string`` that precedes the first ".".

    A string without "." is returned unchanged.
    """
    whole_seconds, _, _ = date_string.partition(".")
    return whole_seconds

# def routeplans():
# routeplans = {}

# with open("routeplans.csv", "r") as f:
# reader = csv.reader(f, delimiter="\t")
# for i, line in enumerate(reader):
# if i == 0: # skip headers
# continue
# if line[2] == '(null)': # no driver
# continue
# routeplanid = int(line[0])
# routeid = int(line[1])
# driverid = int(line[2])
# stoplocationid = int(line[3])
# duration = int(line[4]) if line[4] != '(null)' else 0

# if routeplanid in routeplans:
# routes = routeplans[routeplanid]

# if routeid in routes:
# drivers = routes[routeid]

# if driverid in drivers:
# stoplocations = drivers[driverid]

# stoplocations[stoplocationid] = {"duration": duration}
# else:
# drivers[driverid] = {stoplocationid: {"duration": duration}}
# else:
# routes[routeid] = {
# driverid: {
# stoplocationid: {
# "duration": duration
# }
# }
# }
# else:
# routeplans[routeplanid] = {
# routeid: {
# driverid: {
# stoplocationid: {
# "duration": duration
# }
# }
# }
# }

# return routeplans

def routeplans(path="routeplans_test.csv", sep="\t"):
    """Load the route plans into a DataFrame.

    Args:
        path: CSV file to read; defaults to the test extract.
        sep: column separator of that file (tab by default).

    Returns:
        The parsed frame, with "(null)" cells read as missing values and
        rows without a ``driverid`` removed.
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    frame.dropna(subset=["driverid"], inplace=True)  # ignoring routes without driver
    # DataFrame.ftypes no longer exists in current pandas; dtypes carries the
    # same per-column type information and is available in every version.
    print(frame.dtypes)
    return frame

with open("routeplans_test.csv", "r") as f:
reader = csv.reader(f, delimiter="\t")
for i, line in enumerate(reader):
if i == 0: # skip headers
continue
if line[2] == '(null)': # no driver
continue

routeplans.append(line)
def get_routeids(stoplocations):
    """Group the stop locations by the (routeid, stoplocationid) pair."""
    grouping_columns = ['routeid', 'stoplocationid']
    grouped = stoplocations.groupby(grouping_columns)
    return grouped

return routeplans

def stoplocations(path="stoplocations_test.csv", sep="\t"):
    """Load the stop locations into a DataFrame.

    Args:
        path: CSV file to read; defaults to the test extract.
        sep: column separator of that file (tab by default).

    Returns:
        The parsed frame with "(null)" cells read as missing values, rows
        without latitude or longitude removed, a ``position`` column holding
        ``(latitude, longitude)`` tuples, and ``deliverystatustimestamp``
        converted to datetimes (missing values become NaT).
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    # ignoring locations without latitude or longitude
    frame.dropna(subset=["latitude", "longitude"], inplace=True)

    # Pair the two columns directly instead of a row-wise apply.
    frame['position'] = list(zip(frame['latitude'], frame['longitude']))

    # "(null)" timestamps were turned into NaN by na_values above; only real
    # strings can be trimmed, everything else is left for to_datetime.
    frame['deliverystatustimestamp'] = pd.to_datetime(
        frame['deliverystatustimestamp'].apply(
            lambda raw: remove_microseconds(remove_tzinfo(raw))
            if isinstance(raw, str) else raw
        )
    )

    # DataFrame.ftypes no longer exists in current pandas; dtypes carries the
    # same per-column type information and is available in every version.
    print(frame.dtypes)
    return frame

def driverpositions():
driverpositions = {}

with open("driverpositions_test.csv", "r") as f:
reader = csv.reader(f, delimiter="\t") # use "," when using Terje's file
for i, line in enumerate(reader):
if i == 0: # skip headers
continue
routeid = int(line[0])
driverid = int(line[1])
data = {
"position": (float(line[2]), float(line[3])),
"logtime": datetime.fromisoformat(remove_microseconds(remove_tzinfo(line[4]))),
"speed": float(line[6]) if line[6] else 0.0
}

if routeid in driverpositions:
drivers = driverpositions[routeid]

if driverid in drivers:
drivers[driverid].append(data)
else:
drivers[driverid] = [data]
else:
driverpositions[routeid] = {driverid: [data]}

# sort
for routeid, driver in driverpositions.items():
for driverid, position_data in driver.items():
driverpositions[routeid][driverid] = sorted(position_data, key=lambda position_datum: position_datum["logtime"])

def get_drivers(driverpositions):
    """Group the driver positions by the (routeid, driverid) pair."""
    grouping_columns = ['routeid', 'driverid']
    grouped = driverpositions.groupby(grouping_columns)
    return grouped


def driverpositions(path="driverpositions_test.csv", sep="\t"):
    """Load the logged driver positions into a DataFrame.

    Args:
        path: CSV file to read; defaults to the test extract.
        sep: column separator of that file (tab by default; use "," when
            using Terje's file).

    Returns:
        The parsed frame with "(null)" cells read as missing values, rows
        without latitude or longitude removed, a ``position`` column holding
        ``(latitude, longitude)`` tuples, ``logtime`` converted to datetimes,
        missing ``speed`` replaced by 0, sorted by routeid, driverid and
        logtime.
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    # ignoring positions without latitude or longitude
    frame.dropna(subset=["latitude", "longitude"], inplace=True)

    # Pair the two columns directly instead of a row-wise apply.
    frame['position'] = list(zip(frame['latitude'], frame['longitude']))
    frame['logtime'] = pd.to_datetime(
        frame['logtime'].apply(lambda raw: remove_microseconds(remove_tzinfo(raw)))
    )
    frame['speed'] = frame['speed'].fillna(value=0)
    frame = frame.sort_values(by=['routeid', 'driverid', 'logtime'])

    # DataFrame.ftypes no longer exists in current pandas; dtypes carries the
    # same per-column type information and is available in every version.
    print(frame.dtypes)
    return frame

0 comments on commit e124f7c

Please sign in to comment.