|
| 1 | +--- |
| 2 | +jupyter: |
| 3 | + jupytext: |
| 4 | + formats: ipynb,md |
| 5 | + text_representation: |
| 6 | + extension: .md |
| 7 | + format_name: markdown |
| 8 | + format_version: '1.3' |
| 9 | + jupytext_version: 1.11.3 |
| 10 | + kernelspec: |
| 11 | + display_name: Python 3 |
| 12 | + language: python |
| 13 | + name: python3 |
| 14 | +--- |
| 15 | + |
| 16 | +# Winter Cycling Network missing links analysis |
| 17 | + |
| 18 | +Goal: identify prime areas for expansion of the Winter Cycling Network, areas that would provide the most return on investment for increasing ridership and usefulness of the network. |
| 19 | + |
| 20 | +Analysis plan: |
| 21 | +- [x] **Fall to winter comparison**: calculate the relative change in ridership between September & October 2019 and January & February 2020 (normalized to the total per month) and plot on a map over the Winter Cycling Network |
| 22 | + - Why: Fall has a higher fraction of utility cycling trips than summer; people who cycle in the fall but stop in the winter would be prime candidates for using an expanded Winter Cycling Network. |
| 23 | + |
| 24 | +To do: |
| 25 | +- [x] Load September & October 2019 and January & February 2020 Strava data |
| 26 | +- [x] Load OSM tags for the Winter Cycling Network |
| 27 | +- [x] Calculate relative change in ridership between fall (September & October) and winter (January & February), normalized to the total number of people per month (total monthly activity counts would be an alternative) |
| 28 | + - [ ] Possibly split by weekday / weekend, or use just commute trips on Strava? |
| 29 | + - [ ] Split by men and women |
| 30 | +- [x] Plot Winter Cycling Network and relative change on the same map |
| 31 | + |
| 32 | +Strava user totals: |
| 33 | + |
| 34 | +| Month | Total activities | Total people | |
| 35 | +| ---- | ----------- | -------------| |
| 36 | +| September 2019 | 46,283 | 6,079 | |
| 37 | +| October 2019 | 30,176 | 3,826 | |
| 38 | +| January 2020 | 6,033 | 663 | |
| 39 | +| February 2020 | 5,268 | 758 | |
| 40 | + |
| 41 | +Right off the top we can notice that in January fewer people are each making more trips: the ratio of trips (activities) to people is about 9:1 (6,033 / 663), whereas in September it's about 7.6:1 (46,283 / 6,079). Perhaps winter cyclists are more likely to be quite dedicated cyclists? |
| 42 | + |
| 43 | +```python |
| 44 | +# load packages |
| 45 | +import pandas as pd |
| 46 | +import numpy as np |
| 47 | +import matplotlib |
| 48 | +from mpl_toolkits.axes_grid1 import make_axes_locatable |
| 49 | +from matplotlib import pyplot as plt |
| 50 | +import seaborn as sns |
| 51 | +import geopandas as gpd # for shapefile |
| 52 | +import contextily as ctx # for background map |
| 53 | +``` |
| 54 | + |
| 55 | +```python |
| 56 | +# load the Strava per-edge daily ride counts: one CSV export per month (Sept & Oct 2019, Jan & Feb 2020) |
| 57 | +# data can be accessed on the Strava Metro downloads page for the Bike Ottawa organization |
| 58 | +Sept_2019 = pd.read_csv("data/Strava/all_edges_daily_2019-09-01-2019-09-30_ride" |
| 59 | + + "/1b5bb8a5a94591250339fa667a1bd40b690291651b529a30a0e0bbb732ea6a0f-1634409075566.csv") |
| 60 | +Oct_2019 = pd.read_csv("data/Strava/all_edges_daily_2019-10-01-2019-10-31_ride" |
| 61 | + + "/c4f3750259f4329081fc5e53cc782d5b2fd0b305cc4c7f2ba1e3cde95b8c6d58-1637607788760.csv") |
| 62 | +Jan_2020 = pd.read_csv("data/Strava/all_edges_daily_2020-01-01-2020-01-31_ride" |
| 63 | + + "/a92b60d941fcc0a525ada734d70ec2fb2aabf785ba28d1eaa9ad7293bab8559b-1634514902350.csv") |
| 64 | +Feb_2020 = pd.read_csv("data/Strava/all_edges_daily_2020-02-01-2020-02-29_ride" |
| 65 | + +"/18bf99a933c28df850a2268a62702fa98b5444359041c4e1f065771495110bc0-1637634385929.csv") |
| 66 | +``` |
| 67 | + |
| 68 | +```python |
| 69 | +# load the edge geometries: the shapefile stored next to each monthly CSV export (same directory and file stem, .shp extension) |
| 70 | +Sept_2019_shape = gpd.read_file("data/Strava/all_edges_daily_2019-09-01-2019-09-30_ride" |
| 71 | + +"/1b5bb8a5a94591250339fa667a1bd40b690291651b529a30a0e0bbb732ea6a0f-1634409075566.shp") |
| 72 | +Oct_2019_shape = gpd.read_file("data/Strava/all_edges_daily_2019-10-01-2019-10-31_ride" |
| 73 | + + "/c4f3750259f4329081fc5e53cc782d5b2fd0b305cc4c7f2ba1e3cde95b8c6d58-1637607788760.shp") |
| 74 | +Jan_2020_shape = gpd.read_file("data/Strava/all_edges_daily_2020-01-01-2020-01-31_ride" |
| 75 | + "/a92b60d941fcc0a525ada734d70ec2fb2aabf785ba28d1eaa9ad7293bab8559b-1634514902350.shp") |
| 76 | +Feb_2020_shape = gpd.read_file("data/Strava/all_edges_daily_2020-02-01-2020-02-29_ride" |
| 77 | + +"/18bf99a933c28df850a2268a62702fa98b5444359041c4e1f065771495110bc0-1637634385929.shp") |
| 78 | +``` |
| 79 | + |
| 80 | +```python |
| 81 | +# rename the shapefiles' 'edgeUID' column to 'edge_uid' so it matches the key column of the count tables used in the later joins |
| 82 | +Sept_2019_shape = Sept_2019_shape.rename(columns={"edgeUID": "edge_uid"}) |
| 83 | +Oct_2019_shape = Oct_2019_shape.rename(columns={"edgeUID": "edge_uid"}) |
| 84 | +Jan_2020_shape = Jan_2020_shape.rename(columns={"edgeUID": "edge_uid"}) |
| 85 | +Feb_2020_shape = Feb_2020_shape.rename(columns={"edgeUID": "edge_uid"}) |
| 86 | +``` |
| 87 | + |
| 88 | +```python |
| 89 | +# load the Winter Cycling Network (OSM features) from a local GeoJSON file into a GeoDataFrame |
| 90 | +wcn = gpd.read_file("data/winter-map.geojson") |
| 91 | +``` |
| 92 | + |
| 93 | +```python |
| 94 | +# quick visual check of the network geometry (ahhhhh, so easy, love geopandas) |
| 95 | +wcn.plot() |
| 96 | +``` |
| 97 | + |
| 98 | +```python |
| 99 | +# total Strava users (people) for each month, copied from the 'Strava user totals' table above; the order must match the dataframe list in the loop below |
| 100 | +user_totals = [6079, 3826, 663, 758] # September, October, January, February |
| 101 | +``` |
| 102 | + |
| 103 | +```python |
| 104 | +# combine forward + reverse counts on every segment, then divide by that month's user total; the new columns are added to each monthly dataframe in place |
| 105 | +for i, df in enumerate([Sept_2019, Oct_2019, Jan_2020, Feb_2020]): |
| 106 | + df['total_trips'] = df['forward_trip_count'] + df['reverse_trip_count'] |
| 107 | + df['total_female_people'] = df['forward_female_people_count'] + df['reverse_female_people_count'] |
| 108 | + df['total_male_people'] = df['forward_male_people_count'] + df['reverse_male_people_count'] |
| 109 | + df['normalized_trips'] = df['total_trips'] / user_totals[i] |
| 110 | + df['female_normalized'] = df['total_female_people'] / user_totals[i] |
| 111 | + df['male_normalized'] = df['total_male_people'] / user_totals[i] |
| 112 | + |
| 113 | +``` |
| 114 | + |
| 115 | +```python |
| 116 | +# stack the four monthly dataframes into one long table (no ignore_index, so each month's original row labels are kept and repeat) |
| 117 | +Strava = pd.concat([Sept_2019, Oct_2019, Jan_2020, Feb_2020]) |
| 118 | +``` |
| 119 | + |
| 120 | +```python |
| 121 | +# drop columns with data we're not using (per-direction people/commute/leisure/time-of-day/demographic/speed columns); the total_* and *_normalized columns are kept |
| 122 | +Strava = Strava.drop(columns = ['forward_people_count', 'reverse_people_count', |
| 123 | + 'forward_commute_trip_count', 'reverse_commute_trip_count', |
| 124 | + 'forward_leisure_trip_count', 'reverse_leisure_trip_count', |
| 125 | + 'forward_morning_trip_count', 'reverse_morning_trip_count', |
| 126 | + 'forward_evening_trip_count', 'reverse_evening_trip_count', |
| 127 | + 'forward_male_people_count', 'reverse_male_people_count', |
| 128 | + 'forward_female_people_count', 'reverse_female_people_count', |
| 129 | + 'forward_unspecified_people_count', 'reverse_unspecified_people_count', |
| 130 | + 'forward_13_19_people_count', 'reverse_13_19_people_count', |
| 131 | + 'forward_20_34_people_count', 'reverse_20_34_people_count', |
| 132 | + 'forward_35_54_people_count', 'reverse_35_54_people_count', |
| 133 | + 'forward_55_64_people_count', 'reverse_55_64_people_count', |
| 134 | + 'forward_65_plus_people_count', 'reverse_65_plus_people_count', |
| 135 | + 'forward_average_speed', 'reverse_average_speed', |
| 136 | + 'activity_type'] ) |
| 137 | +``` |
| 138 | + |
| 139 | +```python |
| 140 | +# make an integer 'year' column from the first '-'-separated field of 'date'; with Sept/Oct 2019 and Jan/Feb 2020 data the year doubles as the season label (2019 = fall, 2020 = winter) for the per-edge sums in the next cell |
| 141 | +Strava['year'] = Strava['date'].str.split('-', expand = True)[0] |
| 142 | +Strava['year'] = Strava['year'].astype('int') |
| 143 | +``` |
| 144 | + |
| 145 | +```python |
| 146 | +months_grouped = Strava.groupby(['edge_uid', 'year'])[['total_trips', 'normalized_trips']].sum().reset_index() |
| 147 | +``` |
| 148 | + |
| 149 | +```python |
| 150 | +# build a single edge table by outer-merging the four monthly shapefiles on (edge_uid, osmId, geometry), so an edge present in any month is kept (should be lots of overlap, possibly complete overlap) |
| 151 | +shape = Sept_2019_shape.merge(Oct_2019_shape, on = ['edge_uid', 'osmId', 'geometry'], how = 'outer') |
| 152 | +shape = shape.merge(Jan_2020_shape, on = ['edge_uid', 'osmId', 'geometry'], how = 'outer') |
| 153 | +shape = shape.merge(Feb_2020_shape, on = ['edge_uid', 'osmId', 'geometry'], how = 'outer') |
| 154 | +``` |
| 155 | + |
| 156 | +```python |
| 157 | +# create shape subset in the region of the WCN: .cx slices by coordinates [x-range, y-range], here a longitude/latitude bounding box (limits presumably in degrees; assumes the shapefiles are in lon/lat) |
| 158 | +east_lim = -75.64 |
| 159 | +west_lim = -75.76 |
| 160 | +north_lim = 45.45 |
| 161 | +south_lim = 45.37 |
| 162 | +shape_subset = shape.cx[west_lim:east_lim, south_lim: north_lim] |
| 163 | +``` |
| 164 | + |
| 165 | +```python |
| 166 | +# merge the shape data with the per-edge, per-year Strava counts using the 'edge_uid' column (inner join, so edges without any counts are dropped) |
| 167 | +Strava_osm = shape_subset.merge(months_grouped, on = 'edge_uid', how = 'inner') |
| 168 | +``` |
| 169 | + |
| 170 | +```python |
| 171 | +# take a look at the data |
| 172 | +Strava_osm.head() |
| 173 | +``` |
| 174 | + |
| 175 | +```python |
| 176 | +# pivot to wide format (one row per year, one column per edge) to calculate percentage change between years; NOTE: this result is recomputed, with missing values filled, in the next cell |
| 177 | +Strava_pivot = Strava_osm.pivot(index = (['year']), columns = (['edge_uid']), |
| 178 | + values = 'normalized_trips').reset_index() |
| 179 | +``` |
| 180 | + |
| 181 | +```python |
| 182 | +# same wide pivot as above, with missing values filled with 0: if an edge is not there, it means 0 counts |
| 183 | +# NOTE(review): after reset_index() 'year' is an ordinary column, so pct_change() also runs on it (it is overwritten further down); an edge with 0 trips in 2019 gives inf (or NaN if 0 in both years) |
| 184 | +Strava_pivot = Strava_osm.pivot(index = (['year']), columns = (['edge_uid']), |
| 185 | + values = 'normalized_trips').reset_index().fillna(0) |
| 186 | + |
| 187 | +edge_pct_change = Strava_pivot.pct_change()*100 |
| 188 | +``` |
| 189 | + |
| 190 | +```python |
| 191 | +edge_pct_change |
| 192 | +``` |
| 193 | + |
| 194 | +```python |
| 195 | +# restore the 'year' values, which pct_change() replaced with the change in the year number itself (assumes exactly two rows, ordered 2019 then 2020) |
| 196 | +edge_pct_change['year'] = [2019, 2020] |
| 197 | +``` |
| 198 | + |
| 199 | +```python |
| 200 | +# drop the first row (2019 has no earlier year to compare with) and melt back to long version: one row per edge holding its 2019 -> 2020 change |
| 201 | +pct_change_2019_2020 = edge_pct_change.iloc[1:].melt(id_vars = ['year']) |
| 202 | +``` |
| 203 | + |
| 204 | +```python |
| 205 | +# rename the melted 'value' column to 'pct_change' |
| 206 | +pct_change_2019_2020 = pct_change_2019_2020.rename(columns = {"value": "pct_change"}) |
| 207 | +``` |
| 208 | + |
| 209 | +```python |
| 210 | +# merge with the shape data again to attach a geometry to each edge's pct_change |
| 211 | +# NOTE(review): the original worry was that this removes edges that aren't in the 2020 data; since missing values were filled with 0 before pct_change, such edges should still be present here (as -100) - confirm |
| 212 | +# drop duplicates first: Strava_osm has a row per edge per year, and only one (edge_uid, osmId, geometry) row per edge is wanted |
| 213 | + |
| 214 | +Strava_fall_winter = Strava_osm[['edge_uid', 'osmId', |
| 215 | + 'geometry']].drop_duplicates().merge(pct_change_2019_2020, |
| 216 | + on = 'edge_uid', how = 'inner') |
| 217 | +``` |
| 218 | + |
| 219 | +```python |
| 220 | +# reproject both layers to Web Mercator (EPSG:3857), which ctx.add_basemap assumes when no crs is passed; the next cell draws the WCN in green under the edges coloured by pct_change (colour scale limited to -100..100) and saves the map as a PDF |
| 221 | +Strava_web_mercator = Strava_fall_winter.to_crs(epsg=3857) |
| 222 | +wcn_web_mercator = wcn.to_crs(epsg=3857) |
| 223 | +``` |
| 224 | + |
| 225 | +```python |
| 226 | +fig, ax = plt.subplots(figsize = (10,10)) |
| 227 | +vmin = -100 |
| 228 | +vmax = 100 |
| 229 | + |
| 230 | +ax.set_aspect('equal') |
| 231 | + |
| 232 | +divider = make_axes_locatable(ax) |
| 233 | + |
| 234 | +cax = divider.append_axes("right", size="5%", pad=0.1) |
| 235 | + |
| 236 | + |
| 237 | +wcn_web_mercator.plot(ax = ax, linewidth = 4, alpha = 0.5, color = 'green') |
| 238 | + |
| 239 | +Strava_web_mercator.plot(column = "pct_change", ax = ax, cax = cax, legend = True, cmap = 'plasma', |
| 240 | + vmin=vmin, vmax=vmax, linewidth = 1) |
| 241 | +ctx.add_basemap(ax, alpha = 0.5) |
| 242 | + |
| 243 | +plt.tight_layout() |
| 244 | + |
| 245 | +plt.savefig("wnc_Fall2019_Winter2020.pdf") |
| 246 | +``` |
| 247 | + |
| 248 | +```python |
| 249 | +# reproject the per-edge, per-year counts to Web Mercator and split them by year (this overwrites the Strava_web_mercator used for the pct_change map); the following cells map total_trips on a log colour scale for 2019 (fall) and 2020 (winter), then the WCN on its own |
| 250 | +Strava_web_mercator = Strava_osm.to_crs(epsg=3857) |
| 251 | +Strava_2019_web_mercator = Strava_web_mercator[Strava_web_mercator['year'] == 2019] |
| 252 | +Strava_2020_web_mercator = Strava_web_mercator[Strava_web_mercator['year'] == 2020] |
| 253 | +``` |
| 254 | + |
| 255 | +```python |
| 256 | +import matplotlib.colors as colors |
| 257 | +``` |
| 258 | + |
| 259 | +```python |
| 260 | +fig, ax = plt.subplots(figsize = (10,10)) |
| 261 | + |
| 262 | +data = Strava_2019_web_mercator['total_trips'] |
| 263 | + |
| 264 | +vmin = np.min(data) |
| 265 | +vmax = np.max(data) |
| 266 | + |
| 267 | +ax.set_aspect('equal') |
| 268 | + |
| 269 | +divider = make_axes_locatable(ax) |
| 270 | + |
| 271 | +cax = divider.append_axes("right", size="5%", pad=0.1) |
| 272 | + |
| 273 | + |
| 274 | +wcn_web_mercator.plot(ax = ax, linewidth = 4, alpha = 0.5, color = 'green') |
| 275 | + |
| 276 | +Strava_2019_web_mercator.plot(column = "total_trips", ax = ax, cax = cax, legend = True, cmap = 'plasma', |
| 277 | + norm = colors.LogNorm(vmin=vmin, vmax=vmax),) |
| 278 | +ctx.add_basemap(ax, alpha = 0.5) |
| 279 | + |
| 280 | +plt.tight_layout() |
| 281 | + |
| 282 | +plt.savefig("Fall2019.pdf") |
| 283 | +``` |
| 284 | + |
| 285 | +```python |
| 286 | +fig, ax = plt.subplots(figsize = (10,10)) |
| 287 | +data = Strava_2020_web_mercator['total_trips'] |
| 288 | + |
| 289 | +vmin = np.min(data) |
| 290 | +vmax = np.max(data) |
| 291 | + |
| 292 | + |
| 293 | +ax.set_aspect('equal') |
| 294 | + |
| 295 | +divider = make_axes_locatable(ax) |
| 296 | + |
| 297 | +cax = divider.append_axes("right", size="5%", pad=0.1) |
| 298 | + |
| 299 | + |
| 300 | +wcn_web_mercator.plot(ax = ax, linewidth = 4, alpha = 0.5, color = 'green') |
| 301 | + |
| 302 | +Strava_2020_web_mercator.plot(column = "total_trips", ax = ax, cax = cax, legend = True, cmap = 'plasma', |
| 303 | + norm = colors.LogNorm(vmin=vmin, vmax=vmax),) |
| 304 | +ctx.add_basemap(ax, alpha = 0.5) |
| 305 | + |
| 306 | +plt.tight_layout() |
| 307 | + |
| 308 | +plt.savefig("Winter2020.pdf") |
| 309 | +``` |
| 310 | + |
| 311 | +```python |
| 312 | +fig, ax = plt.subplots(figsize = (10,10)) |
| 313 | +wcn_web_mercator.plot(ax = ax, linewidth = 3, alpha = 0.5, color = 'green') |
| 314 | +ctx.add_basemap(ax, alpha = 0.8) |
| 315 | +plt.savefig("wcn.pdf") |
| 316 | +plt.savefig("wcn.png", dpi = 200) |
| 317 | +``` |
| 318 | + |
| 319 | +```python |
| 320 | + |
| 321 | +``` |
0 commit comments