|
### Workspace
# `rm(list = ls())` used to run here. It has been removed: it only deletes
# objects in the global environment (attached packages, options and the
# working directory are left untouched), so it does not provide a clean
# session, and it silently wipes the workspace of anyone who source()s this
# file. Run the script in a fresh R session instead.

# Libraries ---------------------------------------------------------------
library(tidyverse)
library(sf) # for spatial things
library(lwgeom) # for advanced spatial things
library(fitdistrplus) # for log normal distributions
# Load the VISTA trip table; the literal string "N/A" is read as missing.
tripdata <- read.csv(
  file = "data/Melbourne/DOT_VISTA/processed/T_VISTA_MAIN.csv",
  header = TRUE,
  na.strings = "N/A"
)

# Duplicate the origin/destination coordinates under new names.
# Presumably this keeps them available after st_as_sf() (used further down)
# turns origlong/origlat and destlong/destlat into a geometry column.
df <- tripdata
df$orig_long <- df$origlong
df$orig_lat <- df$origlat
df$dest_long <- df$destlong
df$dest_lat <- df$destlat
# Filtering data based on geographic extent --------------------------------

# Study-region polygons, buffered by 1 unit of the layer's CRS.
# NOTE(review): the path is machine-specific, and the layer is assumed to be
# stored in EPSG:28355 already (st_intersects() needs matching CRS) - confirm.
studyRegion <- st_read(
  "~/Documents/melbourne/absRegionsReprojected.sqlite",
  layer = "GCCSA_2016_AUST"
) %>%
  st_buffer(1)

# Keep the rows of `data` whose (long_col, lat_col) point intersects `region`.
#   data     data frame holding WGS84 (EPSG:4326) coordinates
#   long_col name of the longitude column
#   lat_col  name of the latitude column
#   region   sf polygons in EPSG:28355
# Returns an sf object (EPSG:28355, snapped to a 0.1 grid) with the retained
# rows.
points_within_region <- function(data, long_col, lat_col, region) {
  pts <- data %>%
    st_as_sf(coords = c(long_col, lat_col), crs = 4326) %>%
    st_transform(28355) %>%
    st_snap_to_grid(0.1)
  hits <- st_intersects(pts, region, prepared = TRUE, sparse = TRUE)
  pts[lengths(hits) > 0, ]
}

orig_within_region <- points_within_region(df, "origlong", "origlat", studyRegion)
dest_within_region <- points_within_region(df, "destlong", "destlat", studyRegion)

# Trips within the study region.
# The CSV round trip is intentional and kept as-is: the files are written to
# the processed-data folder, and the data are read back with readr so that
# the rest of the script works on the re-parsed tables instead of sf objects.
write.csv(
  orig_within_region,
  file = "data/Melbourne/DOT_VISTA/processed/origmelb_trips.csv",
  row.names = FALSE
)
write.csv(
  dest_within_region,
  file = "data/Melbourne/DOT_VISTA/processed/destmelb_trips.csv",
  row.names = FALSE
)
orig <- read_csv(
  "data/Melbourne/DOT_VISTA/processed/origmelb_trips.csv",
  col_names = TRUE,
  show_col_types = FALSE
)
dest <- read_csv(
  "data/Melbourne/DOT_VISTA/processed/destmelb_trips.csv",
  col_names = TRUE,
  show_col_types = FALSE
)

# Keep trips whose origin AND destination both fall inside the region
melbtrips <- semi_join(orig, dest, by = "tripid")
# Joining trip data with household & person data ---------------------------
hhdata <- read.csv(file = "data/Melbourne/DOT_VISTA/processed/H_VISTA_1220_Coord.csv")
pdata <- read.csv(file = "data/Melbourne/DOT_VISTA/processed/P_VISTA_1220_Coord.csv")

# merge() keeps only the rows with a matching key in both tables
tripshh <- merge(x = melbtrips, y = hhdata, by = "hhid")
tripshhp <- merge(x = tripshh, y = pdata, by = "persid")

# Filtering data based on survey period: keep the four listed waves only
trips <- subset(
  tripshhp,
  surveyperiod %in% c("2016-17", "2017-18", "2018-19", "2019-20")
)

# Legacy recoding of the linkmode variable (old tree structure), kept for
# reference only.
#trips$mainmode[mandatory_trips$linkmode=="Vehicle Driver"] = "card"
#trips$mainmode[trips$linkmode=="Motorcycle"] = "card"
#trips$mainmode[trips$linkmode=="Vehicle Passenger"] = "carp"
#trips$mainmode[trips$linkmode=="Taxi"] = "carp"
#trips$mainmode[trips$linkmode=="Walking"] = "walk"
#trips$mainmode[trips$linkmode=="Jogging"] = "walk"
#trips$mainmode[trips$linkmode=="Bicycle"] = "bike"
#trips$mainmode[trips$linkmode=="Public Bus"] = "bus"
#trips$mainmode[trips$linkmode=="School Bus"] = "bus"
#trips$mainmode[trips$linkmode=="Train"] = "train"
#trips$mainmode[trips$linkmode=="Tram"] = "tram"
#trips$mainmode[trips$linkmode=="Other"] = "Other"
#trips <- WE_trips[!(WE_trips$mainmode=="Other"),]

# Recoding modes -----------------------------------------------------------
# Collapse the detailed stage modes in mode1..mode10 into five classes.
# One lookup replaces the eight chained mutate_at()/funs() passes (funs() is
# deprecated in dplyr). No replacement value is itself a key, so a single
# pass gives the same result as the chain.
mode_cols <- paste0("mode", 1:10)
mode_lookup <- c(
  "Motorcycle" = "Vehicle Driver",
  "Taxi" = "Vehicle Passenger",
  "Jogging" = "Walking",
  "Mobility Scooter" = "Bicycle",
  "Public Bus" = "PT",
  "School Bus" = "PT",
  "Train" = "PT",
  "Tram" = "PT"
)

# Replace values found in `mode_lookup`; everything else (including NA) is
# returned unchanged.
recode_mode <- function(x) {
  hit <- match(x, names(mode_lookup))
  ifelse(is.na(hit), x, unname(mode_lookup[hit]))
}

trips <- trips %>%
  mutate(across(all_of(mode_cols), recode_mode))

# Drop trips whose link mode is "Other". `%in%` never returns NA, so rows with
# a missing linkmode are kept intact; the previous `!(linkmode == "Other")`
# index turned each of them into a row of NAs.
trips <- trips[!(trips$linkmode %in% "Other"), ]

# Concatenating modes ------------------------------------------------------
# combinedmode: all non-missing stage modes of a trip joined with "_"
trips <- trips %>%
  unite(
    "combinedmode",
    c("mode1", "mode2", "mode3", "mode4", "mode5",
      "mode6", "mode7", "mode8", "mode9", "mode10"),
    sep = "_", na.rm = TRUE, remove = FALSE
  )

# combinedmode2: same sequence with repeated modes removed (first occurrence
# kept). vapply() guarantees a character vector even when `trips` has no rows,
# where sapply() would return an empty list.
trips$combinedmode2 <- vapply(
  trips$combinedmode,
  function(x) paste(unique(unlist(str_split(x, "_"))), collapse = "_"),
  character(1),
  USE.NAMES = FALSE
)

# Remove "Other" stages when they are combined with another mode
trips$combinedmode2 <- gsub("_Other", "", trips$combinedmode2)
trips$combinedmode2 <- gsub("Other_", "", trips$combinedmode2)

# Mode-sequence -> main-mode rules.
# IMPORTANT: str_replace_all() with a named vector applies the patterns one
# after another, as regular expressions, to the progressively modified string
# (substring matches, not whole-string matches). The ORDER of the entries is
# therefore part of the logic: e.g. "PT_Walking" is rewritten to
# "PT_walk_Bike" before the longer "Vehicle Driver_PT_Walking" rule is
# reached, and the later "Vehicle Driver_PT_walk_Bike" rule finishes the job.
# Do not reorder or deduplicate entries without re-checking the results.
# Four repeated keys used to map to "PT_Walk_Bike" (capital W), a label that
# no other line of this script matches; they appear to be shadowed by earlier
# patterns, and their label is now spelled consistently as "PT_walk_Bike".
rep_str <- c(
  "Vehicle Driver" = "Vehicle Driver",
  "Vehicle Passenger" = "Vehicle Passenger",
  "Walking" = "Walking",
  "Bicycle" = "Bicycle",
  "Bicycle_PT" = "PT_walk_Bike",
  "Bicycle_PT_Walking" = "PT_walk_Bike",
  "Bicycle_Walking_PT" = "PT_walk_Bike",
  "PT_Walking" = "PT_walk_Bike",
  "Vehicle Driver_PT_Walking" = "PT_Car",
  "Bicycle_PT" = "PT_walk_Bike",
  "Vehicle Driver_Vehicle Passenger_PT_Walking" = "PT_Car",
  "Vehicle Driver_Walking_PT" = "PT_Car",
  "Vehicle Passenger_PT_Walking" = "PT_Car",
  "Vehicle Passenger_Vehicle Driver_Walking_PT" = "PT_Car",
  "Vehicle Passenger_Walking_PT" = "PT_walk_Bike",
  "Walking_PT" = "PT_walk_Bike",
  "Walking_PT_Vehicle Driver" = "PT_Car",
  "Walking_PT_Vehicle Passenger" = "PT_walk_Bike",
  "Vehicle Driver_Bicycle" = "Vehicle Driver",
  "Vehicle Driver_Vehicle Passenger" = "Vehicle Driver",
  "Vehicle Driver_Vehicle Passenger_Walking" = "Vehicle Driver",
  "Vehicle Driver_Walking" = "Vehicle Driver",
  "Vehicle Driver_Walking_Vehicle Passenger" = "Vehicle Driver",
  "Vehicle Passenger_Vehicle Driver_Walking" = "Vehicle Driver",
  "Vehicle Passenger_Walking" = "Vehicle Passenger",
  "Vehicle Passenger_Walking_Vehicle Driver" = "Vehicle Driver",
  "Walking_Bicycle" = "Walking",
  "Bicycle_Walking" = "Bicycle",
  "Walking_Vehicle Driver" = "Vehicle Driver",
  "Walking_Vehicle Passenger" = "Vehicle Passenger",
  "Vehicle Passenger_Vehicle Driver" = "Vehicle Driver",
  "Vehicle Passenger_PT_Walking" = "PT_walk_Bike",
  "Vehicle Passenger_Walking_PT" = "PT_walk_Bike",
  "Vehicle Driver_Vehicle Passenger" = "Vehicle Driver",
  "Vehicle Passenger_PT_Walking" = "PT_walk_Bike",
  "Vehicle Driver_PT_walking" = "PT_Car",
  "PT_walk_Bike_Walking" = "PT_walk_Bike",
  "Vehicle Driver_PT_walk_Bike" = "PT_Car",
  "Vehicle Passenger_PT_walk_Bike" = "PT_Car",
  "PT_walk_Bike_Vehicle Driver" = "PT_Car",
  "PT_walk_Bike_Vehicle Passenger" = "PT_Car",
  "Vehicle Passenger_PT_Car" = "PT_Car",
  "Vehicle Driver_PT" = "PT_Car",
  "Vehicle Passenger_PT" = "PT_Car",
  "Bicycle_Vehicle Driver" = "Vehicle Driver",
  "PT_Vehicle Driver" = "PT_Car",
  "PT_Car_Vehicle Passenger" = "PT_Car",
  "Vehicle Passenger_Bicycle" = "Bicycle",
  "PT_Car_Bicycle" = "PT_Car",
  "PT_Vehicle Passenger" = "PT_Car",
  "PT_walk_Bike_Bicycle" = "PT_walk_Bike"
)

trips$mainmode <- str_replace_all(trips$combinedmode2, rep_str)
# A trip made of public transport only is classed with walk/bike access
trips$mainmode[trips$mainmode == "PT"] <- "PT_walk_Bike"

# Mandatory data: "home to work/education" + "work/education to home" ------
# `&` binds tighter than `|`, so the previous un-parenthesised conditions
# selected EVERY trip to Education (whatever its origin) and EVERY trip
# starting at a Workplace (whatever its destination). Each direction is now
# an explicit (origin) AND (destination) test.
home_to_mandatory <- trips$origplace1 == "Accommodation" &
  trips$destpurp1 %in% c("Work Related", "Education")
mandatory_to_home <- trips$origplace1 %in% c("Workplace", "Place of Education") &
  trips$destpurp1 == "At or Go Home"

# which() drops NA comparisons, matching subset()'s handling of missing values
mandatory_hwe <- trips[which(home_to_mandatory), ]
mandatory_weh <- trips[which(mandatory_to_home), ]
mandatory_tot <- rbind(mandatory_hwe, mandatory_weh)
trips <- mandatory_tot

# Generating age groups ----------------------------------------------------
# Ages outside every band, and missing ages, stay NA.
# NOTE(review): 25-34 and 35-44 both map to group 3 while the later bands
# continue with 4, 5, 6 - confirm that merging these two bands is intended.
trips$agegroup <- case_when(
  trips$age <= 14 ~ 1,
  trips$age >= 15 & trips$age <= 24 ~ 2,
  trips$age >= 25 & trips$age <= 34 ~ 3,
  trips$age >= 35 & trips$age <= 44 ~ 3,
  trips$age >= 45 & trips$age <= 54 ~ 4,
  trips$age >= 55 & trips$age <= 64 ~ 5,
  trips$age >= 65 ~ 6
)

# Recoding sex: female = 1 for "F", 0 for "M", NA for anything else
trips$female <- case_when(
  trips$sex == "F" ~ 1,
  trips$sex == "M" ~ 0
)

# Recoding car licence: 1 for any licence or permit listed below,
# 0 for "No Car Licence", NA for anything else
trips$licence <- case_when(
  trips$carlicence %in% c(
    "Full Licence",
    "Green Probationary Licence",
    "Learners Permit",
    "Red Probationary Licence"
  ) ~ 1,
  trips$carlicence == "No Car Licence" ~ 0
)

# Number of cars in the household, capped at 3 (3 means "three or more")
trips$carsno <- case_when(
  trips$cars >= 3 ~ 3,
  trips$cars == 2 ~ 2,
  trips$cars == 1 ~ 1,
  trips$cars == 0 ~ 0
)

# Number of adult bikes in the household, capped at 3 in the same way
trips$bikes <- case_when(
  trips$adultbikes >= 3 ~ 3,
  trips$adultbikes == 2 ~ 2,
  trips$adultbikes == 1 ~ 1,
  trips$adultbikes == 0 ~ 0
)

# Recoding household size, capped at 3 (3 means "three or more") -----------
# The previous version was a copy of the bikes block: it filled hh.size from
# `adultbikes`, so the variable duplicated `bikes` instead of describing the
# household.
# NOTE(review): assumes the household table supplies the size in a column
# named `hhsize` - confirm the column name against H_VISTA_1220_Coord.csv.
if (!("hhsize" %in% names(trips))) {
  stop("Column 'hhsize' not found in trips; cannot derive hh.size.", call. = FALSE)
}
trips$hh.size <- case_when(
  trips$hhsize >= 3 ~ 3,
  trips$hhsize == 2 ~ 2,
  trips$hhsize == 1 ~ 1,
  trips$hhsize == 0 ~ 0
)

# Recoding sex (male-coded counterpart of `female`):
# gender = 1 for "M", 0 for "F", NA for anything else
trips$gender <- case_when(
  trips$sex == "M" ~ 1,
  trips$sex == "F" ~ 0
)

# Recoding household income ------------------------------------------------
# hhinc is multiplied by 52 (presumably weekly income -> annual; confirm).
trips$hhinc_an <- trips$hhinc * 52

# Income bands: 1 = below 60,000; 2 = 60,000 up to 94,999; 3 = 95,000 up to
# 139,999; 4 = 140,000 up to 189,999; 5 = 190,000 and above.
# findInterval() uses half-open bands, so non-integer incomes such as 59999.5
# no longer fall into a gap between two bands (they used to end up NA).
trips$hhincat <- findInterval(
  trips$hhinc_an,
  c(60000, 95000, 140000, 190000)
) + 1
trips$hhincat[is.na(trips$hhinc_an)] <- 6 # missing/refused to respond

# Recoding work type; values not listed here (and NA) stay NA
trips$work.type <- case_when(
  trips$worktype == "Fixed Hours" ~ 1, # annotated as "working full time"
  trips$worktype %in% c("Flexible Hours", "Work from Home") ~ 2,
  trips$worktype == "Rostered Shifts" ~ 3,
  trips$worktype == "Not in Work Force" ~ 4
)

# Recoding mainmode to a numeric code: the position of the label in this
# vector (1 = Vehicle Driver ... 6 = PT_walk_Bike); any other label gives NA
trips$mainmode2 <- as.numeric(match(
  trips$mainmode,
  c(
    "Vehicle Driver",
    "Vehicle Passenger",
    "Walking",
    "Bicycle",
    "PT_Car",
    "PT_walk_Bike"
  )
))

# Joining with route attributes, then with car and PT travel times.
# Both merges match on tripid and keep only the trips present in both tables.
route_attributes <- read.csv(file = "data/Melbourne/DOT_VISTA/processed/route_attributes.csv")
ptcar_time <- read.csv(file = "data/Melbourne/DOT_VISTA/processed/carandpt_time.csv")

trips <- trips %>%
  merge(route_attributes, by = "tripid") %>%
  merge(ptcar_time, by = "tripid")

# Generating availability of modes (should be edited) ----------------------
# A mode is flagged unavailable (0) when its travel time is exactly 0 and
# available (1) otherwise; a missing travel time leaves the mode available.
# `%in% 0` is used instead of `== 0` because it never returns NA.
trips$availcard <- as.numeric(!(trips$time_car %in% 0))
#trips$availcard[trips$carsno == 0] = 0
#trips$availcard[trips$mainmode2 == 1] = 1
trips$availcarp <- as.numeric(!(trips$time_car %in% 0))
#trips$availcarp[trips$carsno == 0] = 0
#trips$availcarp[trips$mainmode2 == 2] = 1
trips$availwalk <- as.numeric(!(trips$troutewalk_short_tt_s %in% 0))
#trips$availwalk[trips$troutewalk_fast_tt_s == 0] = 0 #to be activated before running models with fast route attributes
#trips$availwalk[trips$troutewalk_jibe_tt_s == 0] = 0 #to be activated before running models with jibe route attributes
#trips$availwalk[trips$mainmode == 3] = 1
trips$availbike <- as.numeric(!(trips$troutebike_short_tt_s %in% 0))
#trips$availbike[trips$troutebike_fast_tt_s == 0] = 0 #to be activated before running models with fast route attributes
#trips$availbike[trips$troutebike_jibe_tt_s == 0] = 0 #to be activated before running models with jibe route attributes
#trips$availbike[trips$bikes == 0] = 0
#trips$availbike[trips$mainmode2 == 4] = 1
trips$availptcar <- as.numeric(!(trips$time_ptcar %in% 0))
#trips$availptcar[trips$mainmode2 == 5] = 1
trips$availptwalk <- as.numeric(!(trips$time_ptwalk %in% 0))
#trips$availptwalk[trips$mainmode2 == 6] = 1

# Drop trips whose chosen mode has a zero travel time (car driver/passenger,
# PT with car access, PT with walk/bike access).
# The previous four `trips[!(a == 0 & b == k), ]` filters produced NA in the
# row index whenever a travel time or mainmode2 was missing, and R turns an NA
# row index into a row filled with NAs. `%in%` keeps such rows unchanged.
chosen_mode_unavailable <-
  (trips$time_car %in% 0 & trips$mainmode2 %in% c(1, 2)) |
  (trips$time_ptcar %in% 0 & trips$mainmode2 %in% 5) |
  (trips$time_ptwalk %in% 0 & trips$mainmode2 %in% 6)
trips <- trips[!chosen_mode_unavailable, ]

# Modifying the stress-junction variable: for each mode (walk, bike) and route
# type (short, fast, jibe), multiply the route's stressjct value by the
# route's distance in metres
trips <- trips %>%
  mutate(
    stressjct_walk_short = troutewalk_short_stressjct * troutewalk_short_distance_m,
    stressjct_bike_short = troutebike_short_stressjct * troutebike_short_distance_m,
    stressjct_walk_fast = troutewalk_fast_stressjct * troutewalk_fast_distance_m,
    stressjct_bike_fast = troutebike_fast_stressjct * troutebike_fast_distance_m,
    stressjct_walk_jibe = troutewalk_jibe_stressjct * troutewalk_jibe_distance_m,
    stressjct_bike_jibe = troutebike_jibe_stressjct * troutebike_jibe_distance_m
  )

# Generating the log transformation of route distance ----------------------
# The logdist_bike_* columns were previously computed from the WALK route
# distances (copy-paste slip), which made them identical to logdist_walk_*.
# They now use the bike route distances.
# Note: log(0) is -Inf, so zero-length routes give -Inf here.
trips$logdist_walk_short <- log(trips$troutewalk_short_distance_m)
trips$logdist_bike_short <- log(trips$troutebike_short_distance_m)
trips$logdist_walk_fast <- log(trips$troutewalk_fast_distance_m)
trips$logdist_bike_fast <- log(trips$troutebike_fast_distance_m)
trips$logdist_walk_jibe <- log(trips$troutewalk_jibe_distance_m)
trips$logdist_bike_jibe <- log(trips$troutebike_jibe_distance_m)

# Generating a weighting variable based on walk and bike log distances -----
# For each route type the table is sorted by (walk, bike) log distance and
# weight_<route> is set to the mean of the two values. The three sorts are
# kept in their original sequence so the final row order (and the order of
# ties) is unchanged.
# The mean is computed on whole columns; the previous rowwise() + mean() did
# the same arithmetic one row at a time and left `trips` as a row-grouped
# tibble for everything that followed.
for (route in c("short", "fast", "jibe")) {
  walk_col <- paste0("logdist_walk_", route)
  bike_col <- paste0("logdist_bike_", route)
  trips <- trips[order(trips[[walk_col]], trips[[bike_col]]), ]
  trips[[paste0("weight_", route)]] <- (trips[[walk_col]] + trips[[bike_col]]) / 2
}
# Sorting a data.frame keeps the old row names; renumber them 1..n so the
# exported row index stays sequential, as it was with the tibble result
rownames(trips) <- NULL

# Mandatory tours ----------------------------------------------------------

# Legacy steps below are disabled and kept for reference only.

# Joining with GNAF points for area-based measures:
# orig long & lat of mandatory trips were joined to GNAF points using near
# analysis in QGIS
#gnaf <- read.csv("C:/Users/e18933/OneDrive - RMIT University/WORK/Lucy/Data/ganf_points_selectedBEmeasures.csv",header=T, na.strings="N/A")
#mandatory_gnaf <- read_csv("data/Melbourne/DOT_VISTA/processed/Mandatory_tours_origgnaf.csv")
#mandatory_gnaf <- rename(mandatory_gnaf, gnaf_pid = HubName)
#mandatory_gnaf <- rename(mandatory_gnaf, origdist_gnaf = HubDist)
#trips_alljoined <-merge(mandatory_gnaf, gnaf, by="gnaf_pid")

# Filtering trips started from home
#hb_trips <- mandatory_tours %>%
# group_by(persid)
#hb_trips <- hb_trips[hb_trips$tripno == 1 & hb_trips$origpurp1 == "At Home", ]

# Filtering mandatory (work & education) trips
#mandatory_trips <- subset(hb_trips, trippurp=="Education"|trippurp=="Work Related")
#mandatory_trips <- mandatory_trips %>%
# relocate(gnaf_pid, .before = origdist_gnaf)

# Exporting the work and education trips to CSV (row names are written too,
# as the first column)
write.csv(
  x = trips,
  file = "data/Melbourne/DOT_VISTA/processed/mandatory_trips.csv"
)
| 260 | + |
| 261 | + |
0 commit comments