-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prepare for OD data (admin level vs network level)
- Loading branch information
Showing
2 changed files
with
499 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
""" OD data preparation | ||
- Disaggregate OD matrix from multi-admin-layers to OA level | ||
- Predict OD matrix from 2011 to 2021 | ||
Input OD matrix: Number of travel-to-work trips, UK, 2011, Multi-admins | ||
Output OD matrix: Numnber of travel-to-work trips, GB, 2021, OA level. | ||
""" | ||
|
||
from pathlib import Path | ||
import os | ||
import pandas as pd | ||
from collections import defaultdict | ||
from nird.utils import load_config | ||
import random | ||
import warnings | ||
|
||
warnings.simplefilter("ignore") | ||
|
||
base_path = Path(load_config()["paths"]["base_path"]) | ||
base_path = base_path / "census_datasets" / "od_matrix" | ||
|
||
# %% | ||
# Scottish population (2011) at LSOA and OA levels | ||
pop11_Scot = pd.read_csv(os.path.join(base_path, "pop11_scot.csv")) | ||
pop11_Scot_lsoa = pop11_Scot.groupby("LSOA11CD", as_index=False).agg({"Popcount": sum}) | ||
pop11_Scot_oa = pop11_Scot[["OA11CD", "Popcount"]] | ||
|
||
# Scottish population (2021) at LSOA level | ||
pop21_Scot_lsoa = pd.read_excel( | ||
os.path.join(base_path, "sape-2021.xlsx"), sheet_name="Sheet1" | ||
) | ||
pop21_Scot_lsoa.rename( | ||
columns={"Data zone code": "LSOA21CD", "Total population": "Popcount"}, inplace=True | ||
) | ||
|
||
# (Population disaggregation) to estimate Scottish population (2021) at OA level | ||
temp_pop = pd.merge(pop11_Scot, pop11_Scot_lsoa, on="LSOA11CD", how="left") | ||
temp_pop["oa/lsoa"] = temp_pop.Popcount_x / temp_pop.Popcount_y | ||
dict_ratio = defaultdict(lambda: defaultdict(list)) | ||
for idx, row in temp_pop.iterrows(): | ||
oa = row["OA11CD"] | ||
lsoa = row["LSOA11CD"] | ||
rt = row["oa/lsoa"] | ||
dict_ratio[lsoa][oa] = rt | ||
|
||
oaList = [] | ||
popList = [] | ||
for idx, row in pop21_Scot_lsoa.iterrows(): | ||
lsoa = row["LSOA21CD"] | ||
pop = row["Popcount"] | ||
if lsoa in dict_ratio.keys(): | ||
for oa, ratio in dict_ratio[lsoa].items(): | ||
oaList.append(oa) | ||
popList.append(pop * ratio) | ||
|
||
pop21_Scot_oa = pd.DataFrame({"OA21CD": oaList, "Popcount": popList}) | ||
|
||
# %% | ||
# Read OD data at multiple admin levels | ||
od11_gb = pd.read_csv( | ||
os.path.join(base_path, "od_gb_2011_multiAdminLevel.csv") | ||
) # source: WU03BUK_oa_wz_v4 (2011) | ||
|
||
# Extract od origining from Scotland (2011) | ||
od11_Scot_oa = od11_gb[od11_gb.from_region == "oa_scot"] | ||
od11_Scot_oa.reset_index(drop=True, inplace=True) | ||
od11_Scot_oa.rename(columns={"car": "travel11"}, inplace=True) | ||
|
||
# Project OD from 2011 to 2021: travel21 | ||
pop11_scot_dict = pop11_Scot_oa.set_index("OA11CD")["Popcount"].to_dict() | ||
pop21_scot_dict = pop21_Scot_oa.set_index("OA21CD")["Popcount"].to_dict() | ||
od11_Scot_oa["Pop11"] = od11_Scot_oa["Area of usual residence"].map(pop11_scot_dict) | ||
od11_Scot_oa["Pop21"] = od11_Scot_oa["Area of usual residence"].map(pop21_scot_dict) | ||
od11_Scot_oa["travel21"] = ( | ||
od11_Scot_oa.travel11 * od11_Scot_oa.Pop21 / od11_Scot_oa.Pop11 | ||
) | ||
od11_Scot_oa["travel21"] = od11_Scot_oa["travel21"].round() # round the number of trips | ||
# -> pop11, pop21, travel11, travel21 | ||
|
||
# %% | ||
# OD disaggregation | ||
od11_Scot_oa = od11_Scot_oa[ | ||
od11_Scot_oa["Area of usual residence"] != od11_Scot_oa["Area of workplace"] | ||
] | ||
od11_Scot_oa.reset_index(drop=True, inplace=True) | ||
|
||
# Trip Classification | ||
# within Scotland (oa-oa) -> keep | ||
od11_scotoa_scotoa = od11_Scot_oa[od11_Scot_oa.to_region != "msoa_enw"] | ||
od11_scotoa_scotoa.reset_index(drop=True, inplace=True) | ||
|
||
# %% | ||
# Extract OD from Scotland to ENW (oa-msoa) -> disaggregate | ||
od11_scotoa_enwmsoa = od11_Scot_oa[od11_Scot_oa.to_region == "msoa_enw"] | ||
od11_scotoa_enwmsoa.reset_index(drop=True, inplace=True) | ||
|
||
# ENW 2011/21 population at OA level | ||
pop11_enw_oa = pd.read_csv(os.path.join(base_path, "pop11_enw_oa.csv")) | ||
pop11_enw_oa_dict = pop11_enw_oa.set_index("OA11CD")["POP11"].to_dict() | ||
pop21_enw_oa = pd.read_csv(os.path.join(base_path, "gb_population_2021_estimates.csv")) | ||
pop21_enw_oa_dict = pop21_enw_oa.set_index("OA21CD")["OA_POP_2021"].to_dict() | ||
|
||
# from MSOA11 to OA11: MSOA11 contains multiple OA11 | ||
lut_enw_msoa11_oa11 = pd.read_csv( | ||
os.path.join(base_path, "lut\\_lut_enw_msoa11_oa11.csv") | ||
) | ||
msoa11_to_oa11_dict = ( | ||
lut_enw_msoa11_oa11.groupby(by=["MSOA11CD"], as_index=False) | ||
.agg({"OA11CD": list}) | ||
.set_index("MSOA11CD")["OA11CD"] | ||
.to_dict() | ||
) | ||
msoa11_oa11_pop11 = defaultdict(lambda: defaultdict(list)) | ||
for msoa11 in msoa11_to_oa11_dict.keys(): | ||
for oa11 in msoa11_to_oa11_dict[msoa11]: | ||
pop11 = pop11_enw_oa_dict[oa11] | ||
msoa11_oa11_pop11[msoa11][oa11] = pop11 # 2011 OA population | ||
|
||
# from OA11 to OA21: OA11 may contain multiple OA21 | ||
lut_enw_oa11_oa21 = pd.read_csv(os.path.join(base_path, "lut\\_lut_enw_oa11_oa21.csv")) | ||
oa11_to_oa21_dict = ( | ||
lut_enw_oa11_oa21.groupby(by=["OA11CD"]).agg({"OA21CD": list}).to_dict()["OA21CD"] | ||
) | ||
oa11_oa21_pop21 = defaultdict(lambda: defaultdict(list)) | ||
for oa11 in oa11_to_oa21_dict.keys(): | ||
for oa21 in oa11_to_oa21_dict[oa11]: | ||
pop21 = pop21_enw_oa_dict[oa21] | ||
oa11_oa21_pop21[oa11][oa21] = pop21 # 2021 OA population | ||
|
||
# %% | ||
# Estimate msoa11_oa21_pop21 | ||
msoa11_oa21_pop21 = defaultdict(lambda: defaultdict(list)) | ||
for msoa11, list_of_oa11 in msoa11_oa11_pop11.items(): | ||
for oa11 in list_of_oa11.keys(): | ||
if oa11 in oa11_oa21_pop21.keys(): | ||
for oa21, pop21 in oa11_oa21_pop21[oa11].items(): | ||
msoa11_oa21_pop21[msoa11][oa21] = pop21 | ||
else: | ||
print(f"We cannot find oa21 for {oa11}") | ||
|
||
msoa11_oa21_ratio21 = defaultdict(lambda: defaultdict(list)) | ||
for msoa11, list_of_oa21 in msoa11_oa21_pop21.items(): | ||
ttpop = sum(msoa11_oa21_pop21[msoa11].values()) | ||
for oa21, pop21 in msoa11_oa21_pop21[msoa11].items(): | ||
percentage = pop21 / ttpop | ||
msoa11_oa21_ratio21[msoa11][oa21] = percentage | ||
|
||
msoa11_oa21_ratio21_norm = defaultdict(lambda: defaultdict(list)) | ||
for msoa11, list_of_oa21 in msoa11_oa21_ratio21.items(): | ||
sum_ratio = sum(msoa11_oa21_ratio21[msoa11].values()) | ||
for oa21, ratio in list_of_oa21.items(): | ||
normalised_ratio = ratio / sum_ratio | ||
msoa11_oa21_ratio21_norm[msoa11][oa21] = normalised_ratio | ||
|
||
# Disaggregate OD 2021 to OA level | ||
assignment = {} # scot[oa21] = travel21 | ||
for idx, row in od11_scotoa_enwmsoa.iterrows(): # each: 0 ~ 16; total: 8631 | ||
scot11 = row["Area of usual residence"] | ||
msoa11 = row["Area of workplace"] | ||
list_of_oa21 = list(msoa11_oa21_ratio21_norm[msoa11].keys()) | ||
# initialisation | ||
if scot11 not in assignment: | ||
assignment[scot11] = {oa21: 0 for oa21 in list_of_oa21} | ||
else: | ||
for oa21 in list_of_oa21: | ||
if oa21 not in assignment[scot11]: | ||
assignment[scot11][oa21] = 0 | ||
# update | ||
travel21 = row["travel21"] | ||
list_of_prob21 = list(msoa11_oa21_ratio21_norm[msoa11].values()) | ||
for _ in range(int(travel21)): | ||
selected_oa21 = random.choices(list_of_oa21, list_of_prob21)[0] | ||
assignment[scot11][selected_oa21] += 1 | ||
|
||
scot11List = [] | ||
oa21List = [] | ||
travel21List = [] | ||
for scot11, v in assignment.items(): | ||
for oa21, travel21 in v.items(): | ||
if travel21 > 0: | ||
scot11List.append(scot11) | ||
oa21List.append(oa21) | ||
travel21List.append(travel21) | ||
|
||
temp_df = pd.DataFrame( | ||
{ | ||
"Area of usual residence": scot11List, | ||
"Area of workplace": oa21List, | ||
"travel21": travel21List, | ||
} | ||
) | ||
|
||
temp_df = temp_df.groupby( | ||
by=["Area of usual residence", "Area of workplace"], | ||
as_index=False, | ||
).agg({"travel21": sum}) | ||
|
||
# %% | ||
# combine dataframes | ||
# Scotland (from OA to OA) | ||
temp_od11_scotoa_scotoa = od11_scotoa_scotoa[ | ||
["Area of usual residence", "Area of workplace", "travel21"] | ||
] | ||
od21_scot_oa = pd.concat([temp_df, temp_od11_scotoa_scotoa], axis=0, ignore_index=True) | ||
od21_scot_oa.rename(columns={"travel21": "Car21"}, inplace=True) | ||
|
||
# %% | ||
# England and Wales (from OA to OA: real observation data) | ||
od21_enw_oa = pd.read_csv(os.path.join(base_path, "od_enw_2021\\ODWP01EW_OA.csv")) | ||
od21_enw_oa.rename( | ||
columns={ | ||
"Output Areas code": "Area of usual residence", | ||
"OA of workplace code": "Area of workplace", | ||
"Count": "Car21", | ||
}, | ||
inplace=True, | ||
) | ||
od21_enw_oa = od21_enw_oa[ | ||
(od21_enw_oa["Place of work indicator (4 categories) code"] == 3) | ||
& (od21_enw_oa["Area of usual residence"] != od21_enw_oa["Area of workplace"]) | ||
] | ||
od21_enw_oa = od21_enw_oa[["Area of usual residence", "Area of workplace", "Car21"]] | ||
|
||
# remove North Irland | ||
od21_enw_oa = od21_enw_oa[~od21_enw_oa["Area of workplace"].str.startswith("N")] | ||
od21_enw_oa.reset_index(drop=True, inplace=True) | ||
|
||
# %% | ||
# Combined OD 2021 at OA level | ||
od21_gb_oa = pd.concat([od21_scot_oa, od21_enw_oa], axis=0, ignore_index=True) | ||
|
||
# %% | ||
# export file | ||
od21_gb_oa.to_csv(base_path / "od_gb_oa_2021.csv", index=False) |
Oops, something went wrong.