Skip to content

Commit

Permalink
Extract get_administrative_data to open-gira package
Browse files Browse the repository at this point in the history
  • Loading branch information
tomalrussell committed Apr 5, 2024
1 parent c0874d5 commit c9ea5c0
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 36 deletions.
43 changes: 42 additions & 1 deletion src/open_gira/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import logging

import pandas as pd
import geopandas as gpd
import shapely


def merge_gadm_admin_levels(preference: pd.DataFrame, alternative: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -37,4 +39,43 @@ def merge_gadm_admin_levels(preference: pd.DataFrame, alternative: pd.DataFrame)

merged = pd.concat([preference, substitute_regions])

return merged.sort_values("ISO_A3")
return merged.sort_values("ISO_A3")


def get_administrative_data(file_path: str, to_epsg: int = None) -> gpd.GeoDataFrame:
"""
Read administrative data (country ISO, country geometry) from disk
Arguments:
file_path (str): Location of file with country data:
containing an ISO three letter code as 'GID_0' and a geometry as
'geometry'
to_epsg (int): EPSG code to project data to
Returns:
gpd.GeoDataFrame: Table of country and geometry data with:
'iso_a3' and 'geometry' columns
"""

# read file
gdf = gpd.read_file(file_path)

# check schema is as expected
expected_columns = {"GID_0", "geometry"}
assert expected_columns.issubset(set(gdf.columns.values))

# reproject if desired
if to_epsg is not None:
gdf = gdf.to_crs(epsg=to_epsg)

# rename these columns first so we don't have to do this twice (to nodes and edges) later
gdf.rename(columns={"GID_0": "iso_a3"}, inplace=True)

# subset, sort by iso_a3 and return
return gdf[["iso_a3", "geometry"]].sort_values(by=["iso_a3"], ascending=True)


def boundary_geom(gdf: gpd.GeoDataFrame, iso_a3: str) -> shapely.Geometry:
"""Given administrative data, return the boundary geometry for a given ISO3 country code
"""
return gdf.set_index("iso_a3").loc[iso_a3, "geometry"]
3 changes: 2 additions & 1 deletion workflow/transport/create_rail_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

import geopandas as gpd

from utils import annotate_country, get_administrative_data
from utils import annotate_country
from open_gira.admin import get_administrative_data
from open_gira.assets import RailAssets
from open_gira.io import write_empty_frames
from open_gira.network import create_network
Expand Down
3 changes: 2 additions & 1 deletion workflow/transport/create_road_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
import pandas as pd
import snkit

from utils import annotate_country, cast, get_administrative_data, strip_suffix
from utils import annotate_country, cast, strip_suffix
from open_gira.admin import get_administrative_data
from open_gira.assets import RoadAssets
from open_gira.io import write_empty_frames
from open_gira.network import create_network
Expand Down
33 changes: 0 additions & 33 deletions workflow/transport/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,39 +60,6 @@ def cast(x: Any, *, casting_function: Callable, nullable: bool) -> Any:
raise ValueError("Couldn't recast to non-nullable value") from casting_error


def get_administrative_data(file_path: str, to_epsg: int = None) -> gpd.GeoDataFrame:
"""
Read administrative data (country ISO, country geometry) from disk
Arguments:
file_path (str): Location of file with country data:
containing an ISO three letter code as 'GID_0' and a geometry as
'geometry'
to_epsg (int): EPSG code to project data to
Returns:
gpd.GeoDataFrame: Table of country and geometry data with:
'iso_a3' and 'geometry' columns
"""

# read file
gdf = gpd.read_file(file_path)

# check schema is as expected
expected_columns = {"GID_0", "geometry"}
assert expected_columns.issubset(set(gdf.columns.values))

# reproject if desired
if to_epsg is not None:
gdf = gdf.to_crs(epsg=to_epsg)

# rename these columns first so we don't have to do this twice (to nodes and edges) later
gdf.rename(columns={"GID_0": "iso_a3"}, inplace=True)

# subset, sort by iso_a3 and return
return gdf[["iso_a3", "geometry"]].sort_values(by=["iso_a3"], ascending=True)


def annotate_country(network: snkit.network.Network, countries: gpd.GeoDataFrame) -> snkit.network.Network:
"""
Label network edges and nodes with their country ISO code
Expand Down

0 comments on commit c9ea5c0

Please sign in to comment.