Skip to content

Commit

Permalink
Prioritize using longitude and latitude columns in building files when present.
Browse files Browse the repository at this point in the history

This saves time over parsing building polygon WKTs and deriving centroids from them.

Also, drop duplicate buildings during example generation.

PiperOrigin-RevId: 686532902
  • Loading branch information
jzxu authored and copybara-github committed Oct 17, 2024
1 parent 1f52a97 commit 6cd3dbe
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/skai/buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,23 @@ def _read_buildings_csv(path: str) -> gpd.GeoDataFrame:
"""
with tf.io.gfile.GFile(path, 'r') as csv_file:
df = pd.read_csv(csv_file)
if 'geometry' in df.columns:
if 'longitude' in df.columns and 'latitude' in df.columns:
geometries = gpd.points_from_xy(df['longitude'], df['latitude'])
df.drop(columns=['longitude', 'latitude'], inplace=True)
elif 'geometry' in df.columns:
logging.info('Parsing %d WKT strings. This could take a while.', len(df))
geometries = gpd.GeoSeries.from_wkt(df['geometry'])
df.drop(columns=['geometry'], inplace=True)
elif 'wkt' in df.columns:
logging.info('Parsing %d WKT strings. This could take a while.', len(df))
geometries = gpd.GeoSeries.from_wkt(df['wkt'])
df.drop(columns=['wkt'], inplace=True)
elif 'longitude' in df.columns and 'latitude' in df.columns:
geometries = gpd.points_from_xy(df['longitude'], df['latitude'])
df.drop(columns=['longitude', 'latitude'], inplace=True)
else:
raise ValueError(f'No geometry information found in file "{path}"')

return gpd.GeoDataFrame(df, geometry=geometries, crs=4326)
geometries = geometries.normalize()
gdf = gpd.GeoDataFrame(df, geometry=geometries, crs=4326)
return gdf.drop_duplicates()


def convert_buildings_file(
Expand Down

0 comments on commit 6cd3dbe

Please sign in to comment.