-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollect.py
112 lines (85 loc) · 3.34 KB
/
collect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import json
from pathlib import Path
import geoarrow.pyarrow as ga
import geopandas
import pyarrow as pa
import pyproj
from geoarrow.pyarrow import io
from geoarrow.rust.io import write_flatgeobuf
from pyarrow import ipc
# Original source:
# https://www.naturalearthdata.com/
here = Path(__file__).parent
CRSES = [
"OGC:CRS84",
"EPSG:4326",
"EPSG:32618",
"+proj=ortho +lat_0=43.88 +lon_0=-72.69 +ellps=WGS84",
]
CRS_LABELS = ["crs84", "4326", "utm", "custom"]
def download_vermont():
pd = geopandas.read_file(
"https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_1_states_provinces.zip"
)
return pd[["geometry"]][pd["name"] == "Vermont"]
def write_parquet(lazy=True):
pd = None
for crs, label in zip(CRSES, CRS_LABELS):
out = here / "files" / f"example-crs_vermont-{label}_geo.parquet"
if lazy and out.exists():
continue
if pd is None:
pd = download_vermont()
tab = pa.table({"geometry": ga.array(pd["geometry"].to_crs(crs))})
io.write_geoparquet_table(
tab, out, compression="none", write_bbox=True, write_geometry_types=True
)
return out
def write_geoarrow():
for label in CRS_LABELS:
out = here / "files" / f"example-crs_vermont-{label}_wkb.arrows"
tab = io.read_geoparquet_table(
here / "files" / f"example-crs_vermont-{label}_geo.parquet"
)
# Ensure we write PROJJSON explicitly for these examples. Probably
# io.read_geoparquet_table() should make sure this is set.
crs = pyproj.CRS(tab["geometry"].type.crs)
tab = pa.table({"geometry": ga.with_crs(tab["geometry"], crs)})
with ipc.new_stream(out, tab.schema) as writer:
writer.write_table(tab)
def write_fgb():
for label in CRS_LABELS:
out = here / "files" / f"example-crs_vermont-{label}.fgb"
tab = io.read_geoparquet_table(
here / "files" / f"example-crs_vermont-{label}_geo.parquet"
)
# geoarrow-rust needs "native" and not WKB-encoding
tab = pa.table({"geometry": ga.as_geoarrow(tab["geometry"])})
with open(out, "wb") as f:
write_flatgeobuf(tab, f, write_index=False)
def write_geoarrow_alternative_crses():
tab = io.read_geoparquet_table(here / "files" / "example-crs_vermont-crs84_geo.parquet")
# Construct these metadatas by hand since that's the whole point of this data
extension_metadata = {
"wkt2": {
"crs": pyproj.CRS(tab["geometry"].type.crs).to_wkt(),
"crs_type": "wkt2",
},
"auth-code": {"crs": "OGC:CRS84", "crs_type": "authority_code"},
"unknown": {"crs": "OGC:CRS84"},
}
for name, ext_metadata in extension_metadata.items():
metadata = {
"ARROW:extension:name": "geoarrow.wkb",
"ARROW:extension:metadata": json.dumps(ext_metadata),
}
schema = pa.schema([pa.field("geometry", pa.binary(), metadata=metadata)])
tab_out = pa.table([tab["geometry"].chunk(0).storage], schema=schema)
out = here / "files" / f"example-crs_vermont-crs84-{name}_wkb.arrows"
with ipc.new_stream(out, schema) as writer:
writer.write_table(tab_out)
if __name__ == "__main__":
write_parquet()
write_geoarrow()
write_fgb()
write_geoarrow_alternative_crses()