Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

geocode: add suggest --country filter #1275

Merged
merged 3 commits into from
Sep 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 2 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,8 @@ flexi_logger = { version = "0.26", features = [
], default-features = false }
futures = "0.3"
futures-util = "0.3"
geosuggest-core = { version = "0.3", optional = true }
geosuggest-utils = { version = "0.3", optional = true }
# geosuggest-core = { path = "../geosuggest/geosuggest-core", optional = true}
# geosuggest-utils = { path = "../geosuggest/geosuggest-utils", optional = true}
geosuggest-core = { version = "0.4", optional = true }
geosuggest-utils = { version = "0.4", optional = true }
governor = { version = "0.6", optional = true }
grex = { version = "1.4", default-features = false }
gzp = { version = "0.11", default-features = false, features = [
Expand Down Expand Up @@ -224,8 +222,6 @@ rusqlite = { version = "0.29", features = ["bundled"] }
serial_test = { version = "2.0", features = ["file_locks"] }

[patch.crates-io]
geosuggest-core = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
geosuggest-utils = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
calamine = { git = "https://github.com/jqnatividad/calamine", branch = "formula_empty_string_value" }

[features]
Expand Down
35 changes: 32 additions & 3 deletions src/cmd/geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ Geocode file.csv city column and set the geocoded value to a new column named la

$ qsv geocode suggest city --new-column lat_long file.csv

Limit suggestions to the US, Canada and Mexico.

$ qsv geocode suggest city --country us,ca,mx file.csv

Geocode file.csv city column with --formatstr=%state and set the
geocoded value to a new column named state.

$ qsv geocode suggest city --formatstr %state --new-column state file.csv

Use dynamic formatting to create a custom format.

$ qsv geocode suggest city --formatstr "{name}, {admin1}, {country} in {timezone}" file.csv
$ qsv geocode suggest city -f "{name}, {admin1}, {country} in {timezone}" file.csv

REVERSE
Reverse geocode a WGS 84 coordinate to the nearest Geonames city record.
Expand Down Expand Up @@ -109,6 +113,10 @@ geocode options:
Larger values will favor more populated cities.
If not set (default), the population is not used and the
nearest city is returned.
--country <country_list> The comma-delimited list of countries to filter for when calling suggest.
Country is specified as an ISO 3166-1 alpha-2 (two-letter) country code.
https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
If not set, suggest will search all countries in the currently loaded index.
-f, --formatstr=<string> The place format to use. The predefined formats are:
- '%city-state' - e.g. Brooklyn, New York
- '%city-country' - Brooklyn, US
Expand Down Expand Up @@ -155,6 +163,7 @@ geocode options:
INDEX-UPDATE only options:
--languages <lang> The languages to use when building the Geonames cities index.
The languages are specified as a comma-separated list of ISO 639-1 codes.
https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
[default: en]
--force Force update the Geonames cities index. If not set, qsv will check if there
are updates available at Geonames.org before updating the index.
Expand Down Expand Up @@ -209,6 +218,7 @@ struct Args {
flag_rename: Option<String>,
flag_min_score: Option<f32>,
flag_k_weight: Option<f32>,
flag_country: Option<String>,
flag_formatstr: String,
flag_invalid_result: Option<String>,
flag_batch: u32,
Expand Down Expand Up @@ -496,6 +506,22 @@ async fn geocode_main(args: Args) -> CliResult<()> {
}
wtr.write_record(&headers)?;

let country_filter_list = if let Some(country_list) = args.flag_country {
if args.cmd_reverse {
return fail_incorrectusage_clierror!(
"Country filter is not supported for reverse geocoding."
);
}
Some(
country_list
.split(',')
.map(|s| s.trim().to_string())
.collect::<Vec<String>>(),
)
} else {
None
};

// amortize memory allocation by reusing record
#[allow(unused_assignments)]
let mut batch_record = csv::StringRecord::new();
Expand Down Expand Up @@ -552,6 +578,7 @@ async fn geocode_main(args: Args) -> CliResult<()> {
&args.flag_formatstr,
min_score,
k_weight,
&country_filter_list,
);
if let Some(geocoded_result) = search_result {
// we have a valid geocode result, so use that
Expand Down Expand Up @@ -659,9 +686,10 @@ fn search_cached(
formatstr: &str,
min_score: Option<f32>,
k: Option<f32>,
country_filter_list: &Option<Vec<String>>,
) -> Option<String> {
if mode == GeocodeSubCmd::Suggest {
let search_result = engine.suggest(cell, 1, min_score);
let search_result = engine.suggest(cell, 1, min_score, country_filter_list.as_deref());
let Some(cityrecord) = search_result.into_iter().next() else {
return None;
};
Expand All @@ -687,7 +715,8 @@ fn search_cached(
let lat = fast_float::parse(&loccaps[1]).unwrap_or_default();
let long = fast_float::parse(&loccaps[2]).unwrap_or_default();
if (-90.0..=90.0).contains(&lat) && (-180.0..=180.0).contains(&long) {
let search_result = engine.reverse((lat, long), 1, k);
let search_result =
engine.reverse((lat, long), 1, k, country_filter_list.as_deref());
let Some(cityrecord) = (match search_result {
Some(search_result) => search_result.into_iter().next().map(|ri| ri.city),
None => return None,
Expand Down
122 changes: 122 additions & 0 deletions tests/test_geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,128 @@ fn geocode_suggest() {
assert_eq!(got, expected);
}

/// Runs `geocode suggest` over a column of international place names with no
/// country filter, formatting results with `%city-admin1-country`.
///
/// Rows that cannot be geocoded (the free-text sentence and the out-of-range
/// lat/long pair) are expected to be echoed back unchanged.
#[test]
fn geocode_suggest_intl() {
    let workdir = Workdir::new("geocode_suggest_intl");

    // Input fixture: one header row followed by the locations to geocode.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    workdir.create("data.csv", input_rows);

    // With no country restriction each name resolves to its best-known match.
    let expected = vec![
        svec!["Location"],
        svec!["Paris, Île-de-France Region France"],
        svec!["Manila, National Capital Region Philippines"],
        svec!["London, England United Kingdom"],
        svec!["Berlin, Germany"],
        svec!["Moscow, Moscow Russia"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brasília, Federal District Brazil"],
        svec!["95.213424, 190,1234565"],
        svec!["Havana, La Habana Province Cuba"],
    ];

    let mut geocode_cmd = workdir.command("geocode");
    geocode_cmd
        .arg("suggest")
        .arg("Location")
        .args(["-f", "%city-admin1-country"])
        .arg("data.csv");
    let got: Vec<Vec<String>> = workdir.read_stdout(&mut geocode_cmd);

    assert_eq!(got, expected);
}

/// Runs `geocode suggest` with `--country us` over a column of international
/// place names, formatting results with `%city-admin1-country`.
///
/// Every geocoded row is expected to resolve to a United States city, while
/// the rows that cannot be geocoded are echoed back unchanged.
#[test]
fn geocode_suggest_intl_country_filter() {
    let workdir = Workdir::new("geocode_suggest_intl_country_filter");

    // Input fixture: one header row followed by the locations to geocode.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    workdir.create("data.csv", input_rows);

    // Restricting to the US forces the closest-matching US city for each name.
    let expected = vec![
        svec!["Location"],
        svec!["Paris, Texas United States"],
        svec!["Manteca, California United States"],
        svec!["Sterling, Virginia United States"],
        svec!["Burlington, North Carolina United States"],
        svec!["Moscow, Idaho United States"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Bradley, Illinois United States"],
        svec!["95.213424, 190,1234565"],
        svec!["Savannah, Georgia United States"],
    ];

    let mut geocode_cmd = workdir.command("geocode");
    geocode_cmd
        .arg("suggest")
        .arg("Location")
        .args(["--country", "us"])
        .args(["-f", "%city-admin1-country"])
        .arg("data.csv");
    let got: Vec<Vec<String>> = workdir.read_stdout(&mut geocode_cmd);

    assert_eq!(got, expected);
}

/// Runs `geocode suggest` with a comma-delimited `--country us,fr,ru` filter
/// over a column of international place names, formatting results with
/// `%city-admin1-country`.
///
/// Paris and Moscow are expected to resolve to France and Russia; the other
/// geocoded rows resolve to United States cities. Rows that cannot be geocoded
/// are echoed back unchanged.
#[test]
fn geocode_suggest_intl_multi_country_filter() {
    let workdir = Workdir::new("geocode_suggest_intl_multi_country_filter");

    // Input fixture: one header row followed by the locations to geocode.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    workdir.create("data.csv", input_rows);

    // Matches may come from any of the three allowed countries.
    let expected = vec![
        svec!["Location"],
        svec!["Paris, Île-de-France Region France"],
        svec!["Manteca, California United States"],
        svec!["Sterling, Virginia United States"],
        svec!["Burlington, North Carolina United States"],
        svec!["Moscow, Moscow Russia"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Bradley, Illinois United States"],
        svec!["95.213424, 190,1234565"],
        svec!["Savannah, Georgia United States"],
    ];

    let mut geocode_cmd = workdir.command("geocode");
    geocode_cmd
        .arg("suggest")
        .arg("Location")
        .args(["--country", "us,fr,ru"])
        .args(["-f", "%city-admin1-country"])
        .arg("data.csv");
    let got: Vec<Vec<String>> = workdir.read_stdout(&mut geocode_cmd);

    assert_eq!(got, expected);
}

#[test]
fn geocode_suggest_invalid() {
let wrk = Workdir::new("geocode_suggest_invalid");
Expand Down
Loading