diff --git a/Cargo.lock b/Cargo.lock index 11560a294..0382abc30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1946,8 +1946,9 @@ dependencies = [ [[package]] name = "geosuggest-core" -version = "0.3.0" -source = "git+https://github.com/estin/geosuggest?rev=5c6b08b#5c6b08bbc9211972b489d5cfa13ce13cde42cb43" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349233f6bc33e706e9b450ff58bc42f571e2d1e97dffcad24d2d96a3573d9f9c" dependencies = [ "bincode", "csv", @@ -1961,8 +1962,9 @@ dependencies = [ [[package]] name = "geosuggest-utils" -version = "0.3.0" -source = "git+https://github.com/estin/geosuggest?rev=5c6b08b#5c6b08bbc9211972b489d5cfa13ce13cde42cb43" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b534065956ab2e4eff18dc6402b8b68db6c136a9815d1de7aeac2387b7c0662" dependencies = [ "anyhow", "futures", diff --git a/Cargo.toml b/Cargo.toml index 1afeb64b5..3538f35ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,10 +101,8 @@ flexi_logger = { version = "0.26", features = [ ], default-features = false } futures = "0.3" futures-util = "0.3" -geosuggest-core = { version = "0.3", optional = true } -geosuggest-utils = { version = "0.3", optional = true } -# geosuggest-core = { path = "../geosuggest/geosuggest-core", optional = true} -# geosuggest-utils = { path = "../geosuggest/geosuggest-utils", optional = true} +geosuggest-core = { version = "0.4", optional = true } +geosuggest-utils = { version = "0.4", optional = true } governor = { version = "0.6", optional = true } grex = { version = "1.4", default-features = false } gzp = { version = "0.11", default-features = false, features = [ @@ -224,8 +222,6 @@ rusqlite = { version = "0.29", features = ["bundled"] } serial_test = { version = "2.0", features = ["file_locks"] } [patch.crates-io] -geosuggest-core = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" } -geosuggest-utils = { git = 
"https://github.com/estin/geosuggest", rev = "5c6b08b" } calamine = { git = "https://github.com/jqnatividad/calamine", branch = "formula_empty_string_value" } [features] diff --git a/src/cmd/geocode.rs b/src/cmd/geocode.rs index d9ca5d95f..10a4ffc39 100644 --- a/src/cmd/geocode.rs +++ b/src/cmd/geocode.rs @@ -30,6 +30,10 @@ Geocode file.csv city column and set the geocoded value to a new column named la $ qsv geocode suggest city --new-column lat_long file.csv +Limit suggestions to the US, Canada and Mexico. + +$ qsv geocode suggest city --country us,ca,mx file.csv + Geocode file.csv city column with --formatstr=%state and set the geocoded value a new column named state. @@ -37,7 +41,7 @@ $ qsv geocode suggest city --formatstr %state --new-column state file.csv Use dynamic formatting to create a custom format. -$ qsv geocode suggest city --formatstr "{name}, {admin1}, {country} in {timezone}" file.csv +$ qsv geocode suggest city -f "{name}, {admin1}, {country} in {timezone}" file.csv REVERSE Reverse geocode a WGS 84 coordinate to the nearest Geonames city record. @@ -109,6 +113,10 @@ geocode options: Larger values will favor more populated cities. If not set (default), the population is not used and the nearest city is returned. + --country The comma-delimited list of countries to filter for when calling suggest. + Country is specified as an ISO 3166-1 alpha-2 (two-letter) country code. + https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 + If not set, suggest will search all countries in the currently loaded index. -f, --formatstr= The place format to use. The predefined formats are: - '%city-state' - e.g. Brooklyn, New York - '%city-country' - Brooklyn, US @@ -155,6 +163,7 @@ geocode options: INDEX-UPDATE only options: --languages The languages to use when building the Geonames cities index. The languages are specified as a comma-separated list of ISO 639-1 codes. 
+ https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes [default: en] --force Force update the Geonames cities index. If not set, qsv will check if there are updates available at Geonames.org before updating the index. @@ -209,6 +218,7 @@ struct Args { flag_rename: Option, flag_min_score: Option, flag_k_weight: Option, + flag_country: Option, flag_formatstr: String, flag_invalid_result: Option, flag_batch: u32, @@ -496,6 +506,22 @@ async fn geocode_main(args: Args) -> CliResult<()> { } wtr.write_record(&headers)?; + let country_filter_list = if let Some(country_list) = args.flag_country { + if args.cmd_reverse { + return fail_incorrectusage_clierror!( + "Country filter is not supported for reverse geocoding." + ); + } + Some( + country_list + .split(',') + .map(|s| s.trim().to_string()) + .collect::>(), + ) + } else { + None + }; + // amortize memory allocation by reusing record #[allow(unused_assignments)] let mut batch_record = csv::StringRecord::new(); @@ -552,6 +578,7 @@ async fn geocode_main(args: Args) -> CliResult<()> { &args.flag_formatstr, min_score, k_weight, + &country_filter_list, ); if let Some(geocoded_result) = search_result { // we have a valid geocode result, so use that @@ -659,9 +686,10 @@ fn search_cached( formatstr: &str, min_score: Option, k: Option, + country_filter_list: &Option>, ) -> Option { if mode == GeocodeSubCmd::Suggest { - let search_result = engine.suggest(cell, 1, min_score); + let search_result = engine.suggest(cell, 1, min_score, country_filter_list.as_deref()); let Some(cityrecord) = search_result.into_iter().next() else { return None; }; @@ -687,7 +715,8 @@ fn search_cached( let lat = fast_float::parse(&loccaps[1]).unwrap_or_default(); let long = fast_float::parse(&loccaps[2]).unwrap_or_default(); if (-90.0..=90.0).contains(&lat) && (-180.0..=180.0).contains(&long) { - let search_result = engine.reverse((lat, long), 1, k); + let search_result = + engine.reverse((lat, long), 1, k, country_filter_list.as_deref()); let 
Some(cityrecord) = (match search_result { Some(search_result) => search_result.into_iter().next().map(|ri| ri.city), None => return None, diff --git a/tests/test_geocode.rs b/tests/test_geocode.rs index 511c4d2e1..d5f2f952b 100644 --- a/tests/test_geocode.rs +++ b/tests/test_geocode.rs @@ -37,6 +37,128 @@ fn geocode_suggest() { assert_eq!(got, expected); } +#[test] +fn geocode_suggest_intl() { + let wrk = Workdir::new("geocode_suggest_intl"); + wrk.create( + "data.csv", + vec![ + svec!["Location"], + svec!["Paris"], + svec!["Manila"], + svec!["London"], + svec!["Berlin"], + svec!["Moscow"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Brazil"], + svec!["95.213424, 190,1234565"], // invalid lat, long + svec!["Havana"], + ], + ); + let mut cmd = wrk.command("geocode"); + cmd.arg("suggest") + .arg("Location") + .args(["-f", "%city-admin1-country"]) + .arg("data.csv"); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["Location"], + svec!["Paris, Île-de-France Region France"], + svec!["Manila, National Capital Region Philippines"], + svec!["London, England United Kingdom"], + svec!["Berlin, Germany"], + svec!["Moscow, Moscow Russia"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Brasília, Federal District Brazil"], + svec!["95.213424, 190,1234565"], + svec!["Havana, La Habana Province Cuba"], + ]; + assert_eq!(got, expected); +} + +#[test] +fn geocode_suggest_intl_country_filter() { + let wrk = Workdir::new("geocode_suggest_intl_country_filter"); + wrk.create( + "data.csv", + vec![ + svec!["Location"], + svec!["Paris"], + svec!["Manila"], + svec!["London"], + svec!["Berlin"], + svec!["Moscow"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Brazil"], + svec!["95.213424, 190,1234565"], // invalid lat, long + svec!["Havana"], + ], + ); + let mut cmd = wrk.command("geocode"); + cmd.arg("suggest") + .arg("Location") + .args(["--country", "us"]) + .args(["-f", 
"%city-admin1-country"]) + .arg("data.csv"); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["Location"], + svec!["Paris, Texas United States"], + svec!["Manteca, California United States"], + svec!["Sterling, Virginia United States"], + svec!["Burlington, North Carolina United States"], + svec!["Moscow, Idaho United States"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Bradley, Illinois United States"], + svec!["95.213424, 190,1234565"], + svec!["Savannah, Georgia United States"], + ]; + assert_eq!(got, expected); +} + +#[test] +fn geocode_suggest_intl_multi_country_filter() { + let wrk = Workdir::new("geocode_suggest_intl_multi_country_filter"); + wrk.create( + "data.csv", + vec![ + svec!["Location"], + svec!["Paris"], + svec!["Manila"], + svec!["London"], + svec!["Berlin"], + svec!["Moscow"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Brazil"], + svec!["95.213424, 190,1234565"], // invalid lat, long + svec!["Havana"], + ], + ); + let mut cmd = wrk.command("geocode"); + cmd.arg("suggest") + .arg("Location") + .args(["--country", "us,fr,ru"]) + .args(["-f", "%city-admin1-country"]) + .arg("data.csv"); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["Location"], + svec!["Paris, Île-de-France Region France"], + svec!["Manteca, California United States"], + svec!["Sterling, Virginia United States"], + svec!["Burlington, North Carolina United States"], + svec!["Moscow, Moscow Russia"], + svec!["This is not a Location and it will not be geocoded"], + svec!["Bradley, Illinois United States"], + svec!["95.213424, 190,1234565"], + svec!["Savannah, Georgia United States"], + ]; + assert_eq!(got, expected); +} + #[test] fn geocode_suggest_invalid() { let wrk = Workdir::new("geocode_suggest_invalid");