Skip to content

Commit

Permalink
geocode: smarter invalid --formatstr handling for predefined format…
Browse files Browse the repository at this point in the history
…s and dynfmts
  • Loading branch information
jqnatividad committed Aug 30, 2023
1 parent 54e7757 commit 51c61e8
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 25 deletions.
66 changes: 47 additions & 19 deletions src/cmd/geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,20 @@ geocode options:
- '%timezone' - the timezone
- '%+' - use the subcommand's default format. For suggest, '%location'.
For reverse, '%city-admin1'.
Alternatively, you can use dynamic formatting to create a custom format.
To do so, set the --formatstr to a dynfmt template, enclosing field names
in curly braces.
The following eight fields are available:
id, name, latitude, longitude, country, admin1, timezone, population
If an invalid format is specified, it will be treated as '%+'.
Alternatively, you can use dynamic formatting to create a custom format.
To do so, set the --formatstr to a dynfmt template, enclosing field names
in curly braces.
The following eight fields are available:
id, name, latitude, longitude, country, admin1, timezone, population
e.g. "City: {name}, State: {admin1}, Country: {country} - {timezone}"
If an invalid dynfmt template is specified, it will return "Invalid dynfmt template."
[default: %+]
--invalid-result <string> The string to use when the geocode result is empty/invalid.
If not set, the original value is used.
-j, --jobs <arg> The number of jobs to run in parallel.
Expand Down Expand Up @@ -224,6 +228,7 @@ static DEFAULT_ADMIN1_CODES_URL: &str =
"https://download.geonames.org/export/dump/admin1CodesASCII.txt";

static EMPTY_STRING: String = String::new();
static INVALID_DYNFMT: &str = "Invalid dynfmt template.";

// valid subcommands
#[derive(Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -651,7 +656,7 @@ fn search_cached(
));
}

return Some(format_result(cityrecord, formatstr, admin1_name));
return Some(format_result(cityrecord, formatstr, true, admin1_name));
} else if mode == GeocodeSubCmd::Reverse {
// regex for Location field. Accepts (lat, long) & lat, long
let locregex: &'static Regex = regex_oncelock!(
Expand Down Expand Up @@ -681,13 +686,13 @@ fn search_cached(
if formatstr == "%+" {
// default for reverse is city-admin1 - e.g. "Brooklyn, New York"
return Some(format!(
"{city_name}, {admin1_name_value}",
city_name = cityrecord.name.clone(),
admin1_name_value = admin1_name.clone()
"{city}, {admin1}",
city = cityrecord.name.clone(),
admin1 = admin1_name.clone()
));
}

return Some(format_result(cityrecord, formatstr, admin1_name));
return Some(format_result(cityrecord, formatstr, false, admin1_name));
}
} else {
// not a valid lat, long
Expand All @@ -700,7 +705,12 @@ fn search_cached(

/// format the geocoded result based on formatstr if its not %+
#[inline]
fn format_result(cityrecord: &CitiesRecord, formatstr: &str, admin1_name: &str) -> String {
fn format_result(
cityrecord: &CitiesRecord,
formatstr: &str,
suggest_mode: bool,
admin1_name: &str,
) -> String {
if formatstr.starts_with('%') {
// if formatstr starts with %, then we're using a predefined format
match formatstr {
Expand All @@ -713,18 +723,36 @@ fn format_result(cityrecord: &CitiesRecord, formatstr: &str, admin1_name: &str)
cityrecord.country.clone().unwrap().name
),
"%city" => cityrecord.name.clone(),
"%city-state-country" | "%city-admin1-country" => format!(
"{}, {} {}",
cityrecord.name,
admin1_name,
cityrecord.country.clone().unwrap().name
),
"%state" | "%admin1" => admin1_name.to_owned(),
"%country" => cityrecord.country.clone().unwrap().name,
"%id" => format!("{}", cityrecord.id),
"%population" => format!("{}", cityrecord.population),
"%timezone" => cityrecord.timezone.clone(),
"%cityrecord" => format!("{cityrecord:?}"),
_ => format!(
"{}, {} {}",
cityrecord.name,
admin1_name,
cityrecord.country.clone().unwrap().name
),
_ => {
// invalid formatstr, so we use the default for suggest or reverse
if suggest_mode {
// default for suggest is location - e.g. "(lat, long)"
format!(
"({latitude}, {longitude})",
latitude = cityrecord.latitude,
longitude = cityrecord.longitude
)
} else {
// default for reverse is city-admin1 - e.g. "Brooklyn, New York"
format!(
"{city}, {admin1}",
city = cityrecord.name.clone(),
admin1 = admin1_name.to_owned()
)
}
},
}
} else {
// if formatstr does not start with %, then we're using dynfmt,
Expand All @@ -744,7 +772,7 @@ fn format_result(cityrecord: &CitiesRecord, formatstr: &str, admin1_name: &str)
if let Ok(formatted) = dynfmt::SimpleCurlyFormat.format(formatstr, cityrecord_map) {
formatted.to_string()
} else {
String::new()
INVALID_DYNFMT.to_string()
}
}
}
80 changes: 74 additions & 6 deletions tests/test_geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,39 @@ fn geocode_suggest_dynfmt() {
assert_eq!(got, expected);
}

#[test]
fn geocode_suggest_invalid_dynfmt() {
let wrk = Workdir::new("geocode_suggest_invalid_dynfmt");
wrk.create(
"data.csv",
vec![
svec!["Location"],
svec!["Melrose, New York"],
svec!["East Flatbush, New York"],
svec!["This is not a Location and it will not be geocoded"],
svec!["95.213424, 190,1234565"], // invalid lat, long
svec!["Makati, Metro Manila, Philippines"],
],
);
let mut cmd = wrk.command("geocode");
cmd.arg("suggest")
.arg("Location")
.arg("--formatstr")
.arg("{latitude}:{longitude} - {name}, {admin1} {invalid_field}")
.arg("data.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
let expected = vec![
svec!["Location"],
svec!["Invalid dynfmt template."],
svec!["Invalid dynfmt template."],
svec!["This is not a Location and it will not be geocoded"],
svec!["95.213424, 190,1234565"], // invalid lat, long
svec!["Invalid dynfmt template."],
];
assert_eq!(got, expected);
}

#[test]
fn geocode_suggest_fmt() {
let wrk = Workdir::new("geocode_suggest_fmt");
Expand All @@ -140,13 +173,13 @@ fn geocode_suggest_fmt() {
let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
let expected = vec![
svec!["Location"],
svec!["Elmhurst, New York, United States"],
svec!["East Flatbush, New York, United States"],
svec!["New York City, New York, United States"],
svec!["East Harlem, New York, United States"],
svec!["Elmhurst, New York United States"],
svec!["East Flatbush, New York United States"],
svec!["New York City, New York United States"],
svec!["East Harlem, New York United States"],
svec!["This is not a Location and it will not be geocoded"],
svec!["40.71427, -74.00597"], // suggest doesn't work with lat, long
svec!["Makati City, National Capital Region, Philippines"],
svec!["Makati City, National Capital Region Philippines"],
];
assert_eq!(got, expected);
}
Expand Down Expand Up @@ -287,7 +320,7 @@ fn geocode_reverse_fmtstring() {
cmd.arg("reverse")
.arg("Location")
.arg("--formatstr")
.arg("%state-country")
.arg("%city-state-country")
.arg("data.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
Expand Down Expand Up @@ -372,3 +405,38 @@ fn geocode_reverse_fmtstring_intl_dynfmt() {
];
assert_eq!(got, expected);
}

#[test]
fn geocode_reverse_fmtstring_intl_invalid_dynfmt() {
let wrk = Workdir::new("geocode_reverse_fmtstring_intl_invalid_dynfmt");
wrk.create(
"data.csv",
vec![
svec!["Location"],
svec!["41.390205, 2.154007"],
svec!["52.371807, 4.896029"],
svec!["(52.520008, 13.404954)"],
svec!["(14.55027,121.03269)"],
svec!["This is not a Location and it will not be geocoded"],
svec!["95.213424, 190,1234565"], // invalid lat,long
],
);
let mut cmd = wrk.command("geocode");
cmd.arg("reverse")
.arg("Location")
.arg("--formatstr")
.arg("pop: {population} tz: {timezone} {doesnotexistfield}")
.arg("data.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
let expected = vec![
svec!["Location"],
svec!["Invalid dynfmt template."],
svec!["Invalid dynfmt template."],
svec!["Invalid dynfmt template."],
svec!["Invalid dynfmt template."],
svec!["This is not a Location and it will not be geocoded"],
svec!["95.213424, 190,1234565"], // invalid lat,long
];
assert_eq!(got, expected);
}

0 comments on commit 51c61e8

Please sign in to comment.