Skip to content

Commit 4544d79

Browse files
committed
geocode: add --min-score for suggest and --k_weight for reverse; clean up --formatstr
and set default formats for suggest and reverse
1 parent b85166f commit 4544d79

File tree

1 file changed

+78
-49
lines changed

1 file changed

+78
-49
lines changed

src/cmd/geocode.rs

+78-49
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,21 @@ geocode arguments:
7777
geocode options:
7878
-c, --new-column <name> Put the transformed values in a new column instead.
7979
-r, --rename <name> New name for the transformed column.
80+
--min-score <score> The minimum score to use for suggest.
81+
[default: 0.8]
82+
-k, --k_weight <score> The weight to multiply population by to use for reverse.
83+
Larger values will favor more populated cities.
84+
If not set (default), the population is not used and the
85+
closest city is returned.
8086
-f, --formatstr=<string> This option is used by several subcommands:
8187
8288
The place format to use. The available formats are:
83-
- 'city-state' (default) - e.g. Brooklyn, New York
84-
- 'city-country' - Brooklyn, US
85-
- 'city-state-country' | 'city-admin1-country' - Brooklyn, New York US
86-
- 'city' - Brooklyn
87-
- 'county' | 'admin2' - Kings County
88-
- 'state' | 'admin1' - New York
89-
- 'county-country' | 'admin2-country' - Kings County, US
90-
- 'county-state-country' | 'admin2-admin1-country' - Kings County, New York US
91-
- 'country' - US
89+
- '%city-state' (default) - e.g. Brooklyn, New York
90+
- '%city-country' - Brooklyn, US
91+
- '%city-state-country' | '%city-admin1-country' - Brooklyn, New York US
92+
- '%city' - Brooklyn
93+
- '%state' | '%admin1' - New York
94+
- '%country' - US
9295
-j, --jobs <arg> The number of jobs to run in parallel.
9396
When not set, the number of jobs is set to the number of CPUs detected.
9497
-b, --batch <size> The number of rows per batch to load into memory, before running in parallel.
@@ -148,6 +151,8 @@ struct Args {
148151
arg_input: Option<String>,
149152
arg_index_file: Option<String>,
150153
flag_rename: Option<String>,
154+
flag_min_score: f32,
155+
flag_k_weight: Option<f32>,
151156
flag_formatstr: String,
152157
flag_batch: u32,
153158
flag_timeout: u16,
@@ -452,8 +457,14 @@ async fn geocode_main(args: Args) -> CliResult<()> {
452457
let mut record = record_item.clone();
453458
let mut cell = record[column_index].to_owned();
454459
if !cell.is_empty() {
455-
let search_result =
456-
search_cached(&engine, geocode_cmd, &cell, &args.flag_formatstr);
460+
let search_result = search_cached(
461+
&engine,
462+
geocode_cmd,
463+
&cell,
464+
&args.flag_formatstr,
465+
args.flag_min_score,
466+
args.flag_k_weight,
467+
);
457468
if let Some(geocoded_result) = search_result {
458469
cell = geocoded_result;
459470
}
@@ -550,6 +561,8 @@ fn search_cached(
550561
mode: GeocodeSubCmd,
551562
cell: &str,
552563
formatstr: &str,
564+
min_score: f32,
565+
k: Option<f32>,
553566
) -> Option<String> {
554567
static EMPTY_STRING: String = String::new();
555568

@@ -562,9 +575,10 @@ fn search_cached(
562575
let mut population = 0_usize;
563576
let mut timezone = String::new();
564577
let mut cityrecord_dbg = String::new();
578+
let mut format_to_use = formatstr.to_string();
565579

566580
if mode == GeocodeSubCmd::Suggest {
567-
let search_result = engine.suggest(cell, 1, None);
581+
let search_result = engine.suggest(cell, 1, Some(min_score));
568582
let Some(cityrecord) = search_result.into_iter().next() else {
569583
return None;
570584
};
@@ -576,19 +590,26 @@ fn search_cached(
576590
return None;
577591
};
578592

579-
id = cityrecord.id;
580-
city_name = cityrecord.name.clone();
581-
latitude = cityrecord.latitude;
582-
longitude = cityrecord.longitude;
583-
country = cityrecord.country.clone().unwrap().name;
584-
admin1_name_value = admin1_name_value_work.clone();
585-
population = cityrecord.population;
586-
timezone = cityrecord.timezone.clone();
587-
cityrecord_dbg = if formatstr == "cityrecord" {
588-
format!("{cityrecord:?}")
593+
if formatstr == "%+" {
594+
// default for suggest is city-state
595+
city_name = cityrecord.name.clone();
596+
admin1_name_value = admin1_name_value_work.clone();
597+
format_to_use = "%city-state".to_string();
589598
} else {
590-
EMPTY_STRING.clone()
591-
};
599+
id = cityrecord.id;
600+
city_name = cityrecord.name.clone();
601+
latitude = cityrecord.latitude;
602+
longitude = cityrecord.longitude;
603+
country = cityrecord.country.clone().unwrap().name;
604+
admin1_name_value = admin1_name_value_work.clone();
605+
population = cityrecord.population;
606+
timezone = cityrecord.timezone.clone();
607+
cityrecord_dbg = if formatstr == "cityrecord" {
608+
format!("{cityrecord:?}")
609+
} else {
610+
EMPTY_STRING.clone()
611+
};
612+
}
592613
} else if mode == GeocodeSubCmd::Reverse {
593614
// regex for Location field. Accepts (lat, long) & lat, long
594615
let locregex: &'static Regex = regex_oncelock!(
@@ -600,7 +621,7 @@ fn search_cached(
600621
let lat = fast_float::parse(&loccaps[1]).unwrap_or_default();
601622
let long = fast_float::parse(&loccaps[2]).unwrap_or_default();
602623
if (-90.0..=90.0).contains(&lat) && (-180.0..=180.0).contains(&long) {
603-
let search_result = engine.reverse((lat, long), 1, None);
624+
let search_result = engine.reverse((lat, long), 1, k);
604625
let Some(cityrecord) = (match search_result {
605626
Some(search_result) => search_result.into_iter().next().map(|ri| ri.city),
606627
None => return None,
@@ -617,19 +638,26 @@ fn search_cached(
617638
return None;
618639
};
619640

620-
id = cityrecord.id;
621-
city_name = cityrecord.name.clone();
622-
latitude = cityrecord.latitude;
623-
longitude = cityrecord.longitude;
624-
country = cityrecord.country.clone().unwrap().name;
625-
admin1_name_value = admin1_name_value_work.clone();
626-
population = cityrecord.population;
627-
timezone = cityrecord.timezone.clone();
628-
cityrecord_dbg = if formatstr == "cityrecord" {
629-
format!("{cityrecord:?}")
641+
if formatstr == "%+" {
642+
// default for reverse is location
643+
latitude = cityrecord.latitude;
644+
longitude = cityrecord.longitude;
645+
format_to_use = "%location".to_string();
630646
} else {
631-
EMPTY_STRING.clone()
632-
};
647+
id = cityrecord.id;
648+
city_name = cityrecord.name.clone();
649+
latitude = cityrecord.latitude;
650+
longitude = cityrecord.longitude;
651+
country = cityrecord.country.clone().unwrap().name;
652+
admin1_name_value = admin1_name_value_work.clone();
653+
population = cityrecord.population;
654+
timezone = cityrecord.timezone.clone();
655+
cityrecord_dbg = if formatstr == "cityrecord" {
656+
format!("{cityrecord:?}")
657+
} else {
658+
EMPTY_STRING.clone()
659+
};
660+
}
633661
}
634662
} else {
635663
return None;
@@ -641,18 +669,19 @@ fn search_cached(
641669
#[allow(clippy::match_same_arms)]
642670
// match arms are evaluated in order,
643671
// so we're optimizing for the most common cases first
644-
let result = match formatstr {
645-
"%+" | "city-state" => format!("{city_name}, {admin1_name_value}"),
646-
"lat-long" => format!("{latitude}, {longitude}"),
647-
"location" => format!("({latitude}, {longitude})"),
648-
"city-country" => format!("{city_name}, {country}"),
649-
"city" => city_name,
650-
"state" => admin1_name_value,
651-
"country" => country,
652-
"id" => format!("{id}"),
653-
"population" => format!("{population}"),
654-
"timezone" => timezone,
655-
"cityrecord" => cityrecord_dbg,
672+
let result = match format_to_use.as_str() {
673+
"%+" | "%city-state" => format!("{city_name}, {admin1_name_value}"),
674+
"%lat-long" => format!("{latitude}, {longitude}"),
675+
"%location" => format!("({latitude}, {longitude})"),
676+
"%city-country" => format!("{city_name}, {country}"),
677+
"%city-state-country" => format!("{city_name}, {admin1_name_value} {country}"),
678+
"%city" => city_name,
679+
"%state" | "%admin1" => admin1_name_value,
680+
"%country" => country,
681+
"%id" => format!("{id}"),
682+
"%population" => format!("{population}"),
683+
"%timezone" => timezone,
684+
"%cityrecord" => cityrecord_dbg,
656685
_ => format!("{city_name}, {admin1_name_value}, {country}"),
657686
};
658687
return Some(result);

0 commit comments

Comments
 (0)