@@ -77,18 +77,21 @@ geocode arguments:
77
77
geocode options:
78
78
-c, --new-column <name> Put the transformed values in a new column instead.
79
79
-r, --rename <name> New name for the transformed column.
80
+ --min-score <score> The minimum score to use for suggest.
81
+ [default: 0.8]
82
+ -k, --k_weight <score> The weight to multiply population by to use for reverse.
83
+ Larger values will favor more populated cities.
84
+ If not set (default), the population is not used and the
85
+ closest city is returned.
80
86
-f, --formatstr=<string> This option is used by several subcommands:
81
87
82
88
The place format to use. The available formats are:
83
- - 'city-state' (default) - e.g. Brooklyn, New York
84
- - 'city-country' - Brooklyn, US
85
- - 'city-state-country' | 'city-admin1-country' - Brooklyn, New York US
86
- - 'city' - Brooklyn
87
- - 'county' | 'admin2' - Kings County
88
- - 'state' | 'admin1' - New York
89
- - 'county-country' | 'admin2-country' - Kings County, US
90
- - 'county-state-country' | 'admin2-admin1-country' - Kings County, New York US
91
- - 'country' - US
89
+ - '%city-state' (default) - e.g. Brooklyn, New York
90
+ - '%city-country' - Brooklyn, US
91
+ - '%city-state-country' | '%city-admin1-country' - Brooklyn, New York US
92
+ - '%city' - Brooklyn
93
+ - '%state' | '%admin1' - New York
94
+ - '%country' - US
92
95
-j, --jobs <arg> The number of jobs to run in parallel.
93
96
When not set, the number of jobs is set to the number of CPUs detected.
94
97
-b, --batch <size> The number of rows per batch to load into memory, before running in parallel.
@@ -148,6 +151,8 @@ struct Args {
148
151
arg_input : Option < String > ,
149
152
arg_index_file : Option < String > ,
150
153
flag_rename : Option < String > ,
154
+ flag_min_score : f32 ,
155
+ flag_k_weight : Option < f32 > ,
151
156
flag_formatstr : String ,
152
157
flag_batch : u32 ,
153
158
flag_timeout : u16 ,
@@ -452,8 +457,14 @@ async fn geocode_main(args: Args) -> CliResult<()> {
452
457
let mut record = record_item. clone ( ) ;
453
458
let mut cell = record[ column_index] . to_owned ( ) ;
454
459
if !cell. is_empty ( ) {
455
- let search_result =
456
- search_cached ( & engine, geocode_cmd, & cell, & args. flag_formatstr ) ;
460
+ let search_result = search_cached (
461
+ & engine,
462
+ geocode_cmd,
463
+ & cell,
464
+ & args. flag_formatstr ,
465
+ args. flag_min_score ,
466
+ args. flag_k_weight ,
467
+ ) ;
457
468
if let Some ( geocoded_result) = search_result {
458
469
cell = geocoded_result;
459
470
}
@@ -550,6 +561,8 @@ fn search_cached(
550
561
mode : GeocodeSubCmd ,
551
562
cell : & str ,
552
563
formatstr : & str ,
564
+ min_score : f32 ,
565
+ k : Option < f32 > ,
553
566
) -> Option < String > {
554
567
static EMPTY_STRING : String = String :: new ( ) ;
555
568
@@ -562,9 +575,10 @@ fn search_cached(
562
575
let mut population = 0_usize ;
563
576
let mut timezone = String :: new ( ) ;
564
577
let mut cityrecord_dbg = String :: new ( ) ;
578
+ let mut format_to_use = formatstr. to_string ( ) ;
565
579
566
580
if mode == GeocodeSubCmd :: Suggest {
567
- let search_result = engine. suggest ( cell, 1 , None ) ;
581
+ let search_result = engine. suggest ( cell, 1 , Some ( min_score ) ) ;
568
582
let Some ( cityrecord) = search_result. into_iter ( ) . next ( ) else {
569
583
return None ;
570
584
} ;
@@ -576,19 +590,26 @@ fn search_cached(
576
590
return None ;
577
591
} ;
578
592
579
- id = cityrecord. id ;
580
- city_name = cityrecord. name . clone ( ) ;
581
- latitude = cityrecord. latitude ;
582
- longitude = cityrecord. longitude ;
583
- country = cityrecord. country . clone ( ) . unwrap ( ) . name ;
584
- admin1_name_value = admin1_name_value_work. clone ( ) ;
585
- population = cityrecord. population ;
586
- timezone = cityrecord. timezone . clone ( ) ;
587
- cityrecord_dbg = if formatstr == "cityrecord" {
588
- format ! ( "{cityrecord:?}" )
593
+ if formatstr == "%+" {
594
+ // default for suggest is city-state
595
+ city_name = cityrecord. name . clone ( ) ;
596
+ admin1_name_value = admin1_name_value_work. clone ( ) ;
597
+ format_to_use = "%city-state" . to_string ( ) ;
589
598
} else {
590
- EMPTY_STRING . clone ( )
591
- } ;
599
+ id = cityrecord. id ;
600
+ city_name = cityrecord. name . clone ( ) ;
601
+ latitude = cityrecord. latitude ;
602
+ longitude = cityrecord. longitude ;
603
+ country = cityrecord. country . clone ( ) . unwrap ( ) . name ;
604
+ admin1_name_value = admin1_name_value_work. clone ( ) ;
605
+ population = cityrecord. population ;
606
+ timezone = cityrecord. timezone . clone ( ) ;
607
+ cityrecord_dbg = if formatstr == "cityrecord" {
608
+ format ! ( "{cityrecord:?}" )
609
+ } else {
610
+ EMPTY_STRING . clone ( )
611
+ } ;
612
+ }
592
613
} else if mode == GeocodeSubCmd :: Reverse {
593
614
// regex for Location field. Accepts (lat, long) & lat, long
594
615
let locregex: & ' static Regex = regex_oncelock ! (
@@ -600,7 +621,7 @@ fn search_cached(
600
621
let lat = fast_float:: parse ( & loccaps[ 1 ] ) . unwrap_or_default ( ) ;
601
622
let long = fast_float:: parse ( & loccaps[ 2 ] ) . unwrap_or_default ( ) ;
602
623
if ( -90.0 ..=90.0 ) . contains ( & lat) && ( -180.0 ..=180.0 ) . contains ( & long) {
603
- let search_result = engine. reverse ( ( lat, long) , 1 , None ) ;
624
+ let search_result = engine. reverse ( ( lat, long) , 1 , k ) ;
604
625
let Some ( cityrecord) = ( match search_result {
605
626
Some ( search_result) => search_result. into_iter ( ) . next ( ) . map ( |ri| ri. city ) ,
606
627
None => return None ,
@@ -617,19 +638,26 @@ fn search_cached(
617
638
return None ;
618
639
} ;
619
640
620
- id = cityrecord. id ;
621
- city_name = cityrecord. name . clone ( ) ;
622
- latitude = cityrecord. latitude ;
623
- longitude = cityrecord. longitude ;
624
- country = cityrecord. country . clone ( ) . unwrap ( ) . name ;
625
- admin1_name_value = admin1_name_value_work. clone ( ) ;
626
- population = cityrecord. population ;
627
- timezone = cityrecord. timezone . clone ( ) ;
628
- cityrecord_dbg = if formatstr == "cityrecord" {
629
- format ! ( "{cityrecord:?}" )
641
+ if formatstr == "%+" {
642
+ // default for reverse is location, i.e. "(lat, long)"
643
+ latitude = cityrecord. latitude ;
644
+ longitude = cityrecord. longitude ;
645
+ format_to_use = "%location" . to_string ( ) ;
630
646
} else {
631
- EMPTY_STRING . clone ( )
632
- } ;
647
+ id = cityrecord. id ;
648
+ city_name = cityrecord. name . clone ( ) ;
649
+ latitude = cityrecord. latitude ;
650
+ longitude = cityrecord. longitude ;
651
+ country = cityrecord. country . clone ( ) . unwrap ( ) . name ;
652
+ admin1_name_value = admin1_name_value_work. clone ( ) ;
653
+ population = cityrecord. population ;
654
+ timezone = cityrecord. timezone . clone ( ) ;
655
+ cityrecord_dbg = if formatstr == "cityrecord" {
656
+ format ! ( "{cityrecord:?}" )
657
+ } else {
658
+ EMPTY_STRING . clone ( )
659
+ } ;
660
+ }
633
661
}
634
662
} else {
635
663
return None ;
@@ -641,18 +669,19 @@ fn search_cached(
641
669
#[ allow( clippy:: match_same_arms) ]
642
670
// match arms are evaluated in order,
643
671
// so we're optimizing for the most common cases first
644
- let result = match formatstr {
645
- "%+" | "city-state" => format ! ( "{city_name}, {admin1_name_value}" ) ,
646
- "lat-long" => format ! ( "{latitude}, {longitude}" ) ,
647
- "location" => format ! ( "({latitude}, {longitude})" ) ,
648
- "city-country" => format ! ( "{city_name}, {country}" ) ,
649
- "city" => city_name,
650
- "state" => admin1_name_value,
651
- "country" => country,
652
- "id" => format ! ( "{id}" ) ,
653
- "population" => format ! ( "{population}" ) ,
654
- "timezone" => timezone,
655
- "cityrecord" => cityrecord_dbg,
672
+ let result = match format_to_use. as_str ( ) {
673
+ "%+" | "%city-state" => format ! ( "{city_name}, {admin1_name_value}" ) ,
674
+ "%lat-long" => format ! ( "{latitude}, {longitude}" ) ,
675
+ "%location" => format ! ( "({latitude}, {longitude})" ) ,
676
+ "%city-country" => format ! ( "{city_name}, {country}" ) ,
677
+ "%city-state-country" => format ! ( "{city_name}, {admin1_name_value} {country}" ) ,
678
+ "%city" => city_name,
679
+ "%state" | "%admin1" => admin1_name_value,
680
+ "%country" => country,
681
+ "%id" => format ! ( "{id}" ) ,
682
+ "%population" => format ! ( "{population}" ) ,
683
+ "%timezone" => timezone,
684
+ "%cityrecord" => cityrecord_dbg,
656
685
_ => format ! ( "{city_name}, {admin1_name_value}, {country}" ) ,
657
686
} ;
658
687
return Some ( result) ;
0 commit comments