Skip to content

Commit 60ad06a

Browse files
committed
Put a bloom filter in front of the binary search for shared nodes
1 parent 53ef0a9 commit 60ad06a

9 files changed

+47
-21
lines changed

geometry.cpp

+10-6
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ drawvec impose_tile_boundaries(const drawvec &geom, long long extent) {
239239
return out;
240240
}
241241

242-
drawvec simplify_lines(drawvec &geom, int z, int tx, int ty, int detail, bool mark_tile_bounds, double simplification, size_t retain, drawvec const &shared_nodes, struct node *shared_nodes_map, size_t nodepos) {
242+
drawvec simplify_lines(drawvec &geom, int z, int tx, int ty, int detail, bool mark_tile_bounds, double simplification, size_t retain, drawvec const &shared_nodes, struct node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom) {
243243
int res = 1 << (32 - detail - z);
244244
long long area = 1LL << (32 - z);
245245

@@ -276,12 +276,16 @@ drawvec simplify_lines(drawvec &geom, int z, int tx, int ty, int detail, bool ma
276276
d.y += ty * (1LL << (32 - z));
277277
}
278278

279-
// to quadkey
280279
struct node n;
281-
n.index = encode_quadkey((unsigned) d.x, (unsigned) d.y);
282-
283-
if (bsearch(&n, shared_nodes_map, nodepos / sizeof(node), sizeof(node), nodecmp) != NULL) {
284-
geom[i].necessary = true;
280+
n.index = encode_vertex((unsigned) d.x, (unsigned) d.y);
281+
size_t bloom_ix = n.index % (shared_nodes_bloom.size() * 8);
282+
unsigned char bloom_mask = 1 << (bloom_ix & 7);
283+
bloom_ix >>= 3;
284+
285+
if (shared_nodes_bloom[bloom_ix] & bloom_mask) {
286+
if (bsearch(&n, shared_nodes_map, nodepos / sizeof(node), sizeof(node), nodecmp) != NULL) {
287+
geom[i].necessary = true;
288+
}
285289
}
286290
}
287291
}

geometry.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ drawvec stairstep(drawvec &geom, int z, int detail);
8181
bool point_within_tile(long long x, long long y, int z);
8282
int quick_check(const long long *bbox, int z, long long buffer);
8383
void douglas_peucker(drawvec &geom, int start, int n, double e, size_t kept, size_t retain, bool prevent_simplify_shared_nodes);
84-
drawvec simplify_lines(drawvec &geom, int z, int tx, int ty, int detail, bool mark_tile_bounds, double simplification, size_t retain, drawvec const &shared_nodes, struct node *shared_nodes_map, size_t nodepos);
84+
drawvec simplify_lines(drawvec &geom, int z, int tx, int ty, int detail, bool mark_tile_bounds, double simplification, size_t retain, drawvec const &shared_nodes, struct node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom);
8585
drawvec reorder_lines(const drawvec &geom);
8686
drawvec fix_polygon(const drawvec &geom);
8787
std::vector<drawvec> chop_polygon(std::vector<drawvec> &geoms);

main.cpp

+14-2
Original file line number | Diff line number | Diff line change
@@ -2070,7 +2070,7 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
20702070
#endif
20712071

20722072
struct node n;
2073-
n.index = encode_quadkey((unsigned) x, (unsigned) y);
2073+
n.index = encode_vertex((unsigned) x, (unsigned) y);
20742074

20752075
fwrite_check((char *) &n, sizeof(struct node), 1, readers[0].nodefile, &readers[0].nodepos, "vertices");
20762076
}
@@ -2084,6 +2084,9 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
20842084
fprintf(stderr, "Merging nodes \r");
20852085
}
20862086

2087+
std::string shared_nodes_bloom;
2088+
shared_nodes_bloom.resize(34567891); // circa 34MB, size nowhere near a power of 2
2089+
20872090
// Sort nodes that can't be simplified away; scan the list to remove duplicates
20882091

20892092
FILE *shared_nodes;
@@ -2149,6 +2152,15 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
21492152
fwrite_check((void *) &here, sizeof(here), 1, shared_nodes, &nodepos, "shared nodes");
21502153
written = here;
21512154

2155+
size_t bloom_ix = here.index % (shared_nodes_bloom.size() * 8);
2156+
unsigned char bloom_mask = 1 << (bloom_ix & 7);
2157+
bloom_ix >>= 3;
2158+
2159+
if (shared_nodes_bloom[bloom_ix] & bloom_mask) {
2160+
printf("collision at %lld : %zu/%d\n", nodepos / sizeof(here), bloom_ix, bloom_mask);
2161+
}
2162+
shared_nodes_bloom[bloom_ix] |= bloom_mask;
2163+
21522164
#if 0
21532165
unsigned wx, wy;
21542166
decode_quadkey(here.index, &wx, &wy);
@@ -2770,7 +2782,7 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
27702782
std::atomic<unsigned> midx(0);
27712783
std::atomic<unsigned> midy(0);
27722784
std::vector<strategy> strategies;
2773-
int written = traverse_zooms(fd, size, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, pool_off, initial_x, initial_y, simplification, maxzoom_simplification, layermaps, prefilter, postfilter, attribute_accum, filter, strategies, iz, shared_nodes_map, nodepos, basezoom, droprate, unidecode_data);
2785+
int written = traverse_zooms(fd, size, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, pool_off, initial_x, initial_y, simplification, maxzoom_simplification, layermaps, prefilter, postfilter, attribute_accum, filter, strategies, iz, shared_nodes_map, nodepos, shared_nodes_bloom, basezoom, droprate, unidecode_data);
27742786

27752787
if (maxzoom != written) {
27762788
if (written > minzoom) {

projection.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -217,3 +217,7 @@ void set_projection_or_exit(const char *optarg) {
217217
exit(EXIT_ARGS);
218218
}
219219
}
220+
221+
// Packs a vertex's two coordinates into a single unsigned long long key:
// wx is widened to unsigned long long and shifted left by 32 bits, then
// OR'd with wy, so wx forms the high part of the result and wy the low part.
// The widening cast happens before the shift, so the shift is performed on
// the wider type rather than on unsigned int.
// NOTE(review): assumes unsigned int is at most 32 bits wide so that wy
// cannot overlap the shifted wx -- confirm for the target platforms.
unsigned long long encode_vertex(unsigned int wx, unsigned int wy) {
222+
return (((unsigned long long) wx) << 32) | wy;
223+
}

projection.hpp

+2
Original file line number | Diff line number | Diff line change
@@ -26,4 +26,6 @@ void decode_quadkey(unsigned long long index, unsigned *wx, unsigned *wy);
2626
unsigned long long encode_hilbert(unsigned int wx, unsigned int wy);
2727
void decode_hilbert(unsigned long long index, unsigned *wx, unsigned *wy);
2828

29+
unsigned long long encode_vertex(unsigned int wx, unsigned int wy);
30+
2931
#endif

serial.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -407,7 +407,7 @@ static void add_scaled_node(struct reader *r, serialization_state *sst, draw g)
407407
long long y = SHIFT_LEFT(g.y);
408408

409409
struct node n;
410-
n.index = encode_quadkey((unsigned) x, (unsigned) y);
410+
n.index = encode_vertex((unsigned) x, (unsigned) y);
411411

412412
fwrite_check((char *) &n, sizeof(struct node), 1, r->nodefile, &r->nodepos, sst->fname);
413413
}

shared_borders.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,7 @@ bool find_common_edges(std::vector<serial_feature> &features, int z, int line_de
357357
}
358358
if (!(prevent[P_SIMPLIFY] || (z == maxzoom && prevent[P_SIMPLIFY_LOW]) || (z < maxzoom && additional[A_GRID_LOW_ZOOMS]))) {
359359
// tx and ty are 0 here because we aren't trying to do anything with the shared_nodes_map
360-
simplified_arcs[ai->second] = simplify_lines(dv, z, 0, 0, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), simplification, 4, drawvec(), NULL, 0);
360+
simplified_arcs[ai->second] = simplify_lines(dv, z, 0, 0, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), simplification, 4, drawvec(), NULL, 0, "");
361361
} else {
362362
simplified_arcs[ai->second] = dv;
363363
}

tile.cpp

+13-9
Original file line number | Diff line number | Diff line change
@@ -541,6 +541,7 @@ struct simplification_worker_arg {
541541
drawvec *shared_nodes;
542542
node *shared_nodes_map;
543543
size_t nodepos;
544+
std::string const *shared_nodes_bloom;
544545
};
545546

546547
// If a polygon has collapsed away to nothing during polygon cleaning,
@@ -590,7 +591,7 @@ static drawvec revive_polygon(drawvec &geom, double area, int z, int detail) {
590591
// This simplifies the geometry of one feature. It is generally called from the feature_simplification_worker
591592
// but is broken out here so that it can be called from earlier in write_tile if coalesced geometries build up
592593
// too much in memory.
593-
static double simplify_feature(serial_feature *p, drawvec const &shared_nodes, node *shared_nodes_map, size_t nodepos) {
594+
static double simplify_feature(serial_feature *p, drawvec const &shared_nodes, node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom) {
594595
drawvec geom = p->geometry;
595596
signed char t = p->t;
596597
int z = p->z;
@@ -637,13 +638,13 @@ static double simplify_feature(serial_feature *p, drawvec const &shared_nodes, n
637638
}
638639

639640
// continues to simplify to line_detail even if we have extra detail
640-
drawvec ngeom = simplify_lines(geom, z, p->tx, p->ty, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), p->simplification, t == VT_POLYGON ? 4 : 0, shared_nodes, shared_nodes_map, nodepos);
641+
drawvec ngeom = simplify_lines(geom, z, p->tx, p->ty, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), p->simplification, t == VT_POLYGON ? 4 : 0, shared_nodes, shared_nodes_map, nodepos, shared_nodes_bloom);
641642

642643
if (p->coalesced && prevent[P_SIMPLIFY_SHARED_NODES]) {
643644
// do another simplification to eliminate collinearities
644645
// that were left behind at the former corners between
645646
// coalesced geometries
646-
ngeom = simplify_lines(ngeom, z, p->tx, p->ty, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), 0.1, t == VT_POLYGON ? 4 : 0, shared_nodes, NULL, 0);
647+
ngeom = simplify_lines(ngeom, z, p->tx, p->ty, line_detail, !(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), 0.1, t == VT_POLYGON ? 4 : 0, shared_nodes, NULL, 0, "");
647648
}
648649

649650
if (t != VT_POLYGON || ngeom.size() >= 3) {
@@ -671,7 +672,7 @@ static void *simplification_worker(void *v) {
671672
for (size_t i = a->task; i < (*features).size(); i += a->tasks) {
672673
double area = 0;
673674
if (!a->trying_to_stop_early) {
674-
area = simplify_feature(&((*features)[i]), *(a->shared_nodes), a->shared_nodes_map, a->nodepos);
675+
area = simplify_feature(&((*features)[i]), *(a->shared_nodes), a->shared_nodes_map, a->nodepos, *(a->shared_nodes_bloom));
675676
}
676677

677678
signed char t = (*features)[i].t;
@@ -896,6 +897,7 @@ struct write_tile_args {
896897
bool compressed;
897898
node *shared_nodes_map;
898899
size_t nodepos;
900+
std::string const *shared_nodes_bloom;
899901
std::set<zxy> const *skip_children; // what is being skipped at this zoom
900902
std::set<zxy> skip_children_out; // what will be skipped in the next zoom
901903
};
@@ -1534,7 +1536,7 @@ void skip_tile(decompressor *geoms, std::atomic<long long> *geompos_in, bool com
15341536
}
15351537
}
15361538

1537-
long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, char *global_stringpool, int z, const unsigned tx, const unsigned ty, const int detail, int min_detail, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, compressor **geomfile, std::atomic<long long> *geompos, int minzoom, int maxzoom, double todo, std::atomic<long long> *along, long long alongminus, double gamma, int child_shards, long long *pool_off, unsigned *initial_x, unsigned *initial_y, std::atomic<int> *running, double simplification, std::vector<std::map<std::string, layermap_entry>> *layermaps, std::vector<std::vector<std::string>> *layer_unmaps, size_t tiling_seg, size_t pass, unsigned long long mingap, long long minextent, unsigned long long mindrop_sequence, const char *prefilter, const char *postfilter, json_object *filter, write_tile_args *arg, atomic_strategy *strategy_out, bool compressed_input, node *shared_nodes_map, size_t nodepos, std::vector<std::string> const &unidecode_data, long long estimated_complexity, std::set<zxy> &skip_children_out) {
1539+
long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, char *global_stringpool, int z, const unsigned tx, const unsigned ty, const int detail, int min_detail, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, compressor **geomfile, std::atomic<long long> *geompos, int minzoom, int maxzoom, double todo, std::atomic<long long> *along, long long alongminus, double gamma, int child_shards, long long *pool_off, unsigned *initial_x, unsigned *initial_y, std::atomic<int> *running, double simplification, std::vector<std::map<std::string, layermap_entry>> *layermaps, std::vector<std::vector<std::string>> *layer_unmaps, size_t tiling_seg, size_t pass, unsigned long long mingap, long long minextent, unsigned long long mindrop_sequence, const char *prefilter, const char *postfilter, json_object *filter, write_tile_args *arg, atomic_strategy *strategy_out, bool compressed_input, node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom, std::vector<std::string> const &unidecode_data, long long estimated_complexity, std::set<zxy> &skip_children_out) {
15381540
double merge_fraction = 1;
15391541
double mingap_fraction = 1;
15401542
double minextent_fraction = 1;
@@ -2048,7 +2050,7 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
20482050
// may not be very effective for reducing memory usage.
20492051

20502052
for (; simplified_geometry_through < features.size(); simplified_geometry_through++) {
2051-
simplify_feature(&features[simplified_geometry_through], shared_nodes, shared_nodes_map, nodepos);
2053+
simplify_feature(&features[simplified_geometry_through], shared_nodes, shared_nodes_map, nodepos, shared_nodes_bloom);
20522054

20532055
if (features[simplified_geometry_through].t == VT_POLYGON) {
20542056
drawvec to_clean = features[simplified_geometry_through].geometry;
@@ -2247,6 +2249,7 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
22472249
args[i].shared_nodes = &shared_nodes;
22482250
args[i].shared_nodes_map = shared_nodes_map;
22492251
args[i].nodepos = nodepos;
2252+
args[i].shared_nodes_bloom = &shared_nodes_bloom;
22502253
args[i].trying_to_stop_early = trying_to_stop_early;
22512254

22522255
if (tasks > 1) {
@@ -2326,7 +2329,7 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
23262329
if (!(prevent[P_SIMPLIFY] || (z == maxzoom && prevent[P_SIMPLIFY_LOW]))) {
23272330
// XXX revisit: why does this not take zoom into account?
23282331
layer_features[x].geometry = simplify_lines(layer_features[x].geometry, 32, 0, 0, 0,
2329-
!(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), simplification, layer_features[x].t == VT_POLYGON ? 4 : 0, shared_nodes, NULL, 0);
2332+
!(prevent[P_CLIPPING] || prevent[P_DUPLICATION]), simplification, layer_features[x].t == VT_POLYGON ? 4 : 0, shared_nodes, NULL, 0, "");
23302333
}
23312334
}
23322335

@@ -2777,7 +2780,7 @@ exit(EXIT_IMPOSSIBLE);
27772780
skip_tile(&dc, &geompos, arg->compressed);
27782781
len = 1;
27792782
} else {
2780-
len = write_tile(&dc, &geompos, arg->global_stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->outdb, arg->outdir, arg->buffer, arg->fname, arg->geomfile, arg->geompos, arg->minzoom, arg->maxzoom, arg->todo, arg->along, geompos, arg->gamma, arg->child_shards, arg->pool_off, arg->initial_x, arg->initial_y, arg->running, arg->simplification, arg->layermaps, arg->layer_unmaps, arg->tiling_seg, arg->pass, arg->mingap, arg->minextent, arg->mindrop_sequence, arg->prefilter, arg->postfilter, arg->filter, arg, arg->strategy, arg->compressed, arg->shared_nodes_map, arg->nodepos, (*arg->unidecode_data), estimated_complexity, arg->skip_children_out);
2783+
len = write_tile(&dc, &geompos, arg->global_stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->outdb, arg->outdir, arg->buffer, arg->fname, arg->geomfile, arg->geompos, arg->minzoom, arg->maxzoom, arg->todo, arg->along, geompos, arg->gamma, arg->child_shards, arg->pool_off, arg->initial_x, arg->initial_y, arg->running, arg->simplification, arg->layermaps, arg->layer_unmaps, arg->tiling_seg, arg->pass, arg->mingap, arg->minextent, arg->mindrop_sequence, arg->prefilter, arg->postfilter, arg->filter, arg, arg->strategy, arg->compressed, arg->shared_nodes_map, arg->nodepos, *(arg->shared_nodes_bloom), (*arg->unidecode_data), estimated_complexity, arg->skip_children_out);
27812784
}
27822785

27832786
if (pthread_mutex_lock(&var_lock) != 0) {
@@ -2843,7 +2846,7 @@ exit(EXIT_IMPOSSIBLE);
28432846
return err_or_null;
28442847
}
28452848

2846-
int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std::atomic<unsigned> *midx, std::atomic<unsigned> *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector<std::map<std::string, layermap_entry>> &layermaps, const char *prefilter, const char *postfilter, std::unordered_map<std::string, attribute_op> const *attribute_accum, json_object *filter, std::vector<strategy> &strategies, int iz, node *shared_nodes_map, size_t nodepos, int basezoom, double droprate, std::vector<std::string> const &unidecode_data) {
2849+
int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std::atomic<unsigned> *midx, std::atomic<unsigned> *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector<std::map<std::string, layermap_entry>> &layermaps, const char *prefilter, const char *postfilter, std::unordered_map<std::string, attribute_op> const *attribute_accum, json_object *filter, std::vector<strategy> &strategies, int iz, node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom, int basezoom, double droprate, std::vector<std::string> const &unidecode_data) {
28472850
last_progress = 0;
28482851

28492852
// The existing layermaps are one table per input thread.
@@ -3060,6 +3063,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std::
30603063
args[thread].compressed = (z != iz);
30613064
args[thread].shared_nodes_map = shared_nodes_map;
30623065
args[thread].nodepos = nodepos;
3066+
args[thread].shared_nodes_bloom = &shared_nodes_bloom;
30633067
args[thread].skip_children = &skip_children;
30643068
args[thread].skip_children_out.clear();
30653069

tile.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ struct strategy {
5858

5959
// long long write_tile(char **geom, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, sqlite3 *outdb, const char *outdir, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, std::atomic<strategy> *strategy);
6060

61-
int traverse_zooms(int *geomfd, off_t *geom_size, char *stringpool, std::atomic<unsigned> *midx, std::atomic<unsigned> *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector<std::map<std::string, layermap_entry> > &layermap, const char *prefilter, const char *postfilter, std::unordered_map<std::string, attribute_op> const *attribute_accum, struct json_object *filter, std::vector<strategy> &strategies, int iz, struct node *shared_nodes_map, size_t nodepos, int basezoom, double droprate, std::vector<std::string> const &unidecode_data);
61+
int traverse_zooms(int *geomfd, off_t *geom_size, char *stringpool, std::atomic<unsigned> *midx, std::atomic<unsigned> *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector<std::map<std::string, layermap_entry> > &layermap, const char *prefilter, const char *postfilter, std::unordered_map<std::string, attribute_op> const *attribute_accum, struct json_object *filter, std::vector<strategy> &strategies, int iz, struct node *shared_nodes_map, size_t nodepos, std::string const &shared_nodes_bloom, int basezoom, double droprate, std::vector<std::string> const &unidecode_data);
6262

6363
int manage_gap(unsigned long long index, unsigned long long *previndex, double scale, double gamma, double *gap);
6464

0 commit comments

Comments
 (0)