Skip to content

Commit 423fea1

Browse files
authored Jan 31, 2025
Reduce memory consumption during tiling and tile encoding (#319)
* Reduce the memory spike during tile construction
* Remove `need_tilestats`. Add a bunch of debug logging
* Remove debug logging
* Update version and changelog
1 parent 583fc37 commit 423fea1

File tree

5 files changed

+32
-41
lines changed

5 files changed

+32
-41
lines changed
 

‎CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 2.75.1
2+
3+
* Further reduce memory consumption in attribute sorting and tilestats tracking
4+
15
# 2.75.0
26

37
* Reduce memory consumption in attribute accumulation and feature sorting

‎mvt.cpp

+17-9
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,10 @@ struct sorted_value {
316316

317317
return false;
318318
}
319+
320+
bool operator()(const std::shared_ptr<sorted_value> &a, const std::shared_ptr<sorted_value> &b) {
321+
return *a < *b;
322+
}
319323
};
320324

321325
std::string mvt_tile::encode() {
@@ -334,7 +338,7 @@ std::string mvt_tile::encode() {
334338
layer_writer.add_string(3, layers[i].keys[j]); /* key */
335339
}
336340

337-
std::vector<sorted_value> sorted_values;
341+
std::vector<std::shared_ptr<sorted_value>> sorted_values;
338342

339343
for (size_t v = 0; v < layers[i].values.size(); v++) {
340344
std::string value_string;
@@ -371,30 +375,34 @@ std::string mvt_tile::encode() {
371375
exit(EXIT_IMPOSSIBLE);
372376
}
373377

374-
sorted_value sv;
375-
sv.val = std::move(value_string);
376-
sv.orig = v;
378+
std::shared_ptr<sorted_value> sv = std::make_shared<sorted_value>();
379+
sv->val = std::move(value_string);
380+
sv->orig = v;
377381
sorted_values.push_back(std::move(sv));
378382
}
379383

380-
std::stable_sort(sorted_values.begin(), sorted_values.end());
384+
std::stable_sort(sorted_values.begin(), sorted_values.end(), sorted_value());
381385
std::vector<size_t> mapping;
382386
mapping.resize(sorted_values.size());
383387

384388
size_t value_index = 0;
385389
for (size_t v = 0; v < sorted_values.size(); v++) {
386-
mapping[sorted_values[v].orig] = value_index;
387-
layer_writer.add_message(4, sorted_values[v].val);
390+
mapping[sorted_values[v]->orig] = value_index;
391+
layer_writer.add_message(4, sorted_values[v]->val);
388392

389393
// crunch out duplicates that were missed by the hashing
390-
while (v + 1 < sorted_values.size() && sorted_values[v].val == sorted_values[v + 1].val) {
391-
mapping[sorted_values[v + 1].orig] = value_index;
394+
while (v + 1 < sorted_values.size() && sorted_values[v]->val == sorted_values[v + 1]->val) {
395+
sorted_values[v]->val.clear();
396+
mapping[sorted_values[v + 1]->orig] = value_index;
392397
v++;
393398
}
394399

400+
sorted_values[v]->val.clear();
395401
value_index++;
396402
}
397403

404+
sorted_values.clear();
405+
398406
for (size_t f = 0; f < layers[i].features.size(); f++) {
399407
std::string feature_string;
400408
protozero::pbf_writer feature_writer(feature_string);

‎serial.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ struct serial_feature {
165165
long long clustered; // does this feature need the clustered/point_count attributes?
166166
const char *stringpool; // string pool for keys/values lookup
167167
std::shared_ptr<std::string> tile_stringpool; // string pool for mvt_value construction
168-
std::set<std::string> need_tilestats;
169168

170169
int z; // tile being produced
171170
int tx;

‎tile.cpp

+10-30
Original file line numberDiff line numberDiff line change
@@ -1405,10 +1405,6 @@ void add_tilestats(std::string const &layername, int z, std::vector<std::map<std
14051405
}
14061406

14071407
void promote_attribute(std::string const &key, serial_feature &p, key_pool &key_pool) {
1408-
if (p.need_tilestats.count(key) == 0) {
1409-
p.need_tilestats.insert(key);
1410-
}
1411-
14121408
// If the feature being merged into has this key as a metadata reference,
14131409
// promote it to a full_key so it can be modified
14141410

@@ -1430,10 +1426,6 @@ void promote_attribute(std::string const &key, serial_feature &p, key_pool &key_
14301426
}
14311427

14321428
void promote_attribute_prefix(std::string const &key, std::string const &prefixed_key, serial_feature &p, key_pool &key_pool) {
1433-
if (p.need_tilestats.count(prefixed_key) == 0) {
1434-
p.need_tilestats.insert(prefixed_key);
1435-
}
1436-
14371429
// does the prefixed attribute already exist as a full key?
14381430
ssize_t found_as = -1;
14391431
for (size_t i = 0; i < p.full_keys.size(); i++) {
@@ -1450,6 +1442,7 @@ void promote_attribute_prefix(std::string const &key, std::string const &prefixe
14501442
if (found_as >= 0) {
14511443
p.full_keys.push_back(key_pool.pool(prefixed_key));
14521444
p.full_values.push_back(p.full_values[found_as]);
1445+
14531446
return;
14541447
}
14551448

@@ -2300,8 +2293,6 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
23002293
std::vector<std::shared_ptr<serial_feature>> &features = kv.second.features;
23012294

23022295
if (retain_points_multiplier > 1) {
2303-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "tippecanoe:retain_points_multiplier_first", serial_val(mvt_bool, "true"));
2304-
23052296
// mapping from input sequence to current sequence within this tile
23062297
std::vector<std::pair<size_t, size_t>> feature_sequences;
23072298

@@ -2322,8 +2313,6 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
23222313

23232314
features[j]->full_keys.push_back(key_pool.pool("tippecanoe:retain_points_multiplier_sequence"));
23242315
features[j]->full_values.push_back(sv);
2325-
2326-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *features[j]->full_keys.back(), sv);
23272316
}
23282317
}
23292318

@@ -2340,22 +2329,16 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
23402329
sv.s = "true";
23412330
p.full_values.push_back(sv);
23422331

2343-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "clustered", sv);
2344-
23452332
p.full_keys.push_back(key_pool.pool("point_count"));
23462333
sv2.type = mvt_double;
23472334
sv2.s = std::to_string(point_count);
23482335
p.full_values.push_back(sv2);
23492336

2350-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "point_count", sv2);
2351-
23522337
p.full_keys.push_back(key_pool.pool("sqrt_point_count"));
23532338
sv3.type = mvt_double;
23542339
sv3.s = std::to_string(round(100 * sqrt(point_count)) / 100.0);
23552340
p.full_values.push_back(sv3);
23562341

2357-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "sqrt_point_count", sv3);
2358-
23592342
p.full_keys.push_back(key_pool.pool("point_count_abbreviated"));
23602343
sv4.type = mvt_string;
23612344
if (point_count >= 10000) {
@@ -2367,21 +2350,15 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
23672350
}
23682351
sv4.s = abbrev;
23692352
p.full_values.push_back(sv4);
2370-
2371-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "point_count_abbreviated", sv4);
23722353
}
23732354

2374-
if (p.need_tilestats.size() > 0) {
2375-
for (size_t j = 0; j < p.full_keys.size(); j++) {
2376-
if (p.need_tilestats.count(*p.full_keys[j]) > 0) {
2377-
// remove accumulation state
2378-
size_t found = p.full_values[j].s.find('\0');
2379-
if (found != std::string::npos) {
2380-
p.full_values[j].s = p.full_values[j].s.substr(0, found);
2381-
}
2382-
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *p.full_keys[j], p.full_values[j]);
2383-
}
2355+
for (size_t j = 0; j < p.full_keys.size(); j++) {
2356+
// remove accumulation state
2357+
size_t found = p.full_values[j].s.find('\0');
2358+
if (found != std::string::npos) {
2359+
p.full_values[j].s = p.full_values[j].s.substr(0, found);
23842360
}
2361+
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *p.full_keys[j], p.full_values[j]);
23852362
}
23862363
}
23872364

@@ -2574,6 +2551,9 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
25742551
layer.tag(feature, *layer_features[x]->full_keys[a], v);
25752552
}
25762553

2554+
layer_features[x]->full_keys.clear();
2555+
layer_features[x]->full_values.clear();
2556+
25772557
if (additional[A_CALCULATE_FEATURE_DENSITY]) {
25782558
int glow = 255;
25792559
if (layer_features[x]->spacing > 0) {

‎version.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef VERSION_HPP
22
#define VERSION_HPP
33

4-
#define VERSION "v2.75.0"
4+
#define VERSION "v2.75.1"
55

66
#endif

0 commit comments

Comments
 (0)
Please sign in to comment.