Skip to content

Commit

Permalink
Reduce memory consumption from attribute accumulation (#290)
Browse files (browse the repository at this point in the history)
* Progress on plumbing a string pool for full_keys through

* More plumbing for key_pool

* Don't keep features with identical locations as multiplier features

* Revert "Don't keep features with identical locations as multiplier features"

This reverts commit 413f0c8.

* Adjust calculated maxzoom to account for duplicate feature locations

* Update changelog and version

* Add a test affected by the maxzoom change with duplicate locations

* Round the drop rate a little for cross-platform test consistency
  • Loading branch information
e-n-f authored Nov 5, 2024
1 parent b3b89e1 commit 23667bb
Show file tree
Hide file tree
Showing 21 changed files with 1,230 additions and 285 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.67.0

* Reduce memory consumption of duplicate attribute names in `serial_feature` by sharing each distinct name through a string pool
* The maxzoom guess calculation now takes into account the number of duplicate feature locations

# 2.66.0

* Only bin by ID, not by geometry, if --bin-by-id-list is specified
Expand Down
14 changes: 7 additions & 7 deletions attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribut
}

template <class T>
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::string> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
for (size_t i = 0; i < full_keys.size(); i++) {
if (key == full_keys[i]) {
if (key == *full_keys[i]) {
switch (op) {
case op_sum:
full_values[i] = (full_values[i].to_double() + val.to_double());
Expand Down Expand Up @@ -193,14 +193,14 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
exit(EXIT_IMPOSSIBLE);
}

full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(v);
}

void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}
6 changes: 4 additions & 2 deletions attribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <vector>
#include <unordered_map>
#include <map>
#include <memory>
#include "mvt.hpp"
#include "milo/dtoa_milo.h"

Expand All @@ -24,12 +25,13 @@ struct accum_state {
};

struct serial_val;
struct key_pool;

void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, std::string name, std::string type);
void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, const char *arg, char **argv);

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);

extern std::map<std::string, attribute_op> numeric_operations;

Expand Down
49 changes: 26 additions & 23 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,14 +1163,14 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
};

// accumulate :sum:, :min:, :max:, and :count: versions of the specified attribute
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::string> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state // accumulation state for preserve_attribute()
) {
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::shared_ptr<std::string>> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
key_pool &key_pool) {
// If this is a numeric attribute, but there is also a prefix:sum (etc.) for the
// same attribute, we want to use that one instead of this one.

Expand Down Expand Up @@ -1213,7 +1213,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
if (out_attr == numeric_out_field.end()) {
// not present at all, so copy our value to the prefixed output
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
if (starting_from_accumulation) {
Expand All @@ -1229,7 +1229,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
} else {
// exists unprefixed, so copy it, and then accumulate on our value
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
mvt_value v;
Expand All @@ -1243,7 +1243,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values.push_back(v);
} else {
full_values.push_back(full_values[out_attr->second]);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
} else {
Expand All @@ -1256,7 +1256,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + 1);
}
} else {
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
}
Expand Down Expand Up @@ -1289,7 +1289,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::string const &accumulate_numeric) {
std::string const &accumulate_numeric,
key_pool &key_pool) {
// Add geometry to output feature

mvt_feature outfeature;
Expand All @@ -1315,7 +1316,7 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// multiplier cluster accumulated onto them

std::unordered_map<std::string, accum_state> attribute_accum_state;
std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

Expand All @@ -1324,12 +1325,12 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(features[0].layer->keys[features[0].tags[i]]);
full_keys.push_back(key_pool.pool(features[0].layer->keys[features[0].tags[i]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
Expand Down Expand Up @@ -1357,13 +1358,13 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state);
keys, numeric_out_field, attribute_accum_state, key_pool);
}
}
}
Expand All @@ -1373,8 +1374,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// and tag them onto the output feature

for (size_t i = 0; i < full_keys.size(); i++) {
if (should_keep(full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, full_keys[i], full_values[i]);
if (should_keep(*full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, *full_keys[i], full_values[i]);
}
}

Expand Down Expand Up @@ -1522,6 +1523,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
std::set<std::string> exclude,
std::vector<std::string> exclude_prefix) {
std::vector<index_event> events;
key_pool key_pool;

// Index bins
for (size_t i = 0; i < bins.size(); i++) {
Expand Down Expand Up @@ -1678,7 +1680,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
accumulate_numeric);
accumulate_numeric, key_pool);
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand Down Expand Up @@ -1713,6 +1715,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
std::vector<mvt_layer> const &bins, std::string const &bin_by_id_list,
std::string const &accumulate_numeric) {
mvt_tile outtile;
key_pool key_pool;

for (auto const &tile : tiles) {
for (auto const &layer : tile.tile.layers) {
Expand Down Expand Up @@ -1837,7 +1840,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}
}
Expand Down Expand Up @@ -1894,7 +1897,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}

Expand Down
5 changes: 3 additions & 2 deletions flatgeobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
sf.geometry = dv;
sf.t = drawvec_type;

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;
key_pool key_pool;

// assume tabular schema with columns in header
size_t p_pos = 0;
Expand Down Expand Up @@ -243,7 +244,7 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
fprintf(stderr, "flatgeobuf has unsupported column type %u\n", (unsigned int)col_type);
exit(EXIT_IMPOSSIBLE);
}
full_keys.push_back(h_column_names[col_idx]);
full_keys.push_back(key_pool.pool(h_column_names[col_idx]));
full_values.push_back(sv);
}

Expand Down
9 changes: 5 additions & 4 deletions geobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,14 +270,14 @@ std::vector<drawvec_type> readGeometry(protozero::pbf_reader &pbf, size_t dim, d
return ret;
}

void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<std::string> &keys, struct serialization_state *sst, int layer, std::string layername) {
void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<std::string> &keys, struct serialization_state *sst, int layer, std::string layername, key_pool &key_pool) {
std::vector<drawvec_type> dv;
long long id = 0;
bool has_id = false;
std::vector<serial_val> values;
std::map<std::string, serial_val> other;

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;

while (pbf.next()) {
Expand Down Expand Up @@ -338,7 +338,7 @@ void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<s
exit(EXIT_IMPOSSIBLE);
}

full_keys.push_back(keys[properties[i]]);
full_keys.push_back(key_pool.pool(keys[properties[i]]));
full_values.push_back(values[properties[i + 1]]);
}

Expand Down Expand Up @@ -434,10 +434,11 @@ struct queue_run_arg {

void *run_parse_feature(void *v) {
struct queue_run_arg *qra = (struct queue_run_arg *) v;
key_pool key_pool;

for (size_t i = qra->start; i < qra->end; i++) {
struct queued_feature &qf = feature_queue[i];
readFeature(qf.pbf, qf.dim, qf.e, *qf.keys, &(*qf.sst)[qra->segment], qf.layer, qf.layername);
readFeature(qf.pbf, qf.dim, qf.e, *qf.keys, &(*qf.sst)[qra->segment], qf.layer, qf.layername, key_pool);
}

return NULL;
Expand Down
5 changes: 3 additions & 2 deletions geocsv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
}

size_t seq = 0;
key_pool key_pool;
while ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
Expand Down Expand Up @@ -89,7 +90,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
drawvec dv;
dv.push_back(draw(VT_MOVETO, x, y));

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;

for (size_t i = 0; i < line.size(); i++) {
Expand All @@ -107,7 +108,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
}
sv.s = line[i];

full_keys.push_back(header[i]);
full_keys.push_back(key_pool.pool(header[i]));
full_values.push_back(sv);
}
}
Expand Down
9 changes: 5 additions & 4 deletions geojson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,18 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
nprop = properties->value.object.length;
}

std::vector<std::string> keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> values;

keys.reserve(nprop);
full_keys.reserve(nprop);
values.reserve(nprop);
key_pool key_pool;

for (size_t i = 0; i < nprop; i++) {
if (properties->value.object.keys[i]->type == JSON_STRING) {
serial_val sv = stringify_value(properties->value.object.values[i], sst->fname, sst->line, feature);

keys.emplace_back(properties->value.object.keys[i]->value.string.string);
full_keys.emplace_back(key_pool.pool(properties->value.object.keys[i]->value.string.string));
values.push_back(std::move(sv));
}
}
Expand All @@ -211,7 +212,7 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
sf.geometry = dv;
sf.feature_minzoom = 0; // Will be filled in during index merging
sf.seq = *(sst->layer_seq);
sf.full_keys = std::move(keys);
sf.full_keys = std::move(full_keys);
sf.full_values = std::move(values);

return serialize_feature(sst, sf, tippecanoe_layername);
Expand Down
Loading

0 comments on commit 23667bb

Please sign in to comment.