Skip to content

Commit

Permalink
Reducing attribute tagging within tippecanoe-overzoom (#296)
Browse files Browse the repository at this point in the history
* Strip out unwanted attributes earlier in the process

* Skip aggregations whose attributes have been excluded

* Forgot one

* Add some more tests

* Update version and changelog
  • Loading branch information
e-n-f authored Nov 15, 2024
1 parent 6d46578 commit 905b58c
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 61 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.70.0

* Performance improvements to tippecanoe-overzoom with attribute exclusion

# 2.69.0

* Fix crash when the first bin gets clipped away
Expand Down
33 changes: 32 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -368,10 +368,16 @@ overzoom-test: tippecanoe-overzoom
cmp tests/pbf/bin-11-327-791.pbf.out.json.check tests/pbf/bin-11-327-791.pbf.out.json
rm tests/pbf/bin-11-327-791.pbf.out.json.check tests/pbf/bin-11-327-791.pbf.out
# Binning by id
./tippecanoe-overzoom -o tests/pbf/bin-11-327-791-ids.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-overzoom -o tests/pbf/bin-11-327-791-ids.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id-list bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-decode tests/pbf/bin-11-327-791-ids.pbf.out 11 327 791 > tests/pbf/bin-11-327-791-ids.pbf.out.json.check
cmp tests/pbf/bin-11-327-791-ids.pbf.out.json.check tests/pbf/bin-11-327-791-ids.pbf.out.json
rm tests/pbf/bin-11-327-791-ids.pbf.out.json.check tests/pbf/bin-11-327-791-ids.pbf.out
# Binning by id, attribute stripping
# Note that it still works even if we exclude the ID that we are binning by
./tippecanoe-overzoom -yZCTA5CE10 -ytippecanoe:count -o tests/pbf/bin-11-327-791-ids-zip.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id-list bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-decode tests/pbf/bin-11-327-791-ids-zip.pbf.out 11 327 791 > tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check
cmp tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check tests/pbf/bin-11-327-791-ids-zip.pbf.out.json
rm tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check tests/pbf/bin-11-327-791-ids-zip.pbf.out
# Binning with longitude wraparound problems
./tippecanoe-overzoom -o tests/pbf/0-0-0-pop-2-0-1.pbf.out --accumulate-numeric-attributes=tippecanoe --assign-to-bins tests/pbf/h3-2-0-1.geojson tests/pbf/0-0-0.pbf 2/0/1 2/0/1
./tippecanoe-decode tests/pbf/0-0-0-pop-2-0-1.pbf.out 2 0 1 > tests/pbf/0-0-0-pop-2-0-1.pbf.out.json.check
Expand Down Expand Up @@ -618,11 +624,36 @@ accumulate-test:
# the cluster sizes still add up to the 243 original features
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | sed 's/.*clustered:cluster_size": //' | awk '{sum += $$1} END {print sum}'` == 243
#
# Binning with attribute stripping
./tippecanoe-overzoom -y clustered:count:POP1950 -y clustered:sum:POP1950 -y POP1950 -y clustered:cluster_size --assign-to-bins tests/pbf/h3-0-0-0.geojson --accumulate-numeric-attributes=clustered --accumulate-attribute '{"clustered:cluster_size":"sum"}' -o tests/pbf/bins-0-0-0.pbf tests/pbf/accum.dir/0/0/0.pbf 0/0/0 0/0/0
# Now there are 30 bins with POP1950 clusters
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 41
# There are none with bare POP1950 (which is expected; we should only have summary statistics)
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | grep 'POP1950' | wc -l` == 0
# And 4 with no POP1950 at all
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 3
#
# the clustered and megatile-filtered and binned POP1950s add up to 161590
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:sum:POP1950' | sed 's/.*"clustered:sum:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 161590
# which is the right global total
# Make sure we do *not* accumulate a numeric attribute that already has the magic prefix:
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep sum:clustered:unrelated | wc -l` == 0
# And those attributes do *not* make it onto the bins
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep clustered:unrelated | wc -l` == 0
# the cluster sizes still add up to the 243 original features
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | sed 's/.*clustered:cluster_size": //' | awk '{sum += $$1} END {print sum}'` == 243
#
#
# A tile where the counts and means were previously wrong:
./tippecanoe-overzoom --accumulate-numeric-attributes=felt -m -o tests/pbf/yearbuilt-accum.pbf tests/pbf/yearbuilt.pbf 0/0/0 0/0/0
./tippecanoe-decode tests/pbf/yearbuilt-accum.pbf 0 0 0 > tests/pbf/yearbuilt-accum.pbf.json.check
cmp tests/pbf/yearbuilt-accum.pbf.json.check tests/pbf/yearbuilt-accum.pbf.json
rm tests/pbf/yearbuilt-accum.pbf tests/pbf/yearbuilt-accum.pbf.json.check
# Same tile, with attribute stripping
./tippecanoe-overzoom --accumulate-numeric-attributes=felt -y bldgsqft -y felt:sum:bldgsqft -m -o tests/pbf/yearbuilt-accum-bldgsqft.pbf tests/pbf/yearbuilt.pbf 0/0/0 0/0/0
./tippecanoe-decode tests/pbf/yearbuilt-accum-bldgsqft.pbf 0 0 0 > tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check
cmp tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check tests/pbf/yearbuilt-accum-bldgsqft.pbf.json
rm tests/pbf/yearbuilt-accum-bldgsqft.pbf tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check

join-filter-test: tippecanoe tippecanoe-decode tile-join
# Comes out different from the direct tippecanoe run because null attributes are lost
Expand Down
135 changes: 76 additions & 59 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,30 @@ struct tile_feature {
size_t seq = 0;
};

static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &accumulate_numeric) {
static bool should_keep(std::string const &key,
std::set<std::string> const &keep,
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
if (keep.size() == 0 || keep.find(key) != keep.end()) {
if (exclude.find(key) != exclude.end()) {
return false;
}

for (auto const &prefix : exclude_prefix) {
if (starts_with(key, prefix)) {
return false;
}
}

return true;
}

return false;
}

static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &accumulate_numeric,
std::set<std::string> const &keep, std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
std::string accumulate_numeric_colon = accumulate_numeric + ":";

std::unordered_map<std::string, size_t> attributes;
Expand Down Expand Up @@ -1156,7 +1179,10 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
mvt_value mean;
mean.type = mvt_double;
mean.numeric_value.double_value = mvt_value_to_double(sum) / count_val;
layer.tag(feature, accumulate_numeric + ":mean:" + trunc, mean);

if (should_keep(key, keep, exclude, exclude_prefix)) {
layer.tag(feature, accumulate_numeric + ":mean:" + trunc, mean);
}
}
}
}
Expand All @@ -1170,7 +1196,9 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
key_pool &key_pool) {
key_pool &key_pool,
std::set<std::string> const &keep, std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
// If this is a numeric attribute, but there is also a prefix:sum (etc.) for the
// same attribute, we want to use that one instead of this one.

Expand Down Expand Up @@ -1203,6 +1231,10 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
// and then put it back on for the output field
std::string prefixed = accumulate_numeric + ":" + op.first + ":" + outkey;

if (!should_keep(prefixed, keep, exclude, exclude_prefix)) {
continue;
}

// Does it exist in the output feature already?

auto prefixed_attr = numeric_out_field.find(prefixed);
Expand Down Expand Up @@ -1263,27 +1295,6 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
}
}

static bool should_keep(std::string const &key,
std::set<std::string> const &keep,
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
if (keep.size() == 0 || keep.find(key) != keep.end()) {
if (exclude.find(key) != exclude.end()) {
return false;
}

for (auto const &prefix : exclude_prefix) {
if (starts_with(key, prefix)) {
return false;
}
}

return true;
}

return false;
}

static void handle_closepath_from_mvt(drawvec &geom) {
// mvt geometries close polygons with a mvt_closepath operation
// tippecanoe-internal geometries close polygons with a lineto to the initial point
Expand All @@ -1306,7 +1317,7 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::string const &accumulate_numeric,
key_pool &key_pool, int buffer) {
key_pool &key_pool, int buffer, bool include_nonaggregate) {
// Add geometry to output feature

drawvec geom = features[0].geom;
Expand Down Expand Up @@ -1357,22 +1368,22 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

for (size_t i = 0; i + 1 < features[0].tags.size(); i += 2) {
const std::string &key = features[0].layer->keys[features[0].tags[i]];
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(key_pool.pool(features[0].layer->keys[features[0].tags[i]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key_pool.pool(key));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
if (should_keep(features[0].layer->keys[features[0].tags[i]], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, features[0].layer->keys[features[0].tags[i]], features[0].layer->values[features[0].tags[i + 1]]);
auto const &f = features[0];
for (size_t i = 0; i + 1 < f.tags.size(); i += 2) {
const std::string &key = f.layer->keys[f.tags[i]];
if (should_keep(key, keep, exclude, exclude_prefix)) {
if (attribute_accum.find(key) != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(key_pool.pool(f.layer->keys[f.tags[i]]));
full_values.push_back(f.layer->values[f.tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && f.layer->values[f.tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key_pool.pool(key));
full_values.push_back(f.layer->values[f.tags[i + 1]]);
} else if (include_nonaggregate) {
// otherwise just tag it directly onto the output feature
outlayer.tag(outfeature, f.layer->keys[f.tags[i]], f.layer->values[f.tags[i + 1]]);
}
}
}
Expand All @@ -1386,22 +1397,26 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou

for (size_t j = 0; j + 1 < features[i].tags.size(); j += 2) {
const std::string &key = features[i].layer->keys[features[i].tags[j]];
keys.insert(key);
if (should_keep(key, keep, exclude, exclude_prefix)) {
keys.insert(key);
}
}

for (size_t j = 0; j + 1 < features[i].tags.size(); j += 2) {
const std::string &key = features[i].layer->keys[features[i].tags[j]];

auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state, key_pool);
if (should_keep(key, keep, exclude, exclude_prefix)) {
auto found = attribute_accum.find(key);
if (found != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(found->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state, key_pool,
keep, exclude, exclude_prefix);
}
}
}
}
Expand All @@ -1417,9 +1432,9 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
}

if (accumulate_numeric.size() > 0) {
add_mean(outfeature, outlayer, accumulate_numeric);
add_mean(outfeature, outlayer, accumulate_numeric, keep, exclude, exclude_prefix);
}
} else {
} else if (include_nonaggregate) {
for (size_t i = 0; i + 1 < features[0].tags.size(); i += 2) {
if (should_keep(features[0].layer->keys[features[0].tags[i]], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, features[0].layer->keys[features[0].tags[i]], features[0].layer->values[features[0].tags[i + 1]]);
Expand Down Expand Up @@ -1725,7 +1740,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
if (feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
accumulate_numeric, key_pool, buffer)) {
accumulate_numeric, key_pool, buffer, true)) {
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand All @@ -1737,7 +1752,9 @@ mvt_tile assign_to_bins(mvt_tile &features,
} else {
attrname = accumulate_numeric + ":count";
}
outlayer.tag(nfeature, attrname, val);
if (should_keep(attrname, keep, exclude, exclude_prefix)) {
outlayer.tag(nfeature, attrname, val);
}
}
}
}
Expand Down Expand Up @@ -1886,7 +1903,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1, bins.size() == 0);
if (outlayer->features.size() >= feature_limit) {
break;
}
Expand Down Expand Up @@ -1946,7 +1963,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1, bins.size() == 0);
pending_tile_features.clear();
if (outlayer->features.size() >= feature_limit) {
break;
Expand Down
Loading

0 comments on commit 905b58c

Please sign in to comment.