Skip to content
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d37c8ce
add FilterCategoryAccessor modeled after FilterValueAccessor
ATL2001 Sep 21, 2025
53abc41
use FilterCategoryAccessor for get_filter_category, and set the min v…
ATL2001 Sep 21, 2025
8ff247e
pass filterSize and categorySize if they're both defined to the _Data…
ATL2001 Sep 21, 2025
5c7f72e
add temporary example notebook
ATL2001 Sep 21, 2025
a1f519b
lint
ATL2001 Sep 21, 2025
e5281be
more linting...
ATL2001 Sep 21, 2025
732fcf2
Merge branch 'main' into category_filter
kylebarron Oct 14, 2025
2403779
simplify if/else blocks and add defaults if None on python side
ATL2001 Oct 16, 2025
e4625c2
change min values to 1 and set default to 1 for filter_size
ATL2001 Oct 16, 2025
647a111
default category_size to None, filter_size to 1
ATL2001 Oct 16, 2025
b0086ad
added a few more cells to make sure I didnt break anything :)
ATL2001 Oct 16, 2025
daf0a51
lint the trowaway notebook
ATL2001 Oct 16, 2025
fa6d94b
Merge branch 'main' into category_filter
ATL2001 Oct 18, 2025
6f2a8da
Merge branch 'main' into category_filter
ATL2001 Oct 27, 2025
90426c0
update docstring for filter/category_size being optional
ATL2001 Oct 27, 2025
4270606
need to use ravel("C") for multiple categories
ATL2001 Nov 1, 2025
00b0763
add data filter extension tests
ATL2001 Nov 1, 2025
7046fad
Merge remote-tracking branch 'upstream/main' into category_filter
ATL2001 Nov 1, 2025
d733fce
move FilterCategoryAccessor to _extensions.py
ATL2001 Nov 1, 2025
23b8fc7
import TraitError from traitlets
ATL2001 Nov 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions examples/!category_filter.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9ffdeba2",
"metadata": {},
"outputs": [],
"source": [
"# no intention of actually keeping this notebook in the repo, once it's all working good I'll make up a new doc about the category_filter\n",
"# I have added it for now to demonstrate that I've got the data filter filter_categories functionality working for numeric inputs\n",
"# looking at the deck gl docs: https://deck.gl/docs/api-reference/extensions/data-filter-extension#layer-properties\n",
"# it appears that we should be able to use strings for the categories but when I try to use string data the layer simply doesnt work\n",
"\n",
"import geopandas as gpd\n",
"import ipywidgets\n",
"import pyarrow as pa # noqa\n",
"from shapely.geometry import Point\n",
"\n",
"import lonboard\n",
"from lonboard.basemap import CartoBasemap\n",
"from lonboard.layer_extension import DataFilterExtension\n",
"\n",
"cat_col = \"int_col\"\n",
"# int_col: works\n",
"# float_col: works\n",
"# str_col: does NOT work :(\n",
"# as is it will throw an arro3 ValueError: Expected object with __arrow_c_array__ method or implementing buffer protocol.\n",
"# we can avoid the arro3 exception by using pyarrow as the input to get_filter_category when we create the layer:\n",
"# `get_filter_category=pa.array(gdf[cat_col])`\n",
"# but the layer doesn't display and throws a lot of the following WebGL error:\n",
"# GL_INVALID_OPERATION: Vertex shader input type does not match the type of the bound vertex attribute\n",
"\n",
"\n",
"d = {\n",
" \"int_col\": [0, 1, 2, 3, 4, 5],\n",
" \"float_col\": [0.0, 1.5, 0.0, 1.5, 0.0, 1.5],\n",
" \"str_col\": [\"even\", \"odd\", \"even\", \"odd\", \"even\", \"odd\"],\n",
" \"geometry\": [\n",
" Point(0, 0),\n",
" Point(1, 1),\n",
" Point(2, 2),\n",
" Point(3, 3),\n",
" Point(4, 4),\n",
" Point(5, 5),\n",
" ],\n",
"}\n",
"gdf = gpd.GeoDataFrame(d, crs=\"EPSG:4326\")\n",
"\n",
"point_layer = lonboard.ScatterplotLayer.from_geopandas(\n",
" gdf,\n",
" get_fill_color=(0, 255, 0),\n",
" radius_min_pixels=10,\n",
" extensions=[\n",
" DataFilterExtension(filter_size=0, category_size=1),\n",
" ], # no range filter, just a category\n",
" get_filter_category=gdf[cat_col], # use the cat column for the filter category\n",
")\n",
"\n",
"m = lonboard.Map(layers=[point_layer], basemap_style=CartoBasemap.DarkMatter)\n",
"\n",
"filter_enabled_w = ipywidgets.Checkbox(\n",
" value=True,\n",
" description=\"Filter Enabled\",\n",
")\n",
"\n",
"\n",
"def on_filter_enabled_change(change): # noqa\n",
" # when we change the checkbox, toggle filtering on the layer\n",
" point_layer.filter_enabled = filter_enabled_w.value\n",
"\n",
"\n",
"filter_enabled_w.observe(on_filter_enabled_change, names=\"value\")\n",
"\n",
"cat_selector = ipywidgets.SelectMultiple( # make a select multiple so we can see interaction on the map\n",
" options=list(gdf[cat_col].unique()),\n",
" value=[list(gdf[cat_col].unique())[0]],\n",
" description=\"Category\",\n",
" disabled=False,\n",
")\n",
"\n",
"\n",
"def on_cat_selector_change(change) -> None: # noqa\n",
" # when we change the selector, update the filter on the layer.\n",
" point_layer.filter_categories = cat_selector.value\n",
"\n",
"\n",
"cat_selector.observe(on_cat_selector_change, names=\"value\")\n",
"\n",
"ipywidgets.VBox([m, filter_enabled_w, cat_selector])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce25544c",
"metadata": {},
"outputs": [],
"source": [
"point_layer2 = lonboard.ScatterplotLayer.from_geopandas(\n",
" gdf,\n",
" get_fill_color=(0, 255, 0),\n",
" radius_min_pixels=10,\n",
" extensions=[\n",
" DataFilterExtension(filter_size=1, category_size=0),\n",
" ], # no category filter, just a range\n",
" get_filter_value=gdf[\"int_col\"], # use the int_col for the filter value\n",
")\n",
"\n",
"m2 = lonboard.Map(layers=[point_layer2], basemap_style=CartoBasemap.DarkMatter)\n",
"point_layer2.filter_range = [0, 5]\n",
"m2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef71f006",
"metadata": {},
"outputs": [],
"source": [
"point_layer2.filter_range = [1, 4]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10e27c5d",
"metadata": {},
"outputs": [],
"source": [
"point_layer3 = lonboard.ScatterplotLayer.from_geopandas(\n",
" gdf,\n",
" get_fill_color=(0, 255, 0),\n",
" radius_min_pixels=10,\n",
" extensions=[\n",
" DataFilterExtension(filter_size=1, category_size=1),\n",
" ], # both a range filter and a category filter\n",
" get_filter_category=gdf[\n",
" \"float_col\"\n",
" ], # use the float column for the filter category\n",
" get_filter_value=gdf[\"int_col\"], # use the int column for the filter value\n",
")\n",
"\n",
"point_layer3.filter_categories = [1.5]\n",
"point_layer3.filter_range = [0, 3]\n",
"m3 = lonboard.Map(layers=[point_layer3], basemap_style=CartoBasemap.DarkMatter)\n",
"m3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3f48195",
"metadata": {},
"outputs": [],
"source": [
"point_layer3.filter_range = [0, 5]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lonboard_category_filter",
"language": "python",
"name": "lonboard_category_filter"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
10 changes: 7 additions & 3 deletions lonboard/layer_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from lonboard._base import BaseExtension
from lonboard.traits import (
DashArrayAccessor,
FilterCategoryAccessor,
FilterValueAccessor,
FloatAccessor,
PointAccessor,
Expand Down Expand Up @@ -353,10 +354,13 @@ class DataFilterExtension(BaseExtension):
"filter_transform_size": t.Bool(default_value=True).tag(sync=True),
"filter_transform_color": t.Bool(default_value=True).tag(sync=True),
"get_filter_value": FilterValueAccessor(default_value=None, allow_none=True),
"get_filter_category": FilterValueAccessor(default_value=None, allow_none=True),
"get_filter_category": FilterCategoryAccessor(
default_value=None,
allow_none=True,
),
}

filter_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True)
filter_size = t.Int(1, min=1, max=4, allow_none=True).tag(sync=True)
"""The size of the filter (number of columns to filter by).

The data filter can show/hide data based on 1-4 numeric properties of each object.
Expand All @@ -371,7 +375,7 @@ class DataFilterExtension(BaseExtension):
The category filter can show/hide data based on 1-4 properties of each object.

- Type: `int`. This is required if using category-based filtering.
- Default 0.
- Default None.
"""


Expand Down
172 changes: 172 additions & 0 deletions lonboard/traits.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,178 @@ def validate(
return value.rechunk(max_chunksize=obj._rows_per_chunk)


class FilterCategoryAccessor(FixedErrorTraitType):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some tests for this? There are some example tests in test_traits.py and you can look at #917 for more examples.

It might be worth making test_traits into a folder and having a file specifically for test_traits/test_filter_extension.py

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ran out of time tonight, but I'll see what I can do in the next couple evenings, or maybe this weekend

"""Validate input for `get_filter_category`.

A trait to validate input for the `get_filter_category` accessor added by the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension], which can
have between 1 and 4 values per row.


Various input is allowed:

- An `int` or `float`. This will be used as the value for all objects. The
`category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A one-dimensional numpy `ndarray` with a numeric data type. Each value in the array will
be used as the value for the object at the same row index. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A two-dimensional numpy `ndarray` with a numeric data type. Each value in the array will
be used as the value for the object at the same row index. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must match the size of the second dimension of the array.
- A pandas `Series` with a numeric data type. Each value in the array will be used as
the value for the object at the same row index. The `category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A pyarrow [`FloatArray`][pyarrow.FloatArray], [`DoubleArray`][pyarrow.DoubleArray]
or [`ChunkedArray`][pyarrow.ChunkedArray] containing either a `FloatArray` or
`DoubleArray`. Each value in the array will be used as the value for the object at
the same row index. The `category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.

Alternatively, you can pass any corresponding Arrow data structure from a library
that implements the [Arrow PyCapsule
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
- A pyarrow [`FixedSizeListArray`][pyarrow.FixedSizeListArray] or
[`ChunkedArray`][pyarrow.ChunkedArray] containing `FixedSizeListArray`s. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must match the list size.

Alternatively, you can pass any corresponding Arrow data structure from a library
that implements the [Arrow PyCapsule
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
"""

default_value = None
info_text = "a value or numpy ndarray or Arrow array representing an array of data"

def __init__(self: TraitType, *args: Any, **kwargs: Any) -> None:
    """Construct the trait and tag it as a synced accessor.

    All positional and keyword arguments are forwarded unchanged to the
    parent trait constructor. Afterwards the trait is tagged with
    ``sync=True`` together with the entries of ``ACCESSOR_SERIALIZATION``.
    """
    super().__init__(*args, **kwargs)
    # Tagging happens after the parent constructor has run.
    self.tag(sync=True, **ACCESSOR_SERIALIZATION)

def _pandas_to_numpy(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> np.ndarray:
    """Convert a pandas Series into a numpy ndarray.

    A Series is treated as exactly one category value per row. Series whose
    elements are Python lists are technically possible in pandas but are
    deliberately not supported here, to keep the accepted inputs simple.

    Args:
        obj: The layer owning this trait; only used for error reporting.
        value: The pandas Series supplied by the user.
        category_size: The `category_size` of the layer's data filter
            extension.

    Returns:
        The Series contents as produced by `np.asarray`.

    A `category_size` other than 1 is reported through `self.error`.
    """
    single_category = category_size == 1
    if not single_category:
        self.error(obj, value, info="category_size==1 with pandas Series")

    # Hand the Series over to numpy.
    return np.asarray(value)

def _numpy_to_arrow(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> ChunkedArray:
    """Convert a 1-D or 2-D numpy array into a chunked FixedSizeList array.

    Args:
        obj: The layer owning this trait; only used for error reporting.
        value: A numpy ndarray. A 1-D array holds one category per row; a 2-D
            array holds `value.shape[1]` categories per row.
        category_size: The `category_size` of the layer's data filter
            extension. Must be 1 for 1-D input and must equal
            `value.shape[1]` for 2-D input.

    Returns:
        A single-chunk `ChunkedArray` wrapping a FixedSizeList array whose
        list size is `category_size`.

    Shape mismatches (wrong dimensionality, or a `category_size` that does not
    match the array) are reported through `self.error`.
    """
    if len(value.shape) == 1:
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with 1-D numpy array")
        array = fixed_size_list_array(value, category_size)
        # Wrap in a list, like every other ChunkedArray construction here.
        return ChunkedArray([array])

    if len(value.shape) != 2:
        self.error(obj, value, info="1-D or 2-D numpy array")

    if value.shape[1] != category_size:
        self.error(
            obj,
            value,
            info=(
                f"category_size ({category_size}) to match 2nd dimension of numpy array"
            ),
        )

    # The child values of a FixedSizeList must be a flat array. Flatten in
    # row-major ("C") order so that each row's `category_size` values stay
    # contiguous and end up in the same list element.
    array = fixed_size_list_array(value.ravel("C"), category_size)
    return ChunkedArray([array])

def validate(
    self,
    obj: BaseArrowLayer,
    value: Any,
) -> str | float | tuple | list | ChunkedArray:
    """Validate and normalize a value assigned to `get_filter_category`.

    The value is checked against the `category_size` of the single
    data filter extension found in `obj.extensions`.

    Args:
        obj: The layer owning this trait. Its `extensions` are searched for
            the data filter extension and its `_rows_per_chunk` is used to
            rechunk array input.
        value: The user-supplied value: a scalar (`int`, `float`, `str`), a
            `tuple`/`list`, a pandas Series, a numpy ndarray, or an object
            exposing the Arrow PyCapsule interface (`__arrow_c_array__` or
            `__arrow_c_stream__`).

    Returns:
        Scalars, tuples and lists are returned unchanged. Every array-like
        input is returned as a `ChunkedArray` rechunked to at most
        `obj._rows_per_chunk` rows per chunk.

    Invalid input is reported through `self.error` (expected to raise a
    traitlets `TraitError` -- confirm against `FixedErrorTraitType`).
    """
    # Find the data filter extension in the attributes of the parent object so
    # we can validate against its category size.
    data_filter_extensions = [
        ext
        for ext in obj.extensions
        if ext._extension_type == "data-filter"  # type: ignore
    ]
    # Explicit check instead of `assert`, which is stripped under `python -O`
    # and would then surface as an unrelated IndexError below.
    if len(data_filter_extensions) != 1:
        self.error(
            obj,
            value,
            info="exactly one DataFilterExtension in the layer's extensions",
        )
    category_size = data_filter_extensions[0].category_size  # type: ignore

    # A scalar applies to all rows and therefore represents one category.
    if isinstance(value, (int, float, str)):
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with scalar value")
        return value

    # A tuple/list supplies one entry per category.
    if isinstance(value, (tuple, list)):
        if category_size != len(value):
            self.error(
                obj,
                value,
                info=f"category_size ({category_size}) to match length of tuple/list",
            )
        return value

    # pandas Series, detected by class name so pandas need not be imported.
    if (
        value.__class__.__module__.startswith("pandas")
        and value.__class__.__name__ == "Series"
    ):
        value = self._pandas_to_numpy(obj, value, category_size)

    if isinstance(value, np.ndarray):
        value = self._numpy_to_arrow(obj, value, category_size)
    elif hasattr(value, "__arrow_c_array__"):
        value = ChunkedArray([Array.from_arrow(value)])
    elif hasattr(value, "__arrow_c_stream__"):
        value = ChunkedArray.from_arrow(value)
    else:
        self.error(obj, value)

    # Type narrowing only; every branch above yields a ChunkedArray or errors.
    assert isinstance(value, ChunkedArray)

    # Allowed inputs are either a FixedSizeListArray or a plain array.
    if DataType.is_fixed_size_list(value.type):
        if category_size != value.type.list_size:
            self.error(
                obj,
                value,
                info=(
                    f"category_size ({category_size}) to match list size of "
                    "FixedSizeList arrow array"
                ),
            )
    elif category_size != 1:
        self.error(
            obj,
            value,
            info="category_size==1 with non-FixedSizeList type arrow array",
        )

    # Rechunk on both paths so plain arrays get the same chunk layout as
    # FixedSizeList arrays.
    return value.rechunk(max_chunksize=obj._rows_per_chunk)


class NormalAccessor(FixedErrorTraitType):
"""A representation of a deck.gl "normal" accessor.

Expand Down
Loading