Skip to content

Commit

Permalink
Migrate contains
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Jun 12, 2024
1 parent 8f9d9e4 commit afb4061
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 29 deletions.
22 changes: 5 additions & 17 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.contains cimport (
contains,
index_of as cpp_index_of,
)
from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport (
Expand Down Expand Up @@ -154,23 +153,12 @@ def extract_element_column(Column col, Column index):

@acquire_spill_lock()
def contains_scalar(Column col, object py_search_key):

cdef DeviceScalar search_key = py_search_key.device_value

cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
return Column.from_pylibcudf(
pylibcudf.lists.contains(
col.to_pylibcudf(mode="read"),
py_search_key,
)
)
cdef const scalar* search_key_value = search_key.get_raw_ptr()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(contains(
list_view.get()[0],
search_key_value[0],
))
result = Column.from_unique_ptr(move(c_result))
return result


@acquire_spill_lock()
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:

cpdef enum class duplicate_find_option(int32_t):
FIND_FIRST
FIND_LAST
FIND_FIRST "cudf::lists::duplicate_find_option::FIND_FIRST"
FIND_LAST "cudf::lists::duplicate_find_option::FIND_LAST"

cdef unique_ptr[column] contains(
lists_column_view lists,
scalar search_key,
const lists_column_view& lists,
const scalar& search_key,
) except +cudf_exception_handler

cdef unique_ptr[column] contains(
lists_column_view lists,
column_view search_keys,
const lists_column_view& lists,
const column_view& search_keys,
) except +cudf_exception_handler

cdef unique_ptr[column] contains_nulls(
Expand All @@ -35,9 +35,11 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
cdef unique_ptr[column] index_of(
lists_column_view lists,
scalar search_key,
# duplicate_find_option find_option,
) except +cudf_exception_handler

cdef unique_ptr[column] index_of(
lists_column_view lists,
column_view search_keys,
# duplicate_find_option find_option,
) except +cudf_exception_handler
14 changes: 14 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/lists.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,25 @@ from libcpp cimport bool
from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table

# Fused type used by the declarations below so that a single cpdef signature
# (e.g. ``contains``) can accept either a Column or a Scalar argument.
ctypedef fused ColumnOrScalar:
Column
Scalar

cpdef Table explode_outer(Table, size_type explode_column_idx)

cpdef Column concatenate_rows(Table)

cpdef Column concatenate_list_elements(Column, bool dropna)

cpdef Column contains(Column, ColumnOrScalar)

# cpdef Column contains_nulls(Column)

# cpdef Column index_of(Column, ColumnOrScalar)

# from cudf._lib.pylibcudf.libcudf.binaryop import \
# binary_operator as BinaryOperator # no-cython-lint
# from cudf._lib.pylibcudf.libcudf.lists.contains cimport duplicate_find_option
65 changes: 59 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/lists.pyx
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode
from cudf._lib.pylibcudf.libcudf.lists cimport (
contains as cpp_contains,
explode as cpp_explode,
)
from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
concatenate_list_elements as cpp_concatenate_list_elements,
concatenate_null_policy,
concatenate_rows as cpp_concatenate_rows,
)
from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
lists_column_view,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type

from cudf._lib.pylibcudf.libcudf.lists.contains import \
duplicate_find_option as DuplicateFindOption # no-cython-lint

from cudf._lib.scalar cimport DeviceScalar

from .column cimport Column
from .table cimport Table

Expand Down Expand Up @@ -71,15 +83,15 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
----------
input : Column
The input column
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.
Returns
-------
Column
A new Column of concatenated list elements
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.
"""
cdef concatenate_null_policy null_policy = (
concatenate_null_policy.IGNORE if dropna
Expand All @@ -94,3 +106,44 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
))

return Column.from_libcudf(move(c_result))

cpdef Column contains(Column input, ColumnOrScalar search_key):
    """Create a column of bool values based upon the search key.

    ``search_key`` may be a
    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.

    For details, see :cpp:func:`contains`.

    Parameters
    ----------
    input : Column
        The input column.
    search_key : Union[Column, Scalar]
        The search key.

    Returns
    -------
    Column
        A new Column of bools
    """
    cdef unique_ptr[column] c_result
    cdef shared_ptr[lists_column_view] list_view = (
        make_shared[lists_column_view](input.view())
    )

    # The test on the fused type is resolved per specialization, so each
    # generated variant of this function only contains the branch that
    # matches the type of ``search_key``.
    if ColumnOrScalar is Column:
        with nogil:
            c_result = move(cpp_contains.contains(
                list_view.get()[0],
                search_key.view(),
            ))
    else:
        # Use the pylibcudf Scalar's own pointer to the libcudf scalar
        # instead of going through cudf's DeviceScalar wrapper, which is
        # not part of the pylibcudf Scalar interface.
        with nogil:
            c_result = move(cpp_contains.contains(
                list_view.get()[0],
                search_key.get()[0],
            ))
    return Column.from_libcudf(move(c_result))
4 changes: 4 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,7 @@ def test_concatenate_list_elements(test_data, dropna, expected):
expect = pa.array(expected)

assert_column_eq(expect, res)


def test_contains():
    """Placeholder for the ``contains`` tests; no assertions are made yet."""

0 comments on commit afb4061

Please sign in to comment.