Skip to content

Commit

Permalink
Merge branch 'pylibcudf-lists-contains' of github.com:Matt711/cudf in…
Browse files Browse the repository at this point in the history
…to branch-24.08
  • Loading branch information
Matt711 committed Jun 21, 2024
2 parents c70be18 + 0a98c7a commit 4d5c233
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 25 deletions.
24 changes: 6 additions & 18 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.contains cimport (
contains,
index_of as cpp_index_of,
)
from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport (
Expand Down Expand Up @@ -153,24 +152,13 @@ def extract_element_column(Column col, Column index):


@acquire_spill_lock()
def contains_scalar(Column col, object py_search_key):

cdef DeviceScalar search_key = py_search_key.device_value

cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
def contains_scalar(Column col, py_search_key):
    """Check each list row of ``col`` for ``py_search_key``.

    Delegates to ``pylibcudf.lists.contains``, handing it ``col`` as a
    read-mode pylibcudf column together with the search key's
    ``device_value``, and wraps the pylibcudf result back into a cudf
    ``Column``.
    """
    plc_result = pylibcudf.lists.contains(
        col.to_pylibcudf(mode="read"),
        py_search_key.device_value,
    )
    return Column.from_pylibcudf(plc_result)
cdef const scalar* search_key_value = search_key.get_raw_ptr()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(contains(
list_view.get()[0],
search_key_value[0],
))
result = Column.from_unique_ptr(move(c_result))
return result


@acquire_spill_lock()
Expand Down
19 changes: 18 additions & 1 deletion python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libcpp.memory cimport unique_ptr

from cudf._lib.exception_handler cimport cudf_exception_handler
Expand All @@ -12,17 +13,33 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar


# Cython declarations for the list-search functions exposed by the libcudf
# header "cudf/lists/contains.hpp" (C++ namespace cudf::lists). The whole block
# is declared ``nogil``; every function routes C++ exceptions through
# ``cudf_exception_handler``.
cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:

# Mirrors cudf::lists::duplicate_find_option (underlying type int32_t).
# Presumably selects the first vs. last match for index_of -- confirm against
# the libcudf documentation.
cpdef enum class duplicate_find_option(int32_t):
FIND_FIRST "cudf::lists::duplicate_find_option::FIND_FIRST"
FIND_LAST "cudf::lists::duplicate_find_option::FIND_LAST"

# contains overload taking a single scalar search key.
cdef unique_ptr[column] contains(
const lists_column_view& lists,
const scalar& search_key,
) except +cudf_exception_handler

# contains overload taking a column of search keys.
cdef unique_ptr[column] contains(
const lists_column_view& lists,
const column_view& search_keys,
) except +cudf_exception_handler

# NOTE(review): this declaration takes a search_key, whereas the commented-out
# pylibcudf prototype for contains_nulls takes only a Column -- confirm the
# C++ signature before wrapping this function.
cdef unique_ptr[column] contains_nulls(
lists_column_view lists,
scalar search_key,
) except +cudf_exception_handler

# index_of overload taking a single scalar search key. The find_option
# parameter is commented out, so it is not exposed through this declaration.
cdef unique_ptr[column] index_of(
lists_column_view lists,
scalar search_key,
# duplicate_find_option find_option,
) except +cudf_exception_handler

# index_of overload taking a column of search keys; find_option is likewise
# not exposed here.
cdef unique_ptr[column] index_of(
lists_column_view lists,
column_view search_keys,
# duplicate_find_option find_option,
) except +cudf_exception_handler
12 changes: 12 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/lists.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,25 @@
from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table

# Fused type used for the search-key argument of ``contains``: the key may be
# a pylibcudf Column, a pylibcudf Scalar, or a cudf DeviceScalar.
ctypedef fused ColumnOrScalar:
Column
Scalar
DeviceScalar

cpdef Table explode_outer(Table, size_type explode_column_idx)

cpdef Column concatenate_rows(Table)

cpdef Column concatenate_list_elements(Column, bool dropna)

cpdef Column contains(Column, ColumnOrScalar)

# cpdef Column contains_nulls(Column)

# cpdef Column index_of(Column, ColumnOrScalar)
64 changes: 58 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/lists.pyx
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode
from cudf._lib.pylibcudf.libcudf.lists cimport (
contains as cpp_contains,
explode as cpp_explode,
)
from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
concatenate_list_elements as cpp_concatenate_list_elements,
concatenate_null_policy,
concatenate_rows as cpp_concatenate_rows,
)
from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
lists_column_view,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar

from .column cimport Column
from .table cimport Table
Expand Down Expand Up @@ -71,15 +79,15 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
----------
input : Column
The input column
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.
Returns
-------
Column
A new Column of concatenated list elements
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.
"""
cdef concatenate_null_policy null_policy = (
concatenate_null_policy.IGNORE if dropna
Expand All @@ -94,3 +102,47 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
))

return Column.from_libcudf(move(c_result))


cpdef Column contains(Column input, ColumnOrScalar search_key):
    """Test each list row of ``input`` for the presence of ``search_key``.

    Parameters
    ----------
    input : Column
        The input column.
    search_key : Union[Column, Scalar, DeviceScalar]
        The search key: either a column of keys or a single scalar key.

    Returns
    -------
    Column
        A new Column of bools
    """
    cdef shared_ptr[lists_column_view] lists_view = (
        make_shared[lists_column_view](input.view())
    )
    cdef const scalar* key_ptr = NULL
    cdef unique_ptr[column] c_result

    # The checks on the fused type select the matching libcudf overload for
    # each specialization of this function.
    if ColumnOrScalar is Column:
        with nogil:
            c_result = move(cpp_contains.contains(
                lists_view.get()[0],
                search_key.view(),
            ))
    else:
        # Both scalar wrappers end up as a raw ``const scalar*``; they only
        # differ in the accessor that hands the pointer out.
        if ColumnOrScalar is DeviceScalar:
            key_ptr = search_key.get_raw_ptr()
        else:
            key_ptr = search_key.get()
        with nogil:
            c_result = move(cpp_contains.contains(
                lists_view.get()[0],
                key_ptr[0],
            ))

    return Column.from_libcudf(move(c_result))
29 changes: 29 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,32 @@ def test_concatenate_list_elements(test_data, dropna, expected):
expect = pa.array(expected)

assert_column_eq(expect, res)


def test_contains_scalar():
    """``contains`` with a scalar key flags the rows that hold the key."""
    lists = pa.array([[1, 2], [1, 3, 4], [5, 6]])
    key = pa.scalar(1)

    plc_lists = plc.interop.from_arrow(lists)
    plc_key = plc.interop.from_arrow(key)
    result = plc.lists.contains(plc_lists, plc_key)

    # The key 1 appears in the first two lists but not in the third.
    assert_column_eq(pa.array([True, True, False]), result)


def test_contains_list_column():
    """``contains`` with a column of keys checks each row against its key."""
    lists = pa.array([[1, 2], [1, 3, 4], [5, 6]])
    keys = pa.array([1, 3, 6])

    plc_lists = plc.interop.from_arrow(lists)
    plc_keys = plc.interop.from_arrow(keys)
    result = plc.lists.contains(plc_lists, plc_keys)

    # Every key is present in the list on the same row.
    assert_column_eq(pa.array([True, True, True]), result)

0 comments on commit 4d5c233

Please sign in to comment.