Skip to content

Commit

Permalink
Merge branch 'branch-24.08' of github.com:rapidsai/cudf into branch-24.08
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Jun 17, 2024
2 parents 68f9cae + 87f6a7e commit 64931fc
Show file tree
Hide file tree
Showing 73 changed files with 733 additions and 787 deletions.
13 changes: 12 additions & 1 deletion cpp/include/cudf/ast/detail/operators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <cudf/ast/expressions.hpp>
#include <cudf/types.hpp>
#include <cudf/unary.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

Expand Down Expand Up @@ -819,7 +820,17 @@ struct operator_functor<ast_operator::NOT, false> {
template <typename To>
struct cast {
static constexpr auto arity{1};
// Cast overload enabled only when `From` is a fixed-point type; the sibling
// overload below is enabled for every other source type.
// The SFINAE guard uses cuda::std::enable_if_t so that it matches the sibling
// overload and the cuda::std trait used in the body.
template <typename From, typename cuda::std::enable_if_t<is_fixed_point<From>()>* = nullptr>
__device__ inline auto operator()(From f) -> To
{
  if constexpr (cuda::std::is_floating_point_v<To>) {
    // Floating-point targets are routed through the dedicated conversion
    // helper instead of a plain static_cast.
    return convert_fixed_to_floating<To>(f);
  } else {
    // Every other target type uses an ordinary static_cast.
    return static_cast<To>(f);
  }
}

template <typename From, typename cuda::std::enable_if_t<!is_fixed_point<From>()>* = nullptr>
__device__ inline auto operator()(From f) -> decltype(static_cast<To>(f))
{
return static_cast<To>(f);
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/stream_compaction/distinct_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ cudf::size_type distinct_count(column_view const& input,
nan_policy nan_handling,
rmm::cuda_stream_view stream)
{
if (0 == input.size() or input.null_count() == input.size()) { return 0; }
if (0 == input.size()) { return 0; }

if (input.null_count() == input.size()) {
return static_cast<size_type>(null_handling == null_policy::INCLUDE);
}

auto count = detail::distinct_count(table_view{{input}}, null_equality::EQUAL, stream);

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ quiet-level = 3
line-length = 79

[tool.ruff.lint]
select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH"]
select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH", "FA", "UP006", "UP007"]
ignore = [
# whitespace before :
"E203",
Expand Down
46 changes: 22 additions & 24 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,34 @@

from __future__ import annotations

from typing import Dict, Optional, Tuple

from typing_extensions import Self

from cudf._typing import Dtype, DtypeObj, ScalarLike
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase

class Column:
_data: Optional[Buffer]
_mask: Optional[Buffer]
_base_data: Optional[Buffer]
_base_mask: Optional[Buffer]
_data: Buffer | None
_mask: Buffer | None
_base_data: Buffer | None
_base_mask: Buffer | None
_dtype: DtypeObj
_size: int
_offset: int
_null_count: int
_children: Tuple[ColumnBase, ...]
_base_children: Tuple[ColumnBase, ...]
_distinct_count: Dict[bool, int]
_children: tuple[ColumnBase, ...]
_base_children: tuple[ColumnBase, ...]
_distinct_count: dict[bool, int]

def __init__(
self,
data: Optional[Buffer],
data: Buffer | None,
size: int,
dtype: Dtype,
mask: Optional[Buffer] = None,
offset: Optional[int] = None,
null_count: Optional[int] = None,
children: Tuple[ColumnBase, ...] = (),
mask: Buffer | None = None,
offset: int | None = None,
null_count: int | None = None,
children: tuple[ColumnBase, ...] = (),
) -> None: ...
@property
def base_size(self) -> int: ...
Expand All @@ -40,35 +38,35 @@ class Column:
@property
def size(self) -> int: ...
@property
def base_data(self) -> Optional[Buffer]: ...
def base_data(self) -> Buffer | None: ...
@property
def data(self) -> Optional[Buffer]: ...
def data(self) -> Buffer | None: ...
@property
def data_ptr(self) -> int: ...
def set_base_data(self, value: Buffer) -> None: ...
@property
def nullable(self) -> bool: ...
def has_nulls(self, include_nan: bool = False) -> bool: ...
@property
def base_mask(self) -> Optional[Buffer]: ...
def base_mask(self) -> Buffer | None: ...
@property
def mask(self) -> Optional[Buffer]: ...
def mask(self) -> Buffer | None: ...
@property
def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ...
def set_mask(self, value: Optional[Buffer]) -> Self: ...
def set_base_mask(self, value: Buffer | None) -> None: ...
def set_mask(self, value: Buffer | None) -> Self: ...
@property
def null_count(self) -> int: ...
@property
def offset(self) -> int: ...
@property
def base_children(self) -> Tuple[ColumnBase, ...]: ...
def base_children(self) -> tuple[ColumnBase, ...]: ...
@property
def children(self) -> Tuple[ColumnBase, ...]: ...
def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: ...
def children(self) -> tuple[ColumnBase, ...]: ...
def set_base_children(self, value: tuple[ColumnBase, ...]) -> None: ...
def _mimic_inplace(
self, other_col: ColumnBase, inplace=False
) -> Optional[Self]: ...
) -> Self | None: ...

# TODO: The val parameter should be Scalar, not ScalarLike
@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections import abc
from functools import wraps
from inspect import isclass
from typing import List, Union, cast
from typing import cast

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -219,7 +219,7 @@ def wrapped_func(obj):


def _union_categoricals(
to_union: List[Union[cudf.Series, cudf.CategoricalIndex]],
to_union: list[cudf.Series | cudf.CategoricalIndex],
sort_categories: bool = False,
ignore_order: bool = False,
):
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pickle
import warnings
from functools import cached_property
from typing import TYPE_CHECKING, Any, Literal, Set, Tuple
from typing import TYPE_CHECKING, Any, Literal

import pandas as pd
from typing_extensions import Self
Expand Down Expand Up @@ -44,11 +44,11 @@
class BaseIndex(Serializable):
"""Base class for all cudf Index types."""

_accessors: Set[Any] = set()
_accessors: set[Any] = set()
_data: ColumnAccessor

@property
def _columns(self) -> Tuple[Any, ...]:
def _columns(self) -> tuple[Any, ...]:
raise NotImplementedError

@cached_property
Expand Down Expand Up @@ -342,9 +342,9 @@ def deserialize(cls, header, frames):
@property
def names(self):
    """
    Returns a FrozenList containing the name of the Index.
    """
    # Single return path: the one name wrapped in pandas' FrozenList.
    # The earlier tuple form `(self.name,)` is dropped so the docstring and
    # the returned type agree and no statement is left unreachable.
    return pd.core.indexes.frozen.FrozenList([self.name])

@names.setter
def names(self, values):
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/core/_internals/expressions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
from __future__ import annotations

import ast
import functools
from typing import List, Tuple

from cudf._lib.expressions import (
ASTOperator,
Expand Down Expand Up @@ -98,9 +98,9 @@ class libcudfASTVisitor(ast.NodeVisitor):
The column names used to map the names in an expression.
"""

def __init__(self, col_names: Tuple[str]):
self.stack: List[Expression] = []
self.nodes: List[Expression] = []
def __init__(self, col_names: tuple[str]):
self.stack: list[Expression] = []
self.nodes: list[Expression] = []
self.col_names = col_names

@property
Expand Down Expand Up @@ -218,7 +218,7 @@ def visit_Call(self, node):


@functools.lru_cache(256)
def parse_expression(expr: str, col_names: tuple[str, ...]):
    """Parse an expression string and walk it with a ``libcudfASTVisitor``.

    Parameters
    ----------
    expr : str
        Source text handed to ``ast.parse``.
    col_names : tuple of str
        Column names passed to the visitor. Annotated as a variable-length
        tuple (``tuple[str]`` would mean exactly one element).

    Returns
    -------
    libcudfASTVisitor
        The visitor instance after it has visited the parsed tree.

    Notes
    -----
    Calls are memoized by ``functools.lru_cache`` (up to 256 entries), so
    both arguments must be hashable and repeated calls with equal arguments
    return the same visitor object.
    """
    visitor = libcudfASTVisitor(col_names)
    visitor.visit(ast.parse(expr))
    return visitor
19 changes: 11 additions & 8 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
from __future__ import annotations

import os
import zoneinfo
from functools import lru_cache
from typing import Literal, Tuple
from typing import TYPE_CHECKING, Literal

import numpy as np

from cudf._lib.timezone import make_timezone_transition_table
from cudf.core.column.column import as_column
from cudf.core.column.datetime import DatetimeColumn
from cudf.core.column.timedelta import TimeDeltaColumn

if TYPE_CHECKING:
from cudf.core.column.datetime import DatetimeColumn
from cudf.core.column.timedelta import TimeDeltaColumn


@lru_cache(maxsize=20)
def get_tz_data(zone_name: str) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
def get_tz_data(zone_name: str) -> tuple[DatetimeColumn, TimeDeltaColumn]:
"""
Return timezone data (transition times and UTC offsets) for the
given IANA time zone.
Expand All @@ -40,7 +43,7 @@ def get_tz_data(zone_name: str) -> Tuple[DatetimeColumn, TimeDeltaColumn]:

def _find_and_read_tzfile_tzpath(
zone_name: str,
) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
) -> tuple[DatetimeColumn, TimeDeltaColumn]:
for search_path in zoneinfo.TZPATH:
if os.path.isfile(os.path.join(search_path, zone_name)):
return _read_tzfile_as_columns(search_path, zone_name)
Expand All @@ -49,7 +52,7 @@ def _find_and_read_tzfile_tzpath(

def _find_and_read_tzfile_tzdata(
zone_name: str,
) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
) -> tuple[DatetimeColumn, TimeDeltaColumn]:
import importlib.resources

package_base = "tzdata.zoneinfo"
Expand Down Expand Up @@ -78,7 +81,7 @@ def _find_and_read_tzfile_tzdata(

def _read_tzfile_as_columns(
tzdir, zone_name: str
) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
) -> tuple[DatetimeColumn, TimeDeltaColumn]:
transition_times_and_offsets = make_timezone_transition_table(
tzdir, zone_name
)
Expand All @@ -92,7 +95,7 @@ def _read_tzfile_as_columns(

def check_ambiguous_and_nonexistent(
ambiguous: Literal["NaT"], nonexistent: Literal["NaT"]
) -> Tuple[Literal["NaT"], Literal["NaT"]]:
) -> tuple[Literal["NaT"], Literal["NaT"]]:
if ambiguous != "NaT":
raise NotImplementedError(
"Only ambiguous='NaT' is currently supported"
Expand Down
15 changes: 9 additions & 6 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
from __future__ import annotations

import warnings
from typing import Tuple, Union
from typing import TYPE_CHECKING

import numpy as np

import cudf
from cudf._typing import ScalarLike
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_bool_dtype,
is_scalar,
)
from cudf.core.column import ColumnBase
from cudf.core.dtypes import CategoricalDtype
from cudf.utils.dtypes import (
_can_cast,
Expand All @@ -21,6 +20,10 @@
is_mixed_with_object_dtype,
)

if TYPE_CHECKING:
from cudf._typing import ScalarLike
from cudf.core.column import ColumnBase


def _normalize_categorical(input_col, other):
if isinstance(input_col, cudf.core.column.CategoricalColumn):
Expand All @@ -41,9 +44,9 @@ def _normalize_categorical(input_col, other):

def _check_and_cast_columns_with_other(
source_col: ColumnBase,
other: Union[ScalarLike, ColumnBase],
other: ScalarLike | ColumnBase,
inplace: bool,
) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
) -> tuple[ColumnBase, ScalarLike | ColumnBase]:
# Returns type-casted `source_col` & `other` based on `inplace`.
source_dtype = source_col.dtype
if isinstance(source_dtype, CategoricalDtype):
Expand Down
Loading

0 comments on commit 64931fc

Please sign in to comment.