Skip to content

Commit 50bb9c1

Browse files
authored
Merge pull request #1521 from jrhemstad/fea-ext-row-bitmask
[REVIEW] Add `row_bitmask` feature. Cleanup/reorganization of bitmask_ops/valid_ops
2 parents b14ef72 + de91e89 commit 50bb9c1

File tree

17 files changed

+1065
-800
lines changed

17 files changed

+1065
-800
lines changed

CHANGELOG.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,10 @@
6060
- PR #1463 Allow and default melt keyword argument var_name to be None
6161
- PR #1486 Parquet Reader: Use device_buffer rather than device_ptr
6262
- PR #1520 Renamed `src/dataframe` to `src/table` and moved `table.hpp`. Made `types.hpp` to be type declarations only.
63-
- PR #1492 Convert transpose CFFI to Cython
64-
- PR #1495 Convert binary and unary ops CFFI to Cython
65-
- PR #1503 Convert sorting and hashing ops CFFI to Cython
63+
- PR #1521 Added `row_bitmask` to compute bitmask for rows of a table. Merged `valid_ops.cu` and `bitmask_ops.cu`
6664
- PR #1553 Overload `hash_row` to avoid using initial hash values. Updated `gdf_hash` to select between overloads
6765

66+
6867
## Bug Fixes
6968

7069
- PR #1233 Fix dtypes issue while adding the column to `str` dataframe.

cpp/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ add_library(cudf SHARED
241241
src/binary/jit/util/operator.cpp
242242
src/binary/jit/util/type.cpp
243243
src/bitmask/bitmask_ops.cu
244-
src/bitmask/valid_ops.cu
245244
src/compaction/stream_compaction_ops.cu
246245
src/datetime/datetime_ops.cu
247246
src/hash/hashing.cu

cpp/include/bitmask.hpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright (c) 2019, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef BITMASK_HPP
18+
#define BITMASK_HPP
19+
20+
#include <cudf.h>
21+
#include <types.hpp>
22+
23+
/**
24+
* @brief Counts the number of valid bits for the specified number of rows
25+
* in a validity bitmask.
26+
*
27+
* If the bitmask is null, returns a count equal to the number of rows.
28+
*
29+
* @param[in] masks The validity bitmask buffer in device memory
30+
* @param[in] num_rows The number of bits to count
31+
* @param[out] count The number of valid bits in the buffer from [0, num_rows)
32+
*
33+
* @returns GDF_SUCCESS upon successful completion
34+
*
35+
*/
36+
gdf_error gdf_count_nonzero_mask(gdf_valid_type const* masks,
37+
gdf_size_type num_rows, gdf_size_type* count);
38+
39+
/** ---------------------------------------------------------------------------*
40+
* @brief Concatenate the validity bitmasks of multiple columns
41+
*
42+
* Accounts for the differences between lengths of columns and their bitmasks
43+
* (e.g. because gdf_valid_type is larger than one bit).
44+
*
45+
* @param[out] output_mask The concatenated mask
46+
* @param[in] output_column_length The total length (in data elements) of the
47+
* concatenated column
48+
* @param[in] masks_to_concat The array of device pointers to validity bitmasks
49+
* for the columns to concatenate
50+
* @param[in] column_lengths An array of lengths of the columns to concatenate
51+
* @param[in] num_columns The number of columns to concatenate
52+
* @return gdf_error GDF_SUCCESS or GDF_CUDA_ERROR if there is a runtime CUDA
53+
* error
54+
*
55+
---------------------------------------------------------------------------**/
56+
gdf_error gdf_mask_concat(gdf_valid_type* output_mask,
57+
gdf_size_type output_column_length,
58+
gdf_valid_type* masks_to_concat[],
59+
gdf_size_type* column_lengths,
60+
gdf_size_type num_columns);
61+
62+
63+
#endif

cpp/include/cudf/functions.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,6 @@ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color );
4141
*/
4242
gdf_error gdf_nvtx_range_pop();
4343

44-
/**
45-
* @brief Counts the number of valid bits for the specified number of rows
46-
* in a validity bitmask.
47-
*
48-
* If the bitmask is null, returns a count equal to the number of rows.
49-
*
50-
* @param[in] masks The validity bitmask buffer in device memory
51-
* @param[in] num_rows The number of bits to count
52-
* @param[out] count The number of valid bits in the buffer from [0, num_rows)
53-
*
54-
* @returns GDF_SUCCESS upon successful completion
55-
*
56-
*/
57-
gdf_error gdf_count_nonzero_mask(gdf_valid_type const *masks,
58-
gdf_size_type num_rows, gdf_size_type *count);
5944

6045
/**
6146
* Calculates the number of bytes to allocate for a column's validity bitmask

cpp/include/types.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
/**---------------------------------------------------------------------------*
2020
* @file types.hpp
2121
* @brief Type declarations for libcudf.
22-
*
23-
*---------------------------------------------------------------------------**/
22+
*
23+
*---------------------------------------------------------------------------**/
24+
25+
namespace bit_mask {
26+
using bit_mask_t = uint32_t;
27+
}
2428

2529
// Forward declaration
2630
namespace cudf {

cpp/src/binary/jit/core/binop.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include "binary/jit/core/launcher.h"
2121
#include "binary/jit/util/operator.h"
22-
#include "bitmask/bitmask_ops.h"
22+
#include <bitmask/bitmask_ops.hpp>
2323
#include "utilities/error_utils.hpp"
2424
#include "utilities/cudf_utils.h"
2525
#include "cudf.h"

0 commit comments

Comments
 (0)