Skip to content

Commit 23b1168

Browse files
authored
Merge branch 'branch-0.7' into fea-ext-cython-exceptions
2 parents bf3e7d7 + 50bb9c1 commit 23b1168

39 files changed

+5833
-841
lines changed

CHANGELOG.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
- PR #1445 Parquet Reader: Add selective reading of rows and row group
2323
- PR #1532 Parquet Reader: Add support for INT96 timestamps
2424
- PR #1516 Add Series and DataFrame.ndim
25+
- PR #1466 Add GPU-accelerated ORC Reader
2526

2627
## Improvements
2728

@@ -59,12 +60,11 @@
5960
- PR #1463 Allow and default melt keyword argument var_name to be None
6061
- PR #1486 Parquet Reader: Use device_buffer rather than device_ptr
6162
- PR #1520 Renamed `src/dataframe` to `src/table` and moved `table.hpp`. Made `types.hpp` to be type declarations only.
62-
- PR #1492 Convert transpose CFFI to Cython
63-
- PR #1495 Convert binary and unary ops CFFI to Cython
64-
- PR #1503 Convert sorting and hashing ops CFFI to Cython
63+
- PR #1521 Added `row_bitmask` to compute bitmask for rows of a table. Merged `valids_ops.cu` and `bitmask_ops.cu`
6564
- PR #1553 Overload `hash_row` to avoid using initial hash values. Updated `gdf_hash` to select between overloads
6665
- PR #1559 Add `except +` to all Cython function definitions to catch C++ exceptions properly
6766

67+
6868
## Bug Fixes
6969

7070
- PR #1233 Fix dtypes issue while adding the column to `str` dataframe.
@@ -106,6 +106,7 @@
106106
- PR #1535 Fix doc issue to ensure correct labelling of cudf.series
107107
- PR #1537 Fix `undefined reference` link error in HashPartitionTest
108108
- PR #1548 Fix ci/local/build.sh README from using an incorrect image example
109+
- PR #1551 CSV Reader: Fix integer column name indexing
109110

110111

111112
# cuDF 0.6.1 (25 Mar 2019)

cpp/CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,16 @@ include(FeatureSummary)
9191
include(CheckIncludeFiles)
9292
include(CheckLibraryExists)
9393

94-
include(ConfigureArrow)
95-
9694
###################################################################################################
9795
# - find arrow ------------------------------------------------------------------------------------
9896

97+
include(ConfigureArrow)
98+
9999
if (ARROW_FOUND)
100100
message(STATUS "Apache Arrow found in ${ARROW_INCLUDE_DIR}")
101101
else()
102102
message(FATAL_ERROR "Apache Arrow not found, please check your settings.")
103-
endif()
103+
endif(ARROW_FOUND)
104104

105105
###################################################################################################
106106
# - find zlib -------------------------------------------------------------------------------------
@@ -241,7 +241,6 @@ add_library(cudf SHARED
241241
src/binary/jit/util/operator.cpp
242242
src/binary/jit/util/type.cpp
243243
src/bitmask/bitmask_ops.cu
244-
src/bitmask/valid_ops.cu
245244
src/compaction/stream_compaction_ops.cu
246245
src/datetime/datetime_ops.cu
247246
src/hash/hashing.cu
@@ -259,6 +258,11 @@ add_library(cudf SHARED
259258
src/io/convert/dlpack/cudf_dlpack.cpp
260259
src/io/csv/csv_reader.cu
261260
src/io/csv/csv_writer.cu
261+
src/io/orc/orc_reader.cu
262+
src/io/orc/orc.cpp
263+
src/io/orc/timezone.cpp
264+
src/io/orc/stripe_data.cu
265+
src/io/orc/stripe_init.cu
262266
src/io/parquet/page_data.cu
263267
src/io/parquet/page_hdr.cu
264268
src/io/parquet/parquet_reader.cu

cpp/include/bitmask.hpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright (c) 2019, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef BITMASK_HPP
18+
#define BITMASK_HPP
19+
20+
#include <cudf.h>
21+
#include <types.hpp>
22+
23+
/**
24+
* @brief Counts the number of valid bits for the specified number of rows
25+
* in a validity bitmask.
26+
*
27+
* If the bitmask is null, returns a count equal to the number of rows.
28+
*
29+
* @param[in] masks The validity bitmask buffer in device memory
30+
* @param[in] num_rows The number of bits to count
31+
* @param[out] count The number of valid bits in the buffer from [0, num_rows)
32+
*
33+
* @returns GDF_SUCCESS upon successful completion
34+
*
35+
*/
36+
gdf_error gdf_count_nonzero_mask(gdf_valid_type const* masks,
37+
gdf_size_type num_rows, gdf_size_type* count);
38+
39+
/** ---------------------------------------------------------------------------*
40+
* @brief Concatenate the validity bitmasks of multiple columns
41+
*
42+
* Accounts for the differences between lengths of columns and their bitmasks
43+
* (e.g. because gdf_valid_type is larger than one bit).
44+
*
45+
* @param[out] output_mask The concatenated mask
46+
* @param[in] output_column_length The total length (in data elements) of the
47+
* concatenated column
48+
* @param[in] masks_to_concat The array of device pointers to validity bitmasks
49+
* for the columns to concatenate
50+
* @param[in] column_lengths An array of lengths of the columns to concatenate
51+
* @param[in] num_columns The number of columns to concatenate
52+
* @return gdf_error GDF_SUCCESS or GDF_CUDA_ERROR if there is a runtime CUDA
53+
error
54+
*
55+
---------------------------------------------------------------------------**/
56+
gdf_error gdf_mask_concat(gdf_valid_type* output_mask,
57+
gdf_size_type output_column_length,
58+
gdf_valid_type* masks_to_concat[],
59+
gdf_size_type* column_lengths,
60+
gdf_size_type num_columns);
61+
62+
63+
#endif

cpp/include/cudf/functions.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,6 @@ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color );
4141
*/
4242
gdf_error gdf_nvtx_range_pop();
4343

44-
/**
45-
* @brief Counts the number of valid bits for the specified number of rows
46-
* in a validity bitmask.
47-
*
48-
* If the bitmask is null, returns a count equal to the number of rows.
49-
*
50-
* @param[in] masks The validity bitmask buffer in device memory
51-
* @param[in] num_rows The number of bits to count
52-
* @param[out] count The number of valid bits in the buffer from [0, num_rows)
53-
*
54-
* @returns GDF_SUCCESS upon successful completion
55-
*
56-
*/
57-
gdf_error gdf_count_nonzero_mask(gdf_valid_type const *masks,
58-
gdf_size_type num_rows, gdf_size_type *count);
5944

6045
/**
6146
* Calculates the number of bytes to allocate for a column's validity bitmask

cpp/include/cudf/io_functions.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@ gdf_error read_csv(csv_read_arg *args);
3434
*/
3535
gdf_error write_csv(csv_write_arg* args);
3636

37+
/*
38+
* @brief Interface to parse ORC data to GDF columns
39+
*
40+
* This function accepts an input source for an Apache ORC dataset and outputs
41+
* an array of gdf_columns.
42+
*
43+
* @param[in,out] args Structure containing input and output args
44+
*
45+
* @return gdf_error GDF_SUCCESS if successful
46+
**/
47+
gdf_error read_orc(orc_read_arg *args);
48+
3749
/*
3850
* @brief Interface to parse Parquet data to GDF columns
3951
*/

cpp/include/cudf/io_types.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,33 @@ typedef struct
158158

159159
} csv_write_arg;
160160

161+
/**---------------------------------------------------------------------------*
162+
* @brief Input and output arguments to the read_orc interface.
163+
*---------------------------------------------------------------------------**/
164+
typedef struct {
165+
166+
/*
167+
* Output arguments
168+
*/
169+
int num_cols_out; ///< Out: Number of columns returned
170+
int num_rows_out; ///< Out: Number of rows returned
171+
gdf_column **data; ///< Out: Array of gdf_columns*
172+
173+
/*
174+
* Input arguments
175+
*/
176+
gdf_input_type source_type; ///< In: Type of data source
177+
const char *source; ///< In: If source_type is FILE_PATH, contains the filepath. If source_type is HOST_BUFFER, points to the host memory buffer
178+
size_t buffer_size; ///< In: If source_type is HOST_BUFFER, represents the size of the buffer in bytes. Unused otherwise.
179+
180+
const char **use_cols; ///< In: Columns of interest; only these columns will be parsed and returned.
181+
int use_cols_len; ///< In: Number of columns
182+
183+
int skip_rows; ///< In: Number of rows to skip from the start
184+
int num_rows; ///< In: Number of rows to read. Actual number of returned rows may be less
185+
186+
} orc_read_arg;
187+
161188
/**---------------------------------------------------------------------------*
162189
* @brief Input and output arguments to the read_parquet interface.
163190
*---------------------------------------------------------------------------**/

cpp/include/types.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
/**---------------------------------------------------------------------------*
2020
* @file types.hpp
2121
* @brief Type declarations for libcudf.
22-
*
23-
*---------------------------------------------------------------------------**/
22+
*
23+
*---------------------------------------------------------------------------**/
24+
25+
namespace bit_mask {
26+
using bit_mask_t = uint32_t;
27+
}
2428

2529
// Forward declaration
2630
namespace cudf {

cpp/src/binary/jit/core/binop.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include "binary/jit/core/launcher.h"
2121
#include "binary/jit/util/operator.h"
22-
#include "bitmask/bitmask_ops.h"
22+
#include <bitmask/bitmask_ops.hpp>
2323
#include "utilities/error_utils.hpp"
2424
#include "utilities/cudf_utils.h"
2525
#include "cudf.h"

0 commit comments

Comments
 (0)