rapidsai
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 14 additions & 5 deletions b/‎.github/CODEOWNERS‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 18 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎ci/local/README.md‎
Lines changed: 8 additions & 1 deletion b/‎ci/local/README.md‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎cpp/CMakeLists.txt‎
Lines changed: 10 additions & 6 deletions b/‎cpp/CMakeLists.txt‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎cpp/include/bitmask.hpp‎
Lines changed: 63 additions & 0 deletions b/‎cpp/include/bitmask.hpp‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎cpp/include/cudf/functions.h‎
Lines changed: 0 additions & 15 deletions b/‎cpp/include/cudf/functions.h‎
Lines changed: 0 additions & 15 deletions
diff --git a/‎cpp/include/cudf/io_functions.h‎
Lines changed: 12 additions & 0 deletions b/‎cpp/include/cudf/io_functions.h‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎cpp/include/cudf/io_types.h‎
Lines changed: 29 additions & 0 deletions b/‎cpp/include/cudf/io_types.h‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎cpp/include/cudf/types.h‎
Lines changed: 11 additions & 7 deletions b/‎cpp/include/cudf/types.h‎
Lines changed: 11 additions & 7 deletions
@@ -1,8 +1,17 @@
-#Admins & project code owners
-#*           @rapidsai/admins @rapidsai/cudf-admin @rapidsai/cudf-codeowners
-
 #cpp code owners
-#cpp/       @rapidsai/cudf-cpp-codeowners
+cpp/       @rapidsai/cudf-cpp-codeowners
 
 #python code owners
-#python/    @rapidsai/cudf-python-codeowners
+python/    @rapidsai/cudf-python-codeowners
+
+#cmake code owners
+**/CMakeLists.txt  @rapidsai/cudf-cmake-codeowners
+**/cmake/          @rapidsai/cudf-cmake-codeowners
+
+#build/ops code owners
+.github/           @rapidsai/cudf-ops-codeowners 
+ci/                @rapidsai/cudf-ops-codeowners
+conda/             @rapidsai/cudf-ops-codeowners
+**/Dockerfile      @rapidsai/cudf-ops-codeowners
+**/.dockerignore   @rapidsai/cudf-ops-codeowners
+docker/            @rapidsai/cudf-ops-codeowners
@@ -1,7 +1,7 @@
 # cuDF 0.7.0 (Date TBD)
 
 ## New Features
-
+- PR #1142 Add `GDF_BOOL8` column type
 - PR #1194 Implement overloads for CUDA atomic operations
 - PR #1292 Implemented Bitwise binary ops AND, OR, XOR (&, |, ^)
 - PR #1235 Add GPU-accelerated Parquet Reader
@@ -20,9 +20,13 @@
 - PR #1441 Add Series level cumulative ops (cumsum, cummin, cummax, cumprod)
 - PR #1461 Add Python coverage test to gpu build
 - PR #1445 Parquet Reader: Add selective reading of rows and row group
+- PR #1532 Parquet Reader: Add support for INT96 timestamps
+- PR #1516 Add Series and DataFrame.ndim
+- PR #1466 Add GPU-accelerated ORC Reader
 
 ## Improvements
 
+- PR #1531 Refactor closures as private functions in gpuarrow
 - PR #1404 Parquet reader page data decoding speedup
 - PR #1076 Use `type_dispatcher` in join, quantiles, filter, segmented sort, radix sort and hash_groupby
 - PR #1202 Simplify README.md
@@ -40,6 +44,7 @@
 - PR #1319 CSV Reader: Use column wrapper for gdf_column output alloc/dealloc
 - PR #1376 Change series quantile default to linear
 - PR #1399 Replace CFFI bindings for NVTX functions with Cython bindings
+- PR #1407 Rename and cleanup of `gdf_table` to `device_table`
 - PR #1389 Refactored `set_null_count()`
 - PR #1386 Added macros `GDF_TRY()`, `CUDF_TRY()` and `ASSERT_CUDF_SUCCEEDED()`
 - PR #1435 Rework CMake and conda recipes to depend on installed libraries
@@ -50,10 +55,14 @@
 - PR #1479 Convert Parquet Reader CFFI to Cython
 - PR #1397 Add a utility function for producing an overflow-safe kernel launch grid configuration
 - PR #1382 Add GPU parsing of nested brackets to cuIO parsing utilities
+- PR #1481 Add cudf::table constructor to allocate a set of `gdf_column`s
 - PR #1484 Convert GroupBy CFFI to Cython
 - PR #1463 Allow and default melt keyword argument var_name to be None
 - PR #1486 Parquet Reader: Use device_buffer rather than device_ptr
 - PR #1525 Add cudatoolkit conda dependency
+- PR #1520 Renamed `src/dataframe` to `src/table` and moved `table.hpp`. Made `types.hpp` contain type declarations only.
+- PR #1521 Added `row_bitmask` to compute bitmask for rows of a table. Merged `valids_ops.cu` and `bitmask_ops.cu`
+- PR #1553 Overload `hash_row` to avoid using initial hash values. Updated `gdf_hash` to select between overloads
 
 ## Bug Fixes
 
@@ -87,9 +96,16 @@
 - PR #1451 Fix hash join estimated result size is not correct
 - PR #1454 Fix local build script improperly change directory permissions
 - PR #1490 Require Dask 1.1.0+ for `is_dataframe_like` test or skip otherwise.
+- PR #1491 Use more specific directories & groups in CODEOWNERS
 - PR #1497 Fix Thrust issue on CentOS caused by missing default constructor of host_vector elements
 - PR #1498 Add missing include guard to device_atomics.cuh and separated DEVICE_ATOMICS_TEST
 - PR #1506 Fix csv-write call to updated NVStrings method
+- PR #1510 Added nvstrings `fillna()` function
+- PR #1507 Parquet Reader: Default string data to GDF_STRING
+- PR #1535 Fix doc issue to ensure correct labelling of cudf.series
+- PR #1537 Fix `undefined reference` link error in HashPartitionTest
+- PR #1548 Fix ci/local/build.sh README from using an incorrect image example
+- PR #1551 CSV Reader: Fix integer column name indexing
 
 
 # cuDF 0.6.1 (25 Mar 2019)
@@ -151,6 +167,7 @@
 - PR #1155 Add __array_ufunc__ for DataFrame and Series for sqrt
 - PR #1168 to_frame for series accepts a name argument
 
+
 ## Improvements
 
 - PR #1218 Add dask-cudf page to API docs
 
@@ -23,10 +23,17 @@ where:
 ```
 
 Example Usage:
-`bash build.sh -r ~/rapids/cudf -i gpuci/cuda9.2-ubuntu16.04-gcc5-py3.6`
+`bash build.sh -r ~/rapids/cudf -i gpuci/rapidsai-base:cuda9.2-ubuntu16.04-gcc5-py3.6`
 
 For a full list of available gpuCI docker images, visit our [DockerHub](https://hub.docker.com/r/gpuci/rapidsai-base/tags) page.
 
+Style Check:
+```bash
+$ bash ci/local/build.sh -r ~/rapids/cudf -s
+$ source activate gdf    #Activate gpuCI conda environment
+$ cd rapids
+$ flake8 python
+```
 
 ## Information
 
 
@@ -91,16 +91,16 @@ include(FeatureSummary)
 include(CheckIncludeFiles)
 include(CheckLibraryExists)
 
-include(ConfigureArrow)
-
 ###################################################################################################
 # - find arrow ------------------------------------------------------------------------------------
 
+include(ConfigureArrow)
+
 if (ARROW_FOUND)
     message(STATUS "Apache Arrow found in ${ARROW_INCLUDE_DIR}")
 else()
     message(FATAL_ERROR "Apache Arrow not found, please check your settings.")
-endif()
+endif(ARROW_FOUND)
 
 ###################################################################################################
 # - find zlib -------------------------------------------------------------------------------------
@@ -221,8 +221,8 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT
 
 add_library(cudf SHARED
             src/comms/ipc/ipc.cu
-            src/dataframe/column.cpp
-            src/dataframe/context.cpp
+            src/column/column.cpp
+            src/column/context.cpp
             src/string/nvcategory_util.cpp
             src/join/joining.cu
             src/orderby/orderby.cu
@@ -241,7 +241,6 @@ add_library(cudf SHARED
             src/binary/jit/util/operator.cpp
             src/binary/jit/util/type.cpp
             src/bitmask/bitmask_ops.cu
-            src/bitmask/valid_ops.cu
             src/compaction/stream_compaction_ops.cu
             src/datetime/datetime_ops.cu
             src/hash/hashing.cu
@@ -259,6 +258,11 @@ add_library(cudf SHARED
             src/io/convert/dlpack/cudf_dlpack.cpp
             src/io/csv/csv_reader.cu
             src/io/csv/csv_writer.cu
+            src/io/orc/orc_reader.cu
+            src/io/orc/orc.cpp
+            src/io/orc/timezone.cpp
+            src/io/orc/stripe_data.cu
+            src/io/orc/stripe_init.cu
             src/io/parquet/page_data.cu
             src/io/parquet/page_hdr.cu
             src/io/parquet/parquet_reader.cu
 
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BITMASK_HPP
+#define BITMASK_HPP
+
+#include <cudf.h>
+#include <types.hpp>
+
+/**
+ * @brief  Counts the number of valid bits for the specified number of rows
+ * in a validity bitmask.
+ *
+ * If the bitmask is null, returns a count equal to the number of rows.
+ *
+ * @param[in] masks The validity bitmask buffer in device memory
+ * @param[in] num_rows The number of bits to count, i.e. bits [0, num_rows)
+ * @param[out] count The number of valid bits in the buffer from [0, num_rows)
+ *
+ * @returns  GDF_SUCCESS upon successful completion
+ */
+gdf_error gdf_count_nonzero_mask(gdf_valid_type const* masks,
+                                 gdf_size_type num_rows, gdf_size_type* count);
+
+/** ---------------------------------------------------------------------------*
+ * @brief Concatenate the validity bitmasks of multiple columns
+ *
+ * Accounts for the differences between lengths of columns and their bitmasks
+ * (e.g. because gdf_valid_type is larger than one bit).
+ *
+ * @param[out] output_mask The concatenated mask
+ * @param[in] output_column_length The total length (in data elements) of the
+ *                                 concatenated column
+ * @param[in] masks_to_concat The array of device pointers to validity bitmasks
+ *                            for the columns to concatenate
+ * @param[in] column_lengths An array of lengths of the columns to concatenate
+ * @param[in] num_columns The number of columns to concatenate
+ * @return gdf_error GDF_SUCCESS or GDF_CUDA_ERROR if there is a runtime CUDA
+ *                   error
+ *
+ *---------------------------------------------------------------------------**/
+gdf_error gdf_mask_concat(gdf_valid_type* output_mask,
+                          gdf_size_type output_column_length,
+                          gdf_valid_type* masks_to_concat[],
+                          gdf_size_type* column_lengths,
+                          gdf_size_type num_columns);
+
+
+#endif
@@ -41,21 +41,6 @@ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color );
  */
 gdf_error gdf_nvtx_range_pop();
 
-/**
- * @brief  Counts the number of valid bits for the specified number of rows
- * in a validity bitmask.
- *
- * If the bitmask is null, returns a count equal to the number of rows.
- *
- * @param[in] masks The validity bitmask buffer in device memory
- * @param[in] num_rows The number of bits to count
- * @param[out] count The number of valid bits in the buffer from [0, num_rows)
- *
- * @returns  GDF_SUCCESS upon successful completion
- *
- */
-gdf_error gdf_count_nonzero_mask(gdf_valid_type const *masks,
-                                 gdf_size_type num_rows, gdf_size_type *count);
 
 /**
  * Calculates the number of bytes to allocate for a column's validity bitmask
 
@@ -34,6 +34,18 @@ gdf_error read_csv(csv_read_arg *args);
  */
 gdf_error write_csv(csv_write_arg* args);
 
+/**
+ * @brief Interface to parse ORC data to GDF columns
+ *
+ * This function accepts an input source for an Apache ORC dataset and outputs
+ * an array of gdf_columns.
+ *
+ * @param[in,out] args Structure containing input and output args
+ *
+ * @return gdf_error GDF_SUCCESS if successful
+ **/
+gdf_error read_orc(orc_read_arg *args);
+
 /*
  * @brief Interface to parse Parquet data to GDF columns
  */
 
@@ -158,6 +158,33 @@ typedef struct
 
 } csv_write_arg;
 
+/**---------------------------------------------------------------------------*
+ * @brief Input and output arguments to the read_orc interface.
+ *---------------------------------------------------------------------------**/
+typedef struct {
+
+  /*
+   * Output arguments
+   */
+  int           num_cols_out;               ///< Out: Number of columns returned
+  int           num_rows_out;               ///< Out: Number of rows returned
+  gdf_column    **data;                     ///< Out: Array of gdf_columns*
+
+  /*
+   * Input arguments
+   */
+  gdf_input_type source_type;               ///< In: Type of data source
+  const char    *source;                    ///< In: If source_type is FILE_PATH, contains the filepath. If source_type is HOST_BUFFER, points to the host memory buffer
+  size_t        buffer_size;                ///< In: If source_type is HOST_BUFFER, represents the size of the buffer in bytes. Unused otherwise.
+
+  const char    **use_cols;                 ///< In: Columns of interest; only these columns will be parsed and returned.
+  int           use_cols_len;               ///< In: Number of columns
+
+  int           skip_rows;                  ///< In: Number of rows to skip from the start
+  int           num_rows;                   ///< In: Number of rows to read. Actual number of returned rows may be less
+
+} orc_read_arg;
+
 /**---------------------------------------------------------------------------*
  * @brief Input and output arguments to the read_parquet interface.
  *---------------------------------------------------------------------------**/
@@ -185,4 +212,6 @@ typedef struct {
   const char    **use_cols;                 ///< In: Columns of interest; only these columns will be parsed and returned.
   int           use_cols_len;               ///< In: Number of columns
 
+  bool          strings_to_categorical;     ///< In: If TRUE, returns string data as GDF_CATEGORY, otherwise GDF_STRING
+
 } pq_read_arg;
@@ -1,14 +1,17 @@
 #pragma once
 
 // TODO: Update to use fixed width types when CFFI goes away
-typedef int gdf_size_type; ///< Limits the maximum size of a gdf_column to 2^31-1
+typedef int           gdf_size_type; ///< Limits the maximum size of a gdf_column to 2^31-1
 typedef gdf_size_type gdf_index_type;
 typedef unsigned char gdf_valid_type;
-typedef  long  gdf_date64;
-typedef  int    gdf_date32;
-typedef  int    gdf_category;
-typedef  long  gdf_timestamp;
-typedef int   gdf_nvstring_category;
+typedef char          gdf_bool8;     /**< Storage type for Boolean values.
+                                          char is used to guarantee 8-bit storage.
+                                          zero == false, nonzero == true. */
+typedef	long          gdf_date64;
+typedef	int           gdf_date32;
+typedef	int           gdf_category;
+typedef	long          gdf_timestamp;
+typedef int           gdf_nvstring_category;
 
 
  /**
@@ -22,6 +25,7 @@ typedef enum {
     GDF_INT64,
     GDF_FLOAT32,
     GDF_FLOAT64,
+    GDF_BOOL8,      ///< Boolean stored in 8 bits per Boolean. zero==false, nonzero==true.
     GDF_DATE32,     ///< int32_t days since the UNIX epoch
     GDF_DATE64,     ///< int64_t milliseconds since the UNIX epoch
     GDF_TIMESTAMP,  ///< Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond)
@@ -32,7 +36,6 @@ typedef enum {
 } gdf_dtype;
 
 
-
 /**
  * @brief  These are all possible gdf error codes that can be returned from
  * a libgdf function. ANY NEW ERROR CODE MUST ALSO BE ADDED TO `gdf_error_get_name`
@@ -110,6 +113,7 @@ typedef union {
   long          si64;  /**< GDF_INT64     */
   float         fp32;  /**< GDF_FLOAT32   */
   double        fp64;  /**< GDF_FLOAT64   */
+  char           b08;  /**< GDF_BOOL8     */
   gdf_date32    dt32;  /**< GDF_DATE32    */
   gdf_date64    dt64;  /**< GDF_DATE64    */
   gdf_timestamp tmst;  /**< GDF_TIMESTAMP */