Skip to content
This repository was archived by the owner on Dec 21, 2018. It is now read-only.

[REVIEW] IO Patch 1 #158

Merged
merged 9 commits into from
Oct 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 17 additions & 30 deletions include/gdf/cffi/io_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,49 +28,36 @@ typedef struct {
/*
* Output Arguments - space created in reader.
*/
int num_cols_out; // Out: number of columns
int num_rows_out; // Out: number of rows
gdf_column **data; // Out: array of *gdf_columns


int num_cols_out; /**< Out: return the number of columns read in */
int num_rows_out; /**< Out: return the number of rows read in */
gdf_column **data; /**< Out: return the array of *gdf_columns */

/*
* Input arguments - all data is in the host
*/
char *file_path; // file location to read from - if the file is compressed, it needs proper file extensions {.gz}
char *file_path; /**< file location to read from - currently the file cannot be compressed */

char lineterminator; // can change the end of line character
char delimiter; // also called 'sep' this is the field separator
bool delim_whitespace; // use white space as the delimiter
bool skipinitialspace; // Skip spaces after delimiter
char lineterminator; /**< define the line terminator character. Default is '\n' */
char delimiter; /**< define the field separator, default is ',' This argument is also called 'sep' */
bool delim_whitespace; /**< use white space as the delimiter - default is false. This overrides the delimiter argument */
bool skipinitialspace; /**< skip white spaces after the delimiter - default is false */

int num_cols; // number of columns (array sizes)
const char **names; // array of char * Ordered List of column names to use. names cannot be used with header
const char **dtype; // array of char * Ordered List of data types as strings
int num_cols; /**< number of columns in the names and dtype arrays */
const char **names; /**< ordered List of column names, this is a required field */
const char **dtype; /**< ordered List of data types, this is required */

int skiprows; // number of rows at the start of the files to skip
int skipfooter; // number of rows at the bottom of the file to skip - counting is backwards from end, 0 = last line
int skiprows; /**< number of rows at the start of the files to skip, default is 0 */
int skipfooter; /**< number of rows at the bottom of the file to skip - default is 0 */

bool dayfirst; // is the first value the day? DD/MM versus MM/DD
bool dayfirst; /**< is the first value the day? DD/MM versus MM/DD, default is false */


} csv_read_arg;

} csv_read_arg;


/*
* NOT USED
*
* squeeze - data is always returned as a gdf_column array
* engine - this is the only engine
* keep_default_na - this has no meaning since the field is marked invalid and the value not seen
* na_filter - empty fields are automatically tagged as invalid
* verbose
* keep_date_col - will not maintain raw data
* date_parser - there is only this parser
* float_precision - there is only one converter that will cover all specified values
* quoting - this is for out
* dialect - not used
*
*/



Expand Down
2 changes: 1 addition & 1 deletion python/libgdf_cffi/libgdf_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
ffibuilder = cffi.FFI()
ffibuilder.set_source("libgdf_cffi.libgdf_cffi", None)

for fname in ['types.h', 'functions.h']:
for fname in ['types.h', 'convert_types.h', 'functions.h', 'io_types.h', 'io_functions.h']:
with open('include/gdf/cffi/{}'.format(fname), 'r') as fin:
ffibuilder.cdef(fin.read())

Expand Down
28 changes: 27 additions & 1 deletion src/io/csv/csv-reader.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ __device__ int whichBit(int bit) { return (bit % 8); }

__inline__ __device__ void validAtomicOR(gdf_valid_type* address, gdf_valid_type val)
{
int32_t *base_address = (int32_t*)((gdf_valid_type*)address - ((int32_t)address & 3));
int32_t *base_address = (int32_t*)((gdf_valid_type*)address - ((size_t)address & 3));
int32_t int_val = (int32_t)val << (((size_t) address & 3) * 8);

atomicOr(base_address, int_val);
Expand All @@ -135,6 +135,32 @@ __device__ void setBit(gdf_valid_type* address, int bit) {
*
* @param[in,out] args the input arguments, but this also contains the returned data
*
* Arguments:
*
* Required Arguments
* file_path - file location to read from - currently the file cannot be compressed
* num_cols - number of columns in the names and dtype arrays
* names - ordered List of column names, this is a required field
* dtype - ordered List of data types, this is required
*
* Optional
* lineterminator - define the line terminator character. Default is '\n'
* delimiter - define the field separator, default is ',' This argument is also called 'sep'
* delim_whitespace - use white space as the delimiter - default is false. This overrides the delimiter argument
* skipinitialspace - skip white spaces after the delimiter - default is false
*
* skiprows - number of rows at the start of the file to skip, default is 0
* skipfooter - number of rows at the bottom of the file to skip - default is 0
*
* dayfirst - is the first value the day? DD/MM versus MM/DD, default is false
*
*
* Output
* num_cols_out - Out: return the number of columns read in
* num_rows_out - Out: return the number of rows read in
* data - Out: return the array of *gdf_columns
*
*
* @return gdf_error
*
*/
Expand Down
6 changes: 3 additions & 3 deletions src/io/csv/date-time-parser.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ gdf_date64 parseDateTimeFormat(char *data, long start_idx, long end_idx, bool da
if ( (end_idx - start_idx) < 11 ) {
// only have a date portion, no time
extractDate(data, start_idx, end_idx, dayfirst, &year, &month, &day);
answer = secondsFromEpoch(year, month, day, 12, 0, 0);
answer = secondsFromEpoch(year, month, day, 0, 0, 0);
} else {
answer = -1;
}
Expand Down Expand Up @@ -323,7 +323,7 @@ bool extractTime(char *data, int sIdx, int eIdx, int *hour, int *minute, int *se
__host__ __device__
gdf_date32 daysSinceEpoch(int year, int month, int day) {

static unsigned short days[12] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335};
static unsigned short days[12] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};

// years since epoch
int ye = year - 1970;
Expand All @@ -345,7 +345,7 @@ gdf_date32 daysSinceEpoch(int year, int month, int day) {
days_e += days[me];

// now just add days, but not current full days since this one is not over
days_e += day - 1;
days_e += day;

return days_e;
}
Expand Down
2 changes: 1 addition & 1 deletion src/io/csv/type_conversion.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#ifndef CONVERSION_FUNCTIONS_CUH
#define CONVERSION_FUNCTIONS_CUH

#include <math_functions.h>
#include <cuda_runtime_api.h>



Expand Down