rapidsai · nsakharnykh · Oct 3, 2018 · Oct 2, 2018 · Oct 2, 2018 · Oct 2, 2018
diff --git a/include/gdf/cffi/io_types.h b/include/gdf/cffi/io_types.h
@@ -28,49 +28,36 @@ typedef struct {
 	/*
 	 * Output Arguments - space created in reader.
 	 */
-	int				num_cols_out;							// Out: number of columns
-	int				num_rows_out;							// Out: number of rows
-	gdf_column		**data;								// Out: array of *gdf_columns
+
+
+	int				num_cols_out;			/**< Out: return the number of columns read in	*/
+	int				num_rows_out;			/**< Out: return the number of rows read in 	*/
+	gdf_column		**data;					/**< Out: return the array of *gdf_columns 		*/
 
 	/*
 	 * Input arguments - all data is in the host
 	 */
-	char		*file_path;				// file location to read from	- if the file is compressed, it needs proper file extensions {.gz}
+	char		*file_path;					/**< file location to read from	- currently the file cannot be compressed 							*/
 
-	char		lineterminator;			// can change the end of line character
-	char		delimiter;				// also called 'sep'  this is the field separator
-	bool 		delim_whitespace;		// use white space as the delimiter
-	bool		skipinitialspace;		// Skip spaces after delimiter
+	char		lineterminator;				/**< define the line terminator character.  Default is  '\n'  										*/
+	char		delimiter;					/**< define the field separator, default is ','.  This argument is also called 'sep'  				*/
+	bool 		delim_whitespace;			/**< use white space as the delimiter - default is false.  This overrides the delimiter argument 	*/
+	bool		skipinitialspace;			/**< skip white spaces after the delimiter - default is false  										*/
 
-	int			num_cols;				// number of columns (array sizes)
-	const char	**names;				// array of char *  Ordered List of column names to use.   names cannot be used with header
-	const char	**dtype;				// array of char *	Ordered List of data types as strings
+	int			num_cols;					/**< number of columns in the names and dtype arrays												*/
+	const char	**names;					/**< ordered list of column names; this is a required field 										*/
+	const char	**dtype;					/**< ordered list of data types; this is a required field											*/
 
-	int			skiprows;				// number of rows at the start of the files to skip
-	int			skipfooter;				// number of rows at the bottom of the file to skip - counting is backwards from end, 0 = last line
+	int			skiprows;					/**< number of rows at the start of the file to skip, default is 0									*/
+	int			skipfooter;					/**< number of rows at the bottom of the file to skip - default is 0								*/
 
-	bool		dayfirst;				// is the first value the day?  DD/MM  versus MM/DD
+	bool		dayfirst;					/**< is the first value the day?  DD/MM  versus MM/DD, default is false								*/
 
 
-} csv_read_arg;
 
+} csv_read_arg;
 
 
-/*
- * NOT USED
- *
- * squeeze			- data is always returned as a gdf_column array
- * engine			- this is the only engine
- * keep_default_na  - this has no meaning since the field is marked invalid and the value not seen
- * na_filter		- empty fields are automatically tagged as invalid
- * verbose
- * keep_date_col	- will not maintain raw data
- * date_parser		- there is only this parser
- * float_precision	- there is only one converter that will cover all specified values
- * quoting			- this is for out
- * dialect			- not used
- *
- */
 
 
 

diff --git a/python/libgdf_cffi/libgdf_build.py b/python/libgdf_cffi/libgdf_build.py
@@ -3,7 +3,7 @@
 ffibuilder = cffi.FFI()
 ffibuilder.set_source("libgdf_cffi.libgdf_cffi", None)
 
-for fname in ['types.h', 'functions.h']:
+for fname in ['types.h', 'convert_types.h', 'functions.h', 'io_types.h', 'io_functions.h']:
     with open('include/gdf/cffi/{}'.format(fname), 'r') as fin:
         ffibuilder.cdef(fin.read())
 

diff --git a/src/io/csv/csv-reader.cu b/src/io/csv/csv-reader.cu
@@ -115,7 +115,7 @@ __device__ int whichBit(int bit) { return (bit % 8);  }
 
 __inline__ __device__ void validAtomicOR(gdf_valid_type* address, gdf_valid_type val)
 {
-	int32_t *base_address = (int32_t*)((gdf_valid_type*)address - ((int32_t)address & 3));
+	int32_t *base_address = (int32_t*)((gdf_valid_type*)address - ((size_t)address & 3));
 	int32_t int_val = (int32_t)val << (((size_t) address & 3) * 8);
 
 	atomicOr(base_address, int_val);
@@ -135,6 +135,32 @@ __device__ void setBit(gdf_valid_type* address, int bit) {
  *
  * @param[in and out] args the input arguments, but this also contains the returned data
  *
+ * Arguments:
+ *
+ *  Required Arguments
+ * 		file_path			- 	file location to read from	- currently the file cannot be compressed
+ * 		num_cols			-	number of columns in the names and dtype arrays
+ * 		names				-	ordered list of column names; this is a required field
+ * 		dtype				- 	ordered list of data types; this is a required field
+ *
+ * 	Optional
+ * 		lineterminator		-	define the line terminator character.  Default is  '\n'
+ * 		delimiter			-	define the field separator, default is ','.  This argument is also called 'sep'
+ * 		delim_whitespace	-	use white space as the delimiter - default is false.  This overrides the delimiter argument
+ * 		skipinitialspace	-	skip white spaces after the delimiter - default is false
+ *
+ * 		skiprows			-	number of rows at the start of the file to skip, default is 0
+ * 		skipfooter			-	number of rows at the bottom of the file to skip - default is 0
+ *
+ * 		dayfirst			-	is the first value the day?  DD/MM  versus MM/DD, default is false
+ *
+ *
+ *  Output
+ *  	num_cols_out		-	Out: return the number of columns read in
+ *  	num_rows_out		- 	Out: return the number of rows read in
+ *  	data				-	Out: return the array of *gdf_columns
+ *
+ *
  * @return gdf_error
  *
  */

diff --git a/src/io/csv/date-time-parser.cuh b/src/io/csv/date-time-parser.cuh
@@ -145,7 +145,7 @@ gdf_date64 parseDateTimeFormat(char *data, long start_idx, long end_idx, bool da
 		if ( (end_idx - start_idx) < 11 ) {
 			// only have a date portion, no time
 			extractDate(data, start_idx, end_idx, dayfirst, &year, &month, &day);
-			answer = secondsFromEpoch(year, month, day, 12, 0, 0);
+			answer = secondsFromEpoch(year, month, day, 0, 0, 0);
 		} else {
 			answer = -1;
 		}
@@ -323,7 +323,7 @@ bool extractTime(char *data, int sIdx, int eIdx, int *hour, int *minute, int *se
 __host__ __device__
 gdf_date32 daysSinceEpoch(int year, int month, int day)  {
 
-	static unsigned short days[12] = {0,  31,  60,  91, 121, 152, 182, 213, 244, 274, 305, 335};
+	static unsigned short days[12] = {0,  31,  59,  90, 120, 151, 181, 212, 243, 273, 304, 334};
 
 	// years since epoch
 	int ye = year - 1970;
@@ -345,7 +345,7 @@ gdf_date32 daysSinceEpoch(int year, int month, int day)  {
 	days_e += days[me];
 
 	// now just add days, but not current full days since this one is not over
-	days_e +=  day - 1;
+	days_e +=  day;
 
 	return days_e;
 }

diff --git a/src/io/csv/type_conversion.cuh b/src/io/csv/type_conversion.cuh
@@ -3,7 +3,7 @@
 #ifndef CONVERSION_FUNCTIONS_CUH
 #define CONVERSION_FUNCTIONS_CUH
 
-#include <math_functions.h>
+#include <cuda_runtime_api.h>