Version 0.0.7. First fairly well-tested release. Dropping the "dev" suffix from version number.
ryan-workFromHome · May 20, 2014 · bdf9480 · bdf9480
1 parent c06e764
commit bdf9480
Show file tree

Hide file tree

Showing 9 changed files with 142 additions and 25 deletions.
diff --git a/EXAMPLES.md b/EXAMPLES.md
@@ -54,23 +54,27 @@ import tstables
 import pandas
 from datetime import *
 
-# Class to use as the table discription
-class bpi_values(tables.IsDescription):
+# Class to use as the table description
+class BpiValues(tables.IsDescription):
     timestamp = tables.Int64Col(pos=0)
     bpi = tables.Float64Col(pos=1)
 
+# Use pandas to read in the CSV data
 bpi = pandas.read_csv('bpi_2014_01.csv',index_col=0,names=['date','bpi'],parse_dates=True)
 
 f = tables.open_file('bpi.h5','a')
 
-ts = f.create_ts('/','BPI',bpi_values)
+# Create a new time series
+ts = f.create_ts('/','BPI',BpiValues)
 
+# Append the BPI data
 ts.append(bpi)
-ts.flush()
 
-# Now, read in some data
+# Read in some data
 read_start_dt = datetime(2014,1,4,12,00)
 read_end_dt = datetime(2014,1,4,14,30)
 
 rows = ts.read_range(read_start_dt,read_end_dt)
+
+# `rows` will be a pandas DataFrame with a DatetimeIndex.
 ```
diff --git a/README.md b/README.md
@@ -8,8 +8,39 @@ Its goals are to support a workflow where tons (gigabytes) of time series data a
 appended periodically to a HDF5 file, and need to be read many times (quickly) for analytical models
 and research.
 
-## Not ready for use yet
+## Example
 
-TsTables is not ready for use yet and is currently under development. The goal is to have something
-workable and being testing by end of May, 2014. If you are interested in the project (to contribute
-or learn when it is finished), email Andy Fiedler at <[email protected]>.
+This example reads in minute-by-minute bitcoin price data and then fetches a range of data. For the complete version of
+this example, and other examples, see [EXAMPLES.md](EXAMPLES.md).
+
+```python
+# Class to use as the table description
+class BpiValues(tables.IsDescription):
+    timestamp = tables.Int64Col(pos=0)
+    bpi = tables.Float64Col(pos=1)
+
+# Use pandas to read in the CSV data
+bpi = pandas.read_csv('bpi_2014_01.csv',index_col=0,names=['date','bpi'],parse_dates=True)
+
+f = tables.open_file('bpi.h5','a')
+
+# Create a new time series
+ts = f.create_ts('/','BPI',BpiValues)
+
+# Append the BPI data
+ts.append(bpi)
+
+# Read in some data
+read_start_dt = datetime(2014,1,4,12,00)
+read_end_dt = datetime(2014,1,4,14,30)
+
+rows = ts.read_range(read_start_dt,read_end_dt)
+
+# `rows` will be a pandas DataFrame with a DatetimeIndex.
+```
+
+## Pre-release software
+
+TsTables is currently under development and has yet to be used extensively in production. It is reaching the point where
+it is reasonably well-tested, so if you'd like to use it, feel free! If you are interested in the project (to contribute
+or to hear about updates), email Andy Fiedler at <[email protected]>.
diff --git a/register.py → release.py b/register.py → release.py
@@ -1,4 +1,4 @@
-# Converts README.md to README.txt (in restructured text format) and registers on PyPI
+# Converts README.md to README.txt (in restructured text format), builds package, and uploads to PyPI
 
 import pypandoc
 import os
@@ -7,6 +7,6 @@
 f = open('README.txt','w+')
 f.write(rst)
 f.close()
-os.system("python3 setup.py register")
+os.system("python3 setup.py register sdist upload")
 os.remove('README.txt')
 
diff --git a/setup.cfg b/setup.cfg
@@ -14,7 +14,7 @@ force = True
 
 [egg_info]
 # We are doing development build
-tag_build = dev
+# tag_build = dev
 # Do we want to have date in file name?
 tag_date = 0
 

diff --git a/setup.py b/setup.py
@@ -12,6 +12,8 @@
 if os.path.exists('README.txt'):
     long_description = open('README.txt').read()
 
+exec(open('src/tstables/_version.py').read())
+
 
 setup(
 
@@ -57,7 +59,7 @@
     tests_require = 'docutils >= 0.6',
 
     name = "tstables",
-    version = "0.0.5",
+    version = __version__,
 
     # metadata for upload to PyPI
     author = "Andy Fiedler",
@@ -67,5 +69,5 @@
     keywords = "time series high frequency HDF5",
     url = "http://github.com/afiedler/tstables",   # project home page, if any
     long_description = long_description
-    # could also include long_description, download_url, classifiers, etc.
+    # could also include download_url, classifiers, etc.
 )
diff --git a/src/tstables/__init__.py b/src/tstables/__init__.py
@@ -15,7 +15,7 @@
 TsTables is a wrapper for PyTables that allows you to manage very large time series.
 
 """
-
+from ._version import __version__
 from tstables.tstable import TsTable
 from tstables.file import create_ts
 from tstables.group import timeseries_repr

diff --git a/src/tstables/_version.py b/src/tstables/_version.py
@@ -0,0 +1,5 @@
+# Store the version here so:
+# 1) we don't load dependencies by storing it in __init__.py
+# 2) we can import it in setup.py for the same reason
+# 3) we can import it into your module
+__version__ = '0.0.7'
diff --git a/src/tstables/tests/test_tstable_file.py b/src/tstables/tests/test_tstable_file.py
@@ -33,6 +33,7 @@ def test_create_ts(self):
         # - the group exists
         # - it has a _TS_TABLES_CLASS attribute equal to "TIMESERIES"
         # - it has a table at yYYYY/mMM/dDD/ts_data, where YYY-MM-DD is today (in UTC)
+        # - the dtype is correct
         self.assertEqual(self.h5_file.root.EURUSD.__class__, tables.Group)
         self.assertEqual(self.h5_file.root.EURUSD._v_attrs._TS_TABLES_CLASS,'TIMESERIES')
 
@@ -43,6 +44,10 @@ def test_create_ts(self):
 
         self.assertEqual(ts_data.attrs._TS_TABLES_EXPECTEDROWS_PER_PARTITION,10000)
 
+        self.assertEqual(ts_data._v_dtype[0],tables.dtype_from_descr(Price)[0])
+        self.assertEqual(ts_data._v_dtype[1],tables.dtype_from_descr(Price)[1])
+
+
     def test_create_ts_with_invalid_description_incorrect_order(self):
         class InvalidDesc(tables.IsDescription):
             # Positions are out of order here!
@@ -96,6 +101,17 @@ def test_load_same_timestamp(self):
         for idx,p in enumerate(rows_read['price']):
             self.assertEqual(p,rows['price'][idx])
 
+    def __load_csv_data(self,csv):
+        sfile = io.StringIO(csv)
+
+        # Note: don't need the 'timestamp' column in the dtype param here because it will become the DatetimeIndex.
+        rows = pandas.read_csv(sfile,parse_dates=[0],index_col=0,names=['timestamp', 'price'],dtype={'price': 'i4'})
+
+        ts = self.h5_file.create_ts('/','EURUSD',description=Price)
+        ts.append(rows)
+
+        return ts,rows
+
     def test_load_cross_partition_boundary_timestamps(self):
 
         # This data should just cross the partition boundary between 5/4 and 5/5
@@ -105,13 +121,7 @@ def test_load_cross_partition_boundary_timestamps(self):
                  2014-05-05T00:00:00.000Z,4
                  2014-05-05T00:00:00.001Z,5"""
 
-        sfile = io.StringIO(csv)
-
-        # Note: don't need the 'timestamp' column in the dtype param here because it will become the DatetimeIndex.
-        rows = pandas.read_csv(sfile,parse_dates=[0],index_col=0,names=['timestamp', 'price'],dtype={'price': 'i4'})
-
-        ts = self.h5_file.create_ts('/','EURUSD',description=Price)
-        ts.append(rows)
+        ts,rows = self.__load_csv_data(csv)
 
         # Inspect to ensure that data has been stored correctly
         tbl = ts.root_group.y2014.m05.d04.ts_data
@@ -139,6 +149,68 @@ def test_load_cross_partition_boundary_timestamps(self):
         for idx,p in enumerate(rows_read['price']):
             self.assertEqual(p,rows['price'][idx])
 
+    def test_read_data_end_date_before_start_date(self):
+        """read_range must reject end_dt < start_dt but accept start_dt == end_dt."""
+        csv = """2014-05-04T23:59:59.998Z,1
+                 2014-05-04T23:59:59.999Z,2
+                 2014-05-04T23:59:59.999Z,3
+                 2014-05-05T00:00:00.000Z,4
+                 2014-05-05T00:00:00.001Z,5"""
+        ts,rows = self.__load_csv_data(csv)
+
+        # Reversed range: end_dt (May 5) is strictly before start_dt (May 6), so this must raise
+        end_dt = datetime.datetime(2014,5,5)
+        start_dt = datetime.datetime(2014,5,6)
+        self.assertRaises(AttributeError, ts.read_range, start_dt, end_dt)
+
+        # Equal bounds should work, and return just this row: '2014-05-05T00:00:00.000Z,4'
+        start_dt = end_dt
+        rng = ts.read_range(start_dt,end_dt)
+
+        self.assertEqual(rng['price'].size, 1)
+        self.assertEqual(rng['price'][0],4)
+
+    def test_no_data_stored_in_missing_day(self):
+        # Note that May 5 is missing
+        csv = """2014-05-04T23:59:59.998Z,1
+                 2014-05-04T23:59:59.999Z,2
+                 2014-05-04T23:59:59.999Z,3
+                 2014-05-06T00:00:00.000Z,4
+                 2014-05-06T00:00:00.001Z,5"""
+
+        ts,rows = self.__load_csv_data(csv)
+
+        tbl = ts.root_group.y2014.m05.d05.ts_data
+
+        # No rows on the 5th
+        self.assertEqual(tbl.nrows,0)
+
+        tbl = ts.root_group.y2014.m05.d06.ts_data
+
+        # Two rows on the 6th
+        self.assertEqual(tbl.nrows,2)
+
+        tbl = ts.root_group.y2014.m05.d04.ts_data
+
+        # Three rows on the 4th
+        self.assertEqual(tbl.nrows,3)
+
+    def test_append_no_data(self):
+        # No data, just making sure this doesn't throw an exception or anything
+        csv = """"""
+
+        ts,rows = self.__load_csv_data(csv)
+
+        self.assertEqual(rows['price'].size, 0)
+
+
+
+
+
+
+
+
+
 
 
 

diff --git a/src/tstables/tstable.py b/src/tstables/tstable.py
@@ -180,14 +180,16 @@ def __get_min_ts(self):
 
         return min_ts
 
-
-
     def read_range(self,start_dt,end_dt,as_pandas_dataframe=True):
         # Convert start_dt and end_dt to UTC if they are naive
         if start_dt.tzinfo is None:
             start_dt = pytz.utc.localize(start_dt)
         if end_dt.tzinfo is None:
             end_dt = pytz.utc.localize(end_dt)
+
+        # Reject reversed ranges only; equal bounds are valid and select a single instant.
+        if start_dt > end_dt:
+            raise AttributeError('start_dt must be <= end_dt')
 
 
         partitions = self.__dtrange_to_partition_ranges(start_dt,end_dt)
@@ -208,11 +210,12 @@ def read_range(self,start_dt,end_dt,as_pandas_dataframe=True):
 
         return result
 
-
     def append(self,rows,convert_strings=False):
         # This part is specific to pandas support. If rows is a pandas DataFrame, convert it to a
         # format suitable to PyTables
         if rows.__class__ == pandas.core.frame.DataFrame:
+            if rows.empty:
+                return # Do nothing if we are appending nothing
             if rows.index.__class__ != pandas.tseries.index.DatetimeIndex:
                 raise ValueError('when rows is a DataFrame, the index must be a DatetimeIndex.')