Skip to content

Commit

Permalink
Add load tests
Browse files Browse the repository at this point in the history
  • Loading branch information
joseangelhernao committed Sep 10, 2021
1 parent 91cf4f5 commit 6199e79
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 21 deletions.
42 changes: 24 additions & 18 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,109 +4,115 @@
class TestLoadPandas(TestBase):

def test_json(self):
    """Load the JSON sample with ``multiline=True`` and check its shape.

    Expects 19 rows, the eight known column names, and a partition count
    equal to the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code
    # (its result was overwritten on the very next line).
    df = self.load_dataframe("../../examples/data/foo.json", type="json", multiline=True)
    self.assertEqual(df.rows.count(), 19)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_json_less_rows(self):
    """Request ``n_rows=13`` from the JSON sample and expect exactly 13 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.json", type="json", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_json_more_rows(self):
    """Request ``n_rows=50`` from the JSON sample and expect fewer than 50 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.json", type="json", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_json_multiline(self):
    """Check that the JSON sample loads correctly with ``multiline=True``.

    Expects 19 rows, the eight known column names, and a partition count
    equal to the configured ``n_partitions`` (1 when not configured).
    """
    # NOTE(review): this performs the same load and assertions as
    # ``test_json`` -- confirm whether both tests are needed.
    source_path = "../../examples/data/foo.json"
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]

    df = self.load_dataframe(source_path, type="json", multiline=True)

    row_count = df.rows.count()
    self.assertEqual(row_count, 19)
    column_names = df.cols.names()
    self.assertEqual(column_names, expected_columns)
    configured_partitions = self.config.get("n_partitions", 1)
    self.assertEqual(configured_partitions, df.partitions())

def test_xml(self):
    """Load the XML sample and check its shape.

    Expects 19 rows, the eight known column names, and a partition count
    equal to the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.xml", type="xml")
    self.assertEqual(df.rows.count(), 19)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_xml_less_rows(self):
    """Request ``n_rows=13`` from the XML sample and expect exactly 13 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.xml", type="xml", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_xml_more_rows(self):
    """Request ``n_rows=50`` from the XML sample and expect fewer than 50 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.xml", type="xml", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_parquet(self):
    """Load the Parquet sample and check its shape.

    Expects 19 rows, the eight known column names, and a partition count
    equal to the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.parquet", type="parquet")
    self.assertEqual(df.rows.count(), 19)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_parquet_less_rows(self):
    """Request ``n_rows=13`` from the Parquet sample and expect exactly 13 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.parquet", type="parquet", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_parquet_more_rows(self):
    """Request ``n_rows=50`` from the Parquet sample and expect fewer than 50 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.parquet", type="parquet", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_avro(self):
    """Load the Avro sample and check its shape.

    Expects 19 rows, the eight known column names, and a partition count
    equal to the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.avro", type="avro")
    self.assertEqual(df.rows.count(), 19)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_avro_less_rows(self):
    """Request ``n_rows=13`` from the Avro sample and expect exactly 13 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.avro", type="avro", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_avro_more_rows(self):
    """Request ``n_rows=50`` from the Avro sample and expect fewer than 50 rows.

    Also checks the eight known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.avro", type="avro", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_tsv(self):
    """Load the TSV sample and check its shape.

    Expects 5 rows, the five sepal/petal/species column names, and a
    partition count equal to the configured ``n_partitions`` (1 when not
    configured).
    """
    expected_columns = [
        "Sepal length", "Sepal width", "Petal length", "Petal width", "Species",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.tsv", type="tsv")
    self.assertEqual(df.rows.count(), 5)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_tsv_less_rows(self):
    """Request ``n_rows=3`` from the TSV sample and expect exactly 3 rows.

    Also checks the five sepal/petal/species column names and that the
    partition count equals the configured ``n_partitions`` (1 when not
    configured).
    """
    expected_columns = [
        "Sepal length", "Sepal width", "Petal length", "Petal width", "Species",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.tsv", type="tsv", n_rows=3)
    self.assertEqual(df.rows.count(), 3)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_tsv_more_rows(self):
    """Request ``n_rows=50`` from the TSV sample and expect fewer than 50 rows.

    Also checks the five sepal/petal/species column names and that the
    partition count equals the configured ``n_partitions`` (1 when not
    configured).
    """
    expected_columns = [
        "Sepal length", "Sepal width", "Petal length", "Petal width", "Species",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.tsv", type="tsv", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_xls(self):
    """Load the Titanic ``.xls`` sample as Excel and check its shape.

    Expects 1309 rows, the fourteen known column names, and a partition
    count equal to the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "pclass", "survived", "name", "sex", "age", "sibsp", "parch",
        "ticket", "fare", "cabin", "embarked", "boat", "body", "home.dest",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/titanic3.xls", type="excel")
    self.assertEqual(df.rows.count(), 1309)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_xls_less_rows(self):
    """Request ``n_rows=13`` from the Titanic ``.xls`` sample and expect 13 rows.

    Also checks the fourteen known column names and that the partition
    count equals the configured ``n_partitions`` (1 when not configured).
    """
    expected_columns = [
        "pclass", "survived", "name", "sex", "age", "sibsp", "parch",
        "ticket", "fare", "cabin", "embarked", "boat", "body", "home.dest",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/titanic3.xls", type="excel", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())

def test_xls_more_rows(self):
    """Request more rows than the Titanic ``.xls`` sample is expected to hold.

    Asks for ``n_rows=5000`` and expects fewer than 5000 rows back, then
    checks the fourteen known column names and that the partition count
    equals the configured ``n_partitions`` (1 when not configured).
    """
    # NOTE(review): this test previously passed n_rows=50 while asserting
    # "< 5000", which could never fail. The request now matches the asserted
    # bound, mirroring the other *_more_rows tests -- confirm 5000 is the
    # intended value.
    requested_rows = 5000
    expected_columns = [
        "pclass", "survived", "name", "sex", "age", "sibsp", "parch",
        "ticket", "fare", "cabin", "embarked", "boat", "body", "home.dest",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/titanic3.xls", type="excel", n_rows=requested_rows)
    self.assertLess(df.rows.count(), requested_rows)
    self.assertEqual(df.cols.names(), expected_columns)
    self.assertEqual(self.config.get("n_partitions", 1), df.partitions())
Expand Down
23 changes: 20 additions & 3 deletions tests/test_load_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,36 @@
class TestCSVPandas(TestBase):

def test_csv(self):
    """Load the CSV sample with default options and check its shape.

    Expects 19 rows and the eight known column names.
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.csv")
    self.assertEqual(df.rows.count(), 19)
    self.assertEqual(df.cols.names(), expected_columns)

def test_csv_less_rows(self):
    """Request ``n_rows=13`` from the CSV sample and expect exactly 13 rows.

    Also checks the eight known column names.
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.csv", n_rows=13)
    self.assertEqual(df.rows.count(), 13)
    self.assertEqual(df.cols.names(), expected_columns)

def test_csv_more_rows(self):
    """Request ``n_rows=50`` from the CSV sample and expect fewer than 50 rows.

    Also checks the eight known column names.
    """
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Load once; the earlier load from the un-prefixed path was dead code.
    df = self.load_dataframe("../../examples/data/foo.csv", n_rows=50)
    self.assertLess(df.rows.count(), 50)
    self.assertEqual(df.cols.names(), expected_columns)

def test_csv_no_header(self):
    """Load the CSV sample with ``header=None`` and check the column names.

    Expects the eight columns to be named by the integers 0 through 7.
    """
    source_path = "../../examples/data/foo.csv"
    df = self.load_dataframe(source_path, header=None)

    column_names = df.cols.names()
    positional_names = list(range(8))  # [0, 1, ..., 7]
    self.assertEqual(column_names, positional_names)

def test_csv_null(self):
    """Load the CSV sample with ``null_value="null"`` and check the null mask.

    Expects the frequency of the null mask over the ``product`` column to
    report 18 ``False`` values and 1 ``True`` value.
    """
    source_path = "../../examples/data/foo.csv"
    df = self.load_dataframe(source_path, null_value="null")

    null_mask = df.mask.null(cols='product')
    observed = null_mask.cols.frequency()

    expected_values = [
        {'value': False, 'count': 18},
        {'value': True, 'count': 1},
    ]
    expected = {'frequency': {'product': {'values': expected_values}}}
    self.assertEqual(observed, expected)

def test_csv_semicolon(self):
    """Load the CSV sample with ``sep=";"`` and check the column names.

    Expects a single column whose name is the whole comma-joined header
    line, i.e. the header is not split on commas when ``;`` is the separator.
    """
    # Directory was misspelled "exmaples"; every other test in this class
    # reads from "../../examples/data/".
    df = self.load_dataframe("../../examples/data/foo.csv", sep=";")
    self.assertEqual(
        df.cols.names(),
        ['id,firstName,lastName,billingId,product,price,birth,dummyCol'],
    )

def test_csv_coma(self):
    """Load the CSV sample with an explicit ``sep=","`` and check the columns.

    Expects the eight known column names.
    """
    # NOTE(review): "coma" in the method name looks like a typo for "comma";
    # the name is kept so the test identifier stays stable.
    expected_columns = [
        "id", "firstName", "lastName", "billingId",
        "product", "price", "birth", "dummyCol",
    ]
    # Directory was misspelled "exmaples"; every other test in this class
    # reads from "../../examples/data/".
    df = self.load_dataframe("../../examples/data/foo.csv", sep=",")
    self.assertEqual(df.cols.names(), expected_columns)

class TestCSVDask(TestCSVPandas):
config = {'engine': 'dask', 'n_partitions': 1}
Expand Down

0 comments on commit 6199e79

Please sign in to comment.