Add experimental support for db2. (#107)
* Draft integration of db2.

* Fix date gap.

* Add dependencies.

* Fix capitalization tests.

* Add case distinction for varchar column.

* Add bash script for local development.

* Add changelog entry.

* Update CHANGELOG.rst

Co-authored-by: Ignacio Vergara Kausel <[email protected]>

* Add skip message.

---------

Co-authored-by: Ignacio Vergara Kausel <[email protected]>
kklein and ivergara authored Feb 24, 2023
1 parent 9f2d5e2 commit e5b1e35
Showing 9 changed files with 156 additions and 19 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/ci.yaml
@@ -155,6 +155,55 @@ jobs:
with:
file: ./coverage.xml


linux-integration_tests-db2:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - DB2"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8', '3.9', '3.10' ]
services:
DB:
image: ibmcom/db2:11.5.5.1
env:
LICENSE: accept
DB2INSTANCE: db2inst1
DB2INST1_PASSWORD: password
DBNAME: testdb
UPDATEAVAIL: "NO"
options: --privileged
ports:
- 50000:50000

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
- name: Fetch full git history
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Run Integration Tests
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=db2 tests/integration
- name: Generate code coverage report
uses: codecov/[email protected]
with:
file: ./coverage.xml


linux-integration_tests-snowflake:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Snowflake"
runs-on: ubuntu-latest
@@ -228,6 +277,7 @@ jobs:
with:
file: ./coverage.xml


linux-integration_tests-impala-column-pt1:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt1"
runs-on: ubuntu-20.04
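The Db2 service container above performs a lengthy first-time setup before it accepts connections, and the workflow as shown does not include an explicit wait step. A minimal readiness poll, assuming the credentials and port from the service definition (`wait_for_db2` is a hypothetical helper, not part of this commit):

```python
import time

import sqlalchemy as sa


def wait_for_db2(
    url="db2+ibm_db://db2inst1:password@localhost:50000/testdb",
    timeout=600,
    interval=10,
):
    """Retry a trivial query until the Db2 container is ready (hypothetical helper)."""
    engine = sa.create_engine(url)
    deadline = time.time() + timeout
    while True:
        try:
            with engine.connect() as connection:
                # SYSIBM.SYSDUMMY1 is Db2's built-in one-row dummy table.
                connection.execute("SELECT 1 FROM SYSIBM.SYSDUMMY1")
            return
        except sa.exc.DBAPIError:
            if time.time() > deadline:
                raise
            time.sleep(interval)
```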
10 changes: 9 additions & 1 deletion CHANGELOG.rst
@@ -7,7 +7,15 @@
Changelog
=========

1.3.0 - 2022.xx.xx
1.4.0 - 2023.02.24
------------------

**New features**

- Add partial and experimental support for db2 as a backend.


1.3.0 - 2023.01.17
------------------

**New features**
2 changes: 2 additions & 0 deletions environment.yml
@@ -25,3 +25,5 @@ dependencies:
- flit
- sphinx-autodoc-typehints
- impyla
- ibm_db
- ibm_db_sa
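On the dependency side, `ibm_db` is IBM's DB-API driver and `ibm_db_sa` is the SQLAlchemy dialect layered on top of it; together they register the `db2+ibm_db://` URL scheme used by the test suite. A minimal sketch (engine creation is lazy, so no running server is required):

```python
import sqlalchemy as sa

# URL mirrors tests/integration/conftest.py; adjust host and credentials as needed.
engine = sa.create_engine("db2+ibm_db://db2inst1:password@localhost:50000/testdb")

# The dialect reports its name as "ibm_db_sa" -- precisely the value that the
# new is_db2() predicate in db_access.py checks for.
print(engine.name)  # ibm_db_sa
```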
21 changes: 21 additions & 0 deletions src/datajudge/db_access.py
@@ -33,6 +33,10 @@ def is_impala(engine: sa.engine.Engine) -> bool:
return engine.name == "impala"


def is_db2(engine: sa.engine.Engine) -> bool:
return engine.name == "ibm_db_sa"


def get_table_columns(table, column_names):
return [table.c[column_name] for column_name in column_names]

@@ -421,6 +425,15 @@ def get_date_span(engine, ref, date_column_name):
)
]
)
elif is_db2(engine):
selection = sa.select(
[
sa.func.days_between(
sa.func.max(column),
sa.func.min(column),
)
]
)
else:
raise NotImplementedError(
"Date spans not yet implemented for this sql dialect."
@@ -663,6 +676,14 @@ def get_date_gaps(
)
> legitimate_gap_size
)
elif is_db2(engine):
gap_condition = (
sa.func.days_between(
start_table.c[start_column],
end_table.c[end_column],
)
> legitimate_gap_size
)
else:
raise NotImplementedError(f"Date gaps not yet implemented for {engine.name}.")

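Both branches above delegate the day arithmetic to Db2's built-in `DAYS_BETWEEN` scalar function rather than the subtraction- or `datediff`-based expressions used for the other dialects. Because `sa.func` renders any attribute access as a function call verbatim, the generated SQL can be previewed without a Db2 connection; a sketch with hypothetical table and column names:

```python
import sqlalchemy as sa

col_date = sa.column("col_date")
example_table = sa.table("example_table", col_date)

# Mirrors the get_date_span branch: DAYS_BETWEEN(MAX(col), MIN(col)).
selection = sa.select(
    [sa.func.days_between(sa.func.max(col_date), sa.func.min(col_date))]
).select_from(example_table)

print(selection)
# SELECT days_between(max(col_date), min(col_date)) AS days_between_1
# FROM example_table
```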
5 changes: 5 additions & 0 deletions start_db2.sh
@@ -0,0 +1,5 @@
#!/bin/bash

set -e

docker run -itd --name mydb2 --privileged=true -p 50000:50000 -e LICENSE=accept -e DB2INST1_PASSWORD=password -e DBNAME=testdb -v ~/database ibmcom/db2
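The script mirrors the CI service definition (same port, password, and database name). Once the container has finished its first-time setup (this can take several minutes; the ibmcom/db2 image logs a completion message when done), the integration tests can target it locally with the same flag CI uses: `pytest --backend=db2 tests/integration`.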
42 changes: 31 additions & 11 deletions tests/integration/conftest.py
@@ -8,7 +8,7 @@
import sqlalchemy as sa
from impala.dbapi import connect

from datajudge.db_access import apply_patches, is_bigquery, is_impala, is_mssql
from datajudge.db_access import apply_patches, is_bigquery, is_db2, is_impala, is_mssql

TEST_DB_NAME = "tempdb"
SCHEMA = "dbo" # 'dbo' is the standard schema in mssql
@@ -30,6 +30,8 @@ def conn_creator():

if backend == "postgres":
connection_string = f"postgresql://datajudge:datajudge@{address}:5432/datajudge"
if backend == "db2":
connection_string = f"db2+ibm_db://db2inst1:password@{address}:50000/testdb"
elif "mssql" in backend:
connection_string = (
f"mssql+pyodbc://sa:datajudge-123@{address}:1433/{TEST_DB_NAME}"
@@ -56,6 +58,12 @@ def conn_creator():
return engine


def _string_column(engine):
if is_db2(engine):
return sa.String(40)
return sa.String()


@pytest.fixture(scope="module")
def engine(backend):
engine = get_engine(backend)
@@ -111,7 +119,7 @@ def mix_table1(engine, metadata):
table_name = "mix_table1"
columns = [
sa.Column("col_int", sa.Integer()),
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
sa.Column("col_date", sa.DateTime()),
]
data = [
Expand All @@ -131,7 +139,7 @@ def mix_table2(engine, metadata):
table_name = "mix_table2"
columns = [
sa.Column("col_int", sa.Integer()),
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
sa.Column("col_date", sa.DateTime()),
]
data = [
@@ -152,7 +160,7 @@ def mix_table2_pk(engine, metadata):
table_name = "mix_table2_pk"
columns = [
sa.Column("col_int", sa.Integer(), primary_key=True),
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
sa.Column("col_date", sa.DateTime()),
]
data = [
@@ -477,7 +485,7 @@ def unique_table1(engine, metadata):
table_name = "unique_table1"
columns = [
sa.Column("col_int", sa.Integer()),
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
]
data = [{"col_int": i // 2, "col_varchar": f"hi{i // 3}"} for i in range(60)]
data += [
Expand All @@ -493,7 +501,7 @@ def unique_table2(engine, metadata):
table_name = "unique_table2"
columns = [
sa.Column("col_int", sa.Integer()),
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
]
data = [{"col_int": i // 2, "col_varchar": f"hi{i // 3}"} for i in range(40)]
_handle_table(engine, metadata, table_name, columns, data)
@@ -503,7 +511,7 @@ def unique_table2(engine, metadata):
@pytest.fixture(scope="module")
def nested_table(engine, metadata):
table_name = "nested_table"
columns = [sa.Column("nested_varchar", sa.String())]
columns = [sa.Column("nested_varchar", _string_column(engine))]
data = [
{"nested_varchar": "ABC#1,"},
{"nested_varchar": "ABC#1,DEF#2,"},
@@ -517,7 +525,7 @@ def nested_table(engine, metadata):
def varchar_table1(engine, metadata):
table_name = "varchar_table1"
columns = [
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
]
data = [{"col_varchar": "qq" * i} for i in range(1, 10)]
data.append({"col_varchar": None})
@@ -529,7 +537,7 @@ def varchar_table1(engine, metadata):
def varchar_table2(engine, metadata):
table_name = "varchar_table2"
columns = [
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
]
data = [{"col_varchar": "qq" * i} for i in range(2, 11)]
_handle_table(engine, metadata, table_name, columns, data)
@@ -540,7 +548,7 @@ def varchar_table2(engine, metadata):
def varchar_table_real(engine, metadata):
table_name = "varchar_table_real"
columns = [
sa.Column("col_varchar", sa.String()),
sa.Column("col_varchar", _string_column(engine)),
]
data = [
{"col_varchar": val}
@@ -754,6 +762,10 @@ def capitalization_table(engine, metadata):
str_datatype = "STRING"
# Impala supports primary keys but uses a different grammar.
primary_key = ""
elif is_db2(engine):
str_datatype = "VARCHAR(20)"
# Primary key needs to be non-nullable.
primary_key = ""
else:
str_datatype = "TEXT"
with engine.connect() as connection:
@@ -796,7 +808,15 @@ def pytest_addoption(parser):
parser.addoption(
"--backend",
choices=(
("mssql", "mssql-freetds", "postgres", "snowflake", "bigquery", "impala")
(
"mssql",
"mssql-freetds",
"postgres",
"snowflake",
"bigquery",
"impala",
"db2",
)
),
help="which database backend to use to run the integration tests",
)
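The recurring swap of `sa.String()` for `_string_column(engine)` throughout these fixtures reflects that Db2 DDL requires an explicit length for VARCHAR columns, while the previously supported backends tolerate an unbounded `sa.String()`. The difference shows up directly in the compiled column types; a minimal sketch using SQLAlchemy's default type compiler:

```python
import sqlalchemy as sa

print(sa.String(40).compile())  # VARCHAR(40) -- valid Db2 DDL
print(sa.String().compile())    # VARCHAR -- Db2 rejects a VARCHAR without a length
```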
6 changes: 5 additions & 1 deletion tests/integration/test_column_capitalization.py
@@ -1,7 +1,7 @@
import pytest

from datajudge import Condition, WithinRequirement
from datajudge.db_access import is_bigquery, is_impala, is_mssql, is_postgresql
from datajudge.db_access import is_bigquery, is_db2, is_impala, is_mssql, is_postgresql

# These tests

@@ -21,6 +21,10 @@ def test_column_existence(
)
if is_postgresql(engine):
pytest.skip("Postgres interface always expects lower-cased columns.")
if is_db2(engine) and use_uppercase_query:
pytest.skip(
"Db2 interface transforms writes to lower-case, expects lower-case reads."
)
(
db_name,
schema_name,
(The remaining 2 changed files are not shown.)
