test(pyspark): fix failing pyspark test #25898
name: Backends
on:
  push:
    # Skip the backend suite if all changes are docs
    paths-ignore:
      - "docs/**"
      - "**/*.md"
      - "**/*.qmd"
      - "codecov.yml"
      - ".envrc"
      - ".codespellrc"
    branches:
      - main
      - "*.x.x"
  pull_request:
    # Skip the backend suite if all changes are docs
    paths-ignore:
      - "docs/**"
      - "**/*.md"
      - "**/*.qmd"
      - "codecov.yml"
      - ".envrc"
      - ".codespellrc"
    branches:
      - main
      - "*.x.x"
  merge_group:
permissions:
  # this allows extractions/setup-just to list releases for `just` at a higher
  # rate limit while restricting GITHUB_TOKEN permissions elsewhere
  contents: read
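# one run per ref and workflow: head_ref is only set for pull requests, so a
# new push to a PR cancels the in-flight run, while pushes to main fall back
# to the commit SHA and never cancel each other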
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
env:
  FORCE_COLOR: "1"
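  # directory where unixODBC looks for its config (odbcinst.ini), so the
  # mssql job picks up the checked-in driver setup under ci/odbc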
  ODBCSYSINI: "${{ github.workspace }}/ci/odbc"
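  # select the Hypothesis settings profile registered by the test suite; a
  # "ci" profile typically trades example count for predictable runtimes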
  HYPOTHESIS_PROFILE: "ci"
jobs:
  test_bigquery_lite:
    name: BigQuery ${{ matrix.os }} python-${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        python-version:
          - "3.10"
          - "3.13"
    steps:
      - name: checkout
        uses: actions/checkout@v4
      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}
      - name: install uv
        uses: astral-sh/setup-uv@v5
      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: run simple bigquery unit tests
        run: just ci-check "--extra bigquery" ibis/backends/bigquery/tests/unit
      - name: upload code coverage
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,bigquery,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}
  test_backends:
    name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        python-version:
          - "3.10"
          - "3.13"
        backend:
          - name: duckdb
            title: DuckDB
            serial: true
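            # serial backends skip the pytest-xdist parallel step and run in a
            # single process (see the "run serial tests" step below)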
            extras:
              - --extra duckdb
              - --extra deltalake
              - --extra geospatial
              - --extra examples
              - --extra decompiler
              - --extra polars
            additional_deps:
              - torch
          - name: clickhouse
            title: ClickHouse
            services:
              - clickhouse
            extras:
              - --extra clickhouse
              - --extra examples
          - name: sqlite
            title: SQLite
            extras:
              - --extra sqlite
          - name: datafusion
            title: DataFusion
            serial: true
            extras:
              - --extra datafusion
          - name: polars
            title: Polars
            extras:
              - --extra polars
              - --extra deltalake
          - name: mysql
            title: MySQL
            services:
              - mysql
            extras:
              - --extra mysql
              - --extra geospatial
              - --extra polars
            sys-deps:
              - libgeos-dev
              - default-libmysqlclient-dev
          - name: postgres
            title: PostgreSQL
            extras:
              - --extra postgres
              - --extra geospatial
            services:
              - postgres
            sys-deps:
              - libgeos-dev
          - name: postgres
            title: PostgreSQL + Torch
            extras:
              - --extra postgres
              - --extra geospatial
              - --extra polars
            additional_deps:
              - torch
            services:
              - postgres
            sys-deps:
              - libgeos-dev
          - name: risingwave
            title: RisingWave
            serial: true
            services:
              - risingwave
            extras:
              - --extra risingwave
          - name: impala
            title: Impala
            serial: true
            extras:
              - --extra impala
            services:
              - impala
              - kudu
            sys-deps:
              - cmake
              - ninja-build
          - name: mssql
            title: MS SQL Server
            extras:
              - --extra mssql
              - --extra polars
            services:
              - mssql
            sys-deps:
              - freetds-dev
              - unixodbc-dev
              - tdsodbc
          - name: trino
            title: Trino
            extras:
              - --extra trino
            services:
              - trino
          - name: druid
            title: Druid
            extras:
              - --extra druid
            services:
              - druid
          - name: exasol
            title: Exasol
            serial: true
            extras:
              - --extra exasol
            services:
              - exasol
          - name: oracle
            title: Oracle
            serial: true
            extras:
              - --extra oracle
              - --extra polars
            services:
              - oracle
          - name: flink
            title: Flink
            serial: true
            extras:
              - --extra flink
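            # quoted twice on purpose: additional_deps is join()ed into a
            # shell command below, so the inner quotes keep `<` in pandas<2.2
            # from being parsed as a shell redirection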
            additional_deps:
              - "'apache-flink==1.20.0'"
              - "'pandas<2.2'"
              - setuptools
            services:
              - flink
        include:
          - os: ubuntu-latest
            python-version: "3.11"
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: ubuntu-latest
            python-version: "3.11"
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          # pytorch wheel doesn't exist for windows + python 3.13, so test
          # against 3.12 until that's shipped
          - os: windows-latest
            python-version: "3.12"
            backend:
              name: duckdb
              title: DuckDB
              serial: true
              extras:
                - --extra duckdb
                - --extra deltalake
                - --extra geospatial
                - --extra examples
                - --extra decompiler
                - --extra polars
              additional_deps:
                - torch
          # also test duckdb with python 3.13 on windows, *without* pytorch
          - os: windows-latest
            python-version: "3.13"
            backend:
              name: duckdb
              title: DuckDB
              serial: true
              extras:
                - --extra duckdb
                - --extra deltalake
                - --extra geospatial
                - --extra examples
                - --extra decompiler
                - --extra polars
        exclude:
          - os: windows-latest
            python-version: "3.13"
            backend:
              name: duckdb
          - os: windows-latest
            backend:
              name: mysql
              title: MySQL
              extras:
                - --extra mysql
                - --extra geospatial
                - --extra polars
              services:
                - mysql
              sys-deps:
                - libgeos-dev
                - default-libmysqlclient-dev
          - os: windows-latest
            backend:
              name: clickhouse
              title: ClickHouse
              extras:
                - --extra clickhouse
                - --extra examples
              services:
                - clickhouse
          - os: windows-latest
            backend:
              name: postgres
              title: PostgreSQL
              extras:
                - --extra postgres
                - --extra geospatial
              services:
                - postgres
              sys-deps:
                - libgeos-dev
          - os: windows-latest
            backend:
              name: risingwave
              title: RisingWave
              serial: true
              services:
                - risingwave
              extras:
                - --extra risingwave
          - os: windows-latest
            backend:
              name: postgres
              title: PostgreSQL + Torch
              extras:
                - --extra postgres
                - --extra geospatial
                - --extra polars
              additional_deps:
                - torch
              services:
                - postgres
              sys-deps:
                - libgeos-dev
          # TODO(deepyaman): Test whether this works upon releasing https://github.com/cloudera/impyla/commit/bf1f94c3c4106ded6267d2485c1e939775a6a87f
          - os: ubuntu-latest
            python-version: "3.13"
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          - os: windows-latest
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          - os: windows-latest
            backend:
              name: mssql
              title: MS SQL Server
              extras:
                - --extra mssql
                - --extra polars
              services:
                - mssql
              sys-deps:
                - freetds-dev
                - unixodbc-dev
                - tdsodbc
          - os: windows-latest
            backend:
              name: trino
              title: Trino
              services:
                - trino
              extras:
                - --extra trino
          - os: windows-latest
            backend:
              name: druid
              title: Druid
              extras:
                - --extra druid
              services:
                - druid
          - os: windows-latest
            backend:
              name: oracle
              title: Oracle
              serial: true
              extras:
                - --extra oracle
                - --extra polars
              services:
                - oracle
          - os: ubuntu-latest
            python-version: "3.13"
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: windows-latest
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: windows-latest
            backend:
              name: exasol
              title: Exasol
              serial: true
              extras:
                - --extra exasol
              services:
                - exasol
    steps:
      - name: update and install system dependencies
        if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
        run: |
          set -euo pipefail
          sudo apt-get update -qq -y
          sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }}
      - name: install sqlite
        if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite'
        run: choco install sqlite
      - name: checkout
        uses: actions/checkout@v4
      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: download backend data
        run: just download-data
      - name: start services
        if: matrix.backend.services != null
        run: just up ${{ join(matrix.backend.services, ' ') }}
      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}
      - name: install uv
        uses: astral-sh/setup-uv@v5
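      # uv add --no-sync records the extra dependencies in pyproject.toml and
      # uv.lock without syncing the environment; those lockfile edits are
      # reverted by the checkout step after the tests run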
      - name: install other deps
        if: matrix.backend.additional_deps != null
        run: uv add --no-sync --optional ${{ matrix.backend.name }} ${{ join(matrix.backend.additional_deps, ' ') }}
      - name: show installed deps
        run: uv tree
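      # --numprocesses auto starts one pytest-xdist worker per CPU;
      # --dist=loadgroup keeps tests sharing an xdist_group mark on the same
      # worker so they never run concurrently with each other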
- name: "run parallel tests: ${{ matrix.backend.name }}" | |
if: ${{ !matrix.backend.serial }} | |
run: just ci-check "${{ join(matrix.backend.extras, ' ') }} --extra examples" -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
env: | |
IBIS_TEST_IMPALA_HOST: localhost | |
IBIS_TEST_IMPALA_PORT: 21050 | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: "run serial tests: ${{ matrix.backend.name }}" | |
if: matrix.backend.serial | |
run: just ci-check "${{ join(matrix.backend.extras, ' ') }} --extra examples" -m ${{ matrix.backend.name }} | |
env: | |
FLINK_REMOTE_CLUSTER_ADDR: localhost | |
FLINK_REMOTE_CLUSTER_PORT: "8081" | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: "run backend doctests: ${{ matrix.backend.name }}" | |
if: matrix.os == 'ubuntu-latest' | |
run: just backend-doctests ${{ matrix.backend.name }} | |
env: | |
FLINK_REMOTE_CLUSTER_ADDR: localhost | |
FLINK_REMOTE_CLUSTER_PORT: "8081" | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: checkout uv.lock and pyproject.toml | |
run: git checkout uv.lock pyproject.toml | |
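      # git status --porcelain prints one line per dirty or untracked file and
      # nothing when the tree is clean; grep -F . fails on empty input, so the
      # leading ! makes this step fail only when something was left behind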
      - name: check that no untracked files were produced
        shell: bash
        run: |
          ! git status --porcelain | grep -F .
      - name: upload code coverage
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}
      - name: Show docker compose logs on fail
        if: matrix.backend.services != null && failure()
        run: docker compose logs
  test_pyspark:
    name: PySpark ${{ matrix.tag }} ${{ matrix.pyspark-minor-version }} ubuntu-latest python-${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.10"
            pyspark-minor-version: "3.3"
            tag: local
            deps:
              - pyspark==3.3.4
              - pandas==1.5.3
              - numpy==1.23.5
          - python-version: "3.11"
            pyspark-minor-version: "3.5"
            tag: local
            deps:
              - delta-spark==3.2.1
          - python-version: "3.13"
            pyspark-minor-version: "3.5"
            tag: local
            deps:
              - setuptools==75.1.0
              - delta-spark==3.2.1
          - python-version: "3.12"
            pyspark-minor-version: "3.5"
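            # sc:// is the Spark Connect URI scheme: this entry runs the suite
            # against a standalone Spark Connect server instead of a local
            # in-process session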
            SPARK_REMOTE: "sc://localhost:15002"
            tag: remote
            deps:
              - setuptools==75.1.0
              - delta-spark==3.2.1
              - googleapis-common-protos
              - grpcio
              - grpcio-status
    steps:
      - name: checkout
        uses: actions/checkout@v4
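      # PySpark drives a JVM under the hood; Java 17 is supported by Spark 3.3+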
      - uses: actions/setup-java@v4
        with:
          distribution: microsoft
          java-version: 17
      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: start services
        if: matrix.tag == 'remote'
        run: just up spark-connect
      - name: download backend data
        run: just download-data
      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}
      - name: install uv
        uses: astral-sh/setup-uv@v5
      # lonboard requires a version of pandas that pyspark 3.3 is not compatible with
      - name: remove lonboard
        if: matrix.pyspark-minor-version == '3.3'
        run: uv remove --group docs --no-sync lonboard
      - name: install pyspark-specific dependencies
        run: uv add --no-sync ${{ join(matrix.deps, ' ') }}
      - name: install iceberg
        shell: bash
        run: just download-iceberg-jar ${{ matrix.pyspark-minor-version }}
      - name: run spark connect tests
        if: matrix.tag == 'remote'
        run: just ci-check "--extra pyspark --extra examples" -m pyspark
        env:
          SPARK_REMOTE: ${{ matrix.SPARK_REMOTE }}
      - name: run spark tests
        if: matrix.tag == 'local'
        run: just ci-check "--extra pyspark --extra examples" -m pyspark
      - name: check that no untracked files were produced
        shell: bash
        run: git checkout uv.lock pyproject.toml && ! git status --porcelain | grep -F .
      - name: upload code coverage
        # only upload coverage for jobs that aren't mostly xfails
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}
  backends:
    # this job exists so that we can use a single job from this workflow to gate merging
    runs-on: ubuntu-latest
    needs:
      - test_bigquery_lite
      - test_backends
      - test_pyspark
    steps:
      - run: exit 0