# Skip to content
#
# Merge staging
#
# Merge staging #112
#
# Workflow file for this run

# ---------------------------------------------------------
# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.
# ---------------------------------------------------------
# This workflow will run tests and do packaging for contrib/sarplus.
#
# References:
# * GitHub Actions workflow templates
# + [python package](https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml)
# + [scala](https://github.com/actions/starter-workflows/blob/main/ci/scala.yml)
# * [GitHub hosted runner - Ubuntu 22.04 LTS](https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2204-Readme.md)
# * [Azure Databricks runtime releases](https://docs.microsoft.com/en-us/azure/databricks/release-notes/runtime/releases)
# * [Azure Synapse Analytics runtimes](https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-version-support)
name: sarplus test and package

on:
  push:
    # Run only when the sarplus sources, its VERSION file, or this workflow
    # file change.
    paths:
      - contrib/sarplus/python/**
      - contrib/sarplus/scala/**
      - contrib/sarplus/VERSION
      - .github/workflows/sarplus.yml

  # Enable manual trigger
  workflow_dispatch:
env:
  # Directory roots, relative to the checked-out workspace, that the shell
  # steps of the jobs `cd` into.
  SARPLUS_ROOT: ${{ github.workspace }}/contrib/sarplus
  PYTHON_ROOT: ${{ github.workspace }}/contrib/sarplus/python
  SCALA_ROOT: ${{ github.workspace }}/contrib/sarplus/scala
jobs:
  python:
    # Test pysarplus with different versions of Python.
    # Package pysarplus and upload as GitHub workflow artifact when merged into
    # the main branch.
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # Quoted so the versions stay strings (an unquoted 3.10 would be
        # read as the float 3.1).
        python-version: ["3.8", "3.9"]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install -U build cibuildwheel pip twine
          python -m pip install -U flake8 pytest pytest-cov scikit-learn

      - name: Lint with flake8
        run: |
          cd "${PYTHON_ROOT}"
          # See https://flake8.pycqa.org/en/latest/user/index.html
          flake8 .

      - name: Package and check
        run: |
          # build
          cd "${PYTHON_ROOT}"
          cp "${SARPLUS_ROOT}/VERSION" ./pysarplus/VERSION
          python -m build --sdist
          # Build only the wheel for this job's interpreter: the minor
          # version is the part after the first dot, e.g. "3.9" -> cp39.
          PYTHON_VER='${{ matrix.python-version }}'
          MINOR_VER="${PYTHON_VER#*.}"
          CIBW_BUILD="cp3${MINOR_VER}-manylinux_x86_64" python -m cibuildwheel --platform linux --output-dir dist
          python -m twine check dist/*
          # set sarplus_version for the artifact upload step
          SARPLUS_VERSION=$(cat "${SARPLUS_ROOT}/VERSION")
          echo "sarplus_version=${SARPLUS_VERSION}" >> "${GITHUB_ENV}"

      - name: Test
        run: |
          cd "${PYTHON_ROOT}"
          python -m pip install dist/*.whl
          cd "${SCALA_ROOT}"
          # Read the Spark version and jar directory from the installed
          # pyspark package; the greps are anchored to the field name so no
          # other line of `pip show` output can match.
          export SPARK_VERSION=$(python -m pip show pyspark | grep -i '^version:' | cut -d ' ' -f 2)
          SPARK_JAR_DIR=$(python -m pip show pyspark | grep -i '^location:' | cut -d ' ' -f2)/pyspark/jars
          SCALA_JAR=$(ls ${SPARK_JAR_DIR}/scala-library*)
          HADOOP_JAR=$(ls ${SPARK_JAR_DIR}/hadoop-client-api*)
          # Take the version from each jar file name: drop everything up to
          # the last "-" and then the trailing extension.
          SCALA_VERSION=${SCALA_JAR##*-}
          export SCALA_VERSION=${SCALA_VERSION%.*}
          HADOOP_VERSION=${HADOOP_JAR##*-}
          export HADOOP_VERSION=${HADOOP_VERSION%.*}
          sbt ++"${SCALA_VERSION}"! package
          cd "${PYTHON_ROOT}"
          pytest ./tests

      - name: Upload Python wheel as GitHub artifact when merged into main
        # Upload the whl file of the specific python version
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: pysarplus-${{ env.sarplus_version }}-cp${{ matrix.python-version }}-wheel
          path: ${{ env.PYTHON_ROOT }}/dist/*.whl

      - name: Upload Python source as GitHub artifact when merged into main
        # Only one pysarplus source tar file is needed, so it is taken from a
        # single matrix entry. The version compared here must be one listed
        # in strategy.matrix.python-version, otherwise this step never runs.
        if: github.ref == 'refs/heads/main' && matrix.python-version == '3.9'
        uses: actions/upload-artifact@v4
        with:
          name: pysarplus-${{ env.sarplus_version }}-source
          path: ${{ env.PYTHON_ROOT }}/dist/*.tar.gz

  scala:
    # Test sarplus with different versions of Databricks and Synapse runtime
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        include:
          # For Azure Databricks 7.3 LTS
          - scala-version: "2.12.10"
            spark-version: "3.0.1"
            hadoop-version: "2.7.4"
          # For Azure Databricks 9.1 LTS and Azure Synapse Apache Spark 3.1 runtime
          - scala-version: "2.12.10"
            spark-version: "3.1.2"
            hadoop-version: "2.7.4"
          # For Azure Databricks 10.4 LTS
          - scala-version: "2.12.14"
            spark-version: "3.2.1"
            hadoop-version: "3.3.1"
    steps:
      - uses: actions/checkout@v4

      - name: Test
        run: |
          cd "${SCALA_ROOT}"
          export SPARK_VERSION="${{ matrix.spark-version }}"
          export HADOOP_VERSION="${{ matrix.hadoop-version }}"
          sbt ++${{ matrix.scala-version }}! test

      - name: Package
        # Generate jar files for Spark below 3.2 and above 3.2
        # (the Spark 3.0.1 matrix entry is test-only and is skipped here).
        if: github.ref == 'refs/heads/main' && matrix.spark-version != '3.0.1'
        env:
          GPG_KEY: ${{ secrets.SARPLUS_GPG_PRI_KEY_ASC }}
        run: |
          SARPLUS_VERSION=$(cat "${SARPLUS_ROOT}/VERSION")
          # generate artifacts
          cd "${SCALA_ROOT}"
          export SPARK_VERSION='${{ matrix.spark-version }}'
          export HADOOP_VERSION='${{ matrix.hadoop-version }}'
          export SCALA_VERSION='${{ matrix.scala-version }}'
          sbt ++${SCALA_VERSION}! package
          sbt ++${SCALA_VERSION}! packageDoc
          sbt ++${SCALA_VERSION}! packageSrc
          sbt ++${SCALA_VERSION}! makePom
          # sign with GPG
          cd "${SCALA_ROOT}/target/scala-2.12"
          gpg --import <(cat <<< "${GPG_KEY}")
          for file in *.jar *.pom; do gpg -ab "${file}"; done
          # bundle: the jar name and the files collected depend on the Spark
          # version, so exactly one of the two branches runs per matrix entry.
          if [ '${{ matrix.spark-version }}' = '3.1.2' ]; then
            jar cvf sarplus-${SARPLUS_VERSION}-bundle_2.12-spark-${SPARK_VERSION}.jar \
              sarplus_*.jar \
              sarplus_*.pom \
              sarplus_*.asc
          fi
          if [ '${{ matrix.spark-version }}' = '3.2.1' ]; then
            jar cvf sarplus-spark-3.2-plus-bundle_2.12-${SARPLUS_VERSION}.jar \
              sarplus-spark*.jar \
              sarplus-spark*.pom \
              sarplus-spark*.asc
          fi
          # set sarplus_version for the artifact upload step
          echo "sarplus_version=${SARPLUS_VERSION}" >> "${GITHUB_ENV}"

      - name: Upload Scala bundle as GitHub artifact when merged into main
        # Same condition as the Package step: when packaging is skipped there
        # is no bundle to upload and sarplus_version is not set.
        if: github.ref == 'refs/heads/main' && matrix.spark-version != '3.0.1'
        uses: actions/upload-artifact@v4
        with:
          name: sarplus-${{ env.sarplus_version }}-bundle_2.12-spark-${{ matrix.spark-version }}-jar
          path: ${{ env.SCALA_ROOT }}/target/scala-2.12/*bundle*.jar