# Create Parquet - workflow file for run #935

# Converts the published pypi-data.sqlite dump to Parquet and uploads the
# result as an asset of the "latest" release.
name: Create Parquet

# Triggered manually or by cron (minute 50 of every 6th hour).
on:
  workflow_dispatch:
  schedule:
    - cron: '50 */6 * * *'

# Runs for the same ref share a single concurrency group.
concurrency: ci-parquet-${{ github.ref }}

# The publish step uploads a release asset, which needs write access.
permissions:
  contents: write

jobs:
  create-parquet:
    name: Create parquet
    runs-on: ubuntu-latest
    # Only run in the pypi-data/pypi-json-data repository (e.g. not in forks).
    if: github.repository == 'pypi-data/pypi-json-data'
    steps:
      # Reclaim runner disk space before the database is downloaded.
      # Pinned to a release tag rather than the mutable `master` branch.
      - name: Maximize build space
        uses: easimon/maximize-build-space@v10
        with:
          root-reserve-mb: 512
          swap-size-mb: 1024
          remove-dotnet: 'true'
          remove-codeql: 'true'
          remove-docker-images: 'true'
          remove-android: 'true'

      - name: Install tools
        shell: bash
        run: |
          set -euo pipefail
          IFS=$'\n\t'
          # Refresh the package index first so the install does not hit
          # stale mirror entries; apt-get is the script-safe front end.
          sudo apt-get update
          sudo apt-get install -y wget libdeflate-tools

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install Python dependencies
        run: pip install "polars[pyarrow]" tqdm click

      # The conversion script is fetched from the main branch instead of
      # checking out the repository.
      - name: Download conversion script
        run: wget https://raw.githubusercontent.com/pypi-data/pypi-json-data/main/pypi_data_downloader/parquet.py

      - name: Download SQLite database
        run: wget --progress=dot:giga https://github.com/pypi-data/pypi-json-data/releases/download/latest/pypi-data.sqlite.gz

      - name: Decompress SQLite database
        run: libdeflate-gzip -d pypi-data.sqlite.gz

      - name: Convert SQLite to Parquet
        run: python parquet.py pypi-data.sqlite pypi-data.parquet

      # Attaches the Parquet file to the release tagged "latest".
      - name: Publish
        uses: softprops/action-gh-release@v1
        with:
          draft: false
          name: 'Latest Release'
          tag_name: 'latest'
          # NOTE(review): presumably a branch named "empty" exists in the
          # repository for the tag to point at - confirm.
          target_commitish: 'empty'
          files: 'pypi-data.parquet'