
generate llms.txt for our docs #3273

Open
wants to merge 18 commits into develop (base branch)
90 changes: 90 additions & 0 deletions .github/workflows/docs_summarization_check.yml
@@ -0,0 +1,90 @@
name: Check Docs Summarization

on:
push:
branches: [release/**]

jobs:
check-batch:
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}
A Contributor commented:

I am not sure if I am missing something here, but it seems like the only trigger for this workflow is a push event on the release branches. I have two concerns about this:

  1. The `if` condition `github.event.workflow_run.conclusion == 'success'` may never evaluate to true, because push events do not carry a `workflow_run` payload, and therefore no conclusion.

  2. The current trigger logic might also fire when we backport docs changes or similar to existing release branches. Is this the desired behaviour?
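If the intent is to run this check after a prior workflow finishes (mirroring the submit workflow later in this diff), a sketch of a trigger that would make the `if` condition meaningful might look like this; the `release-prepare` workflow name is taken from the companion file and may need adjusting:

```yaml
# Sketch only: trigger on completion of the release-prepare workflow,
# so that github.event.workflow_run is actually populated and the
# conclusion check can succeed.
on:
  workflow_run:
    workflows: ["release-prepare"]
    types:
      - completed
```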

permissions:
contents: read
id-token: write
actions: read

steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install openai huggingface_hub

- name: List artifacts
uses: actions/github-script@v6
id: artifacts
with:
script: |
const artifacts = await github.rest.actions.listArtifactsForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
});
const batchArtifact = artifacts.data.artifacts
.find(artifact => artifact.name.startsWith('batch-id-'));
if (!batchArtifact) {
throw new Error('No batch ID artifact found');
}
console.log(`Found artifact: ${batchArtifact.name}`);
return batchArtifact.name;

- name: Download batch ID
uses: actions/download-artifact@v3
with:
name: ${{ steps.artifacts.outputs.result }}

- name: Download repomix outputs
uses: actions/download-artifact@v3
with:
name: repomix-outputs
path: repomix-outputs

- name: Process batch results and upload to HuggingFace
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
# Process OpenAI batch results
python scripts/check_batch_output.py

# Upload all files to HuggingFace
A Contributor commented:

The first half of this step lives in the `check_batch_output.py` Python script, whereas the second half is hard-coded here. Why don't we merge the two? Wouldn't it be easier to manage that way?
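Merging the two halves, as the comment suggests, could look like the sketch below. The helper names and the separation into a pure "plan" step are assumptions for illustration, not the repo's actual layout; only the planning function is testable offline, since the upload loop needs `HF_TOKEN` and network access:

```python
import os

# Hypothetical helper: build an upload plan mapping local files to their
# target paths in the zenml/llms.txt dataset repo, matching the inline
# shell step above (OpenAI summary plus the three repomix outputs).
def build_upload_plan(repomix_dir="repomix-outputs"):
    plan = [("zenml_docs.txt", "how-to-guides.txt")]  # OpenAI summary
    for name in ["component-guide.txt", "basics.txt", "llms-full.txt"]:
        plan.append((os.path.join(repomix_dir, name), name))
    return plan


def upload_all(plan):
    # Deferred import so the planning half stays testable without the
    # huggingface_hub dependency installed.
    from huggingface_hub import HfApi

    api = HfApi()
    for local_path, repo_path in plan:
        api.upload_file(
            token=os.environ["HF_TOKEN"],
            repo_id="zenml/llms.txt",
            repo_type="dataset",
            path_in_repo=repo_path,
            path_or_fileobj=local_path,
        )
```

Folding this into `check_batch_output.py` would keep the batch processing and the uploads in one reviewable, locally runnable place instead of splitting them between a script and inline workflow shell.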

python -c '
from huggingface_hub import HfApi
import os

api = HfApi()

# Upload OpenAI summary
api.upload_file(
token=os.environ["HF_TOKEN"],
repo_id="zenml/llms.txt",
repo_type="dataset",
path_in_repo="how-to-guides.txt",
path_or_fileobj="zenml_docs.txt",
)

# Upload repomix outputs
for filename in ["component-guide.txt", "basics.txt", "llms-full.txt"]:
api.upload_file(
token=os.environ["HF_TOKEN"],
repo_id="zenml/llms.txt",
repo_type="dataset",
path_in_repo=filename,
path_or_fileobj=f"repomix-outputs/{filename}",
)
'
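`scripts/check_batch_output.py` is not shown in this diff; presumably it retrieves the batch by the saved ID and writes `zenml_docs.txt`. As a rough sketch of its parsing half, under the assumption it consumes the standard OpenAI Batch API output format (one JSON object per line, with the chat completion nested under `response.body`):

```python
import json

# Hypothetical parser for OpenAI Batch API output: each non-empty line
# is a JSON record whose completion text sits at
# response.body.choices[0].message.content.
def extract_summaries(batch_output_text):
    summaries = []
    for line in batch_output_text.splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        body = record["response"]["body"]
        summaries.append(body["choices"][0]["message"]["content"])
    return summaries
```

The real script would additionally fetch the output file via the OpenAI client (`client.batches.retrieve(batch_id)` and `client.files.content(...)`) and fail the job if the batch did not complete.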
76 changes: 76 additions & 0 deletions .github/workflows/docs_summarization_submit.yml
@@ -0,0 +1,76 @@
name: Submit Docs Summarization

on:
workflow_run:
workflows: ["release-prepare"]
types:
- completed

jobs:
submit-batch:
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}
permissions:
contents: read
id-token: write
actions: write

steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install openai repomix  # pathlib is stdlib on Python 3.11; the PyPI backport is unnecessary

- name: Generate repomix outputs
run: |
# Create directory for outputs
mkdir -p repomix-outputs

# Full docs
repomix --include "docs/book/**/*.md"
mv repomix-output.txt repomix-outputs/llms-full.txt

# Component guide
repomix --include "docs/book/component-guide/**/*.md"
mv repomix-output.txt repomix-outputs/component-guide.txt

# User guide
repomix --include "docs/book/user-guide/**/*.md"
mv repomix-output.txt user-guide.txt

# Getting started
repomix --include "docs/book/getting-started/**/*.md"
mv repomix-output.txt getting-started.txt

# Merge user guide and getting started into basics
cat user-guide.txt getting-started.txt > repomix-outputs/basics.txt
rm user-guide.txt getting-started.txt

- name: Upload repomix outputs
uses: actions/upload-artifact@v3
with:
name: repomix-outputs
path: repomix-outputs
retention-days: 5

- name: Submit batch job
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
id: submit
run: |
python scripts/summarize_docs.py
echo "batch_id=$(cat batch_id.txt)" >> $GITHUB_OUTPUT

- name: Upload batch ID
uses: actions/upload-artifact@v3
with:
name: batch-id-${{ steps.submit.outputs.batch_id }}
path: batch_id.txt
retention-days: 5
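`scripts/summarize_docs.py` is likewise not part of this diff. Assuming it submits one chat-completion request per docs chunk through the OpenAI Batch API, the request-building half might look like the following; the model name, prompt, and `doc-{i}` IDs are illustrative assumptions:

```python
import json

# Hypothetical builder for OpenAI Batch API input: one JSONL line per
# docs chunk, each a POST to /v1/chat/completions with a custom_id so
# results can be matched back to their source chunk.
def build_batch_requests(chunks, model="gpt-4o-mini"):
    lines = []
    for i, chunk in enumerate(chunks):
        request = {
            "custom_id": f"doc-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [
                    {"role": "system", "content": "Summarize this documentation page."},
                    {"role": "user", "content": chunk},
                ],
            },
        }
        lines.append(json.dumps(request))
    return "\n".join(lines)
```

The submitting side would then upload this JSONL with `purpose="batch"`, call `client.batches.create(..., endpoint="/v1/chat/completions", completion_window="24h")`, and write the returned batch ID to `batch_id.txt` for the artifact upload in the step above.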