zenml-io · htahir1 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/.github/workflows/generate-pr-description.yml b/.github/workflows/generate-pr-description.yml
@@ -0,0 +1,70 @@
+name: Auto PR Description
+
+# Replaces an untouched PR template body with an AI-generated description.
+on:
+  pull_request:
+    types: [opened, edited]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  auto-describe:
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          source $HOME/.cargo/env
+          uv pip install --system requests openai
+
+      - name: Check for previous successful run
+        id: check_comment
+        run: |
+          PR_NUMBER="${{ github.event.pull_request.number }}"
+          COMMENT=$(gh api -X GET "/repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" | jq '.[] | select(.body | contains("Auto PR description generated successfully")) | .id')
+          if [ -n "$COMMENT" ]; then
+            echo "Workflow has already run successfully for this PR."
+            echo "skip=true" >> $GITHUB_OUTPUT
+          else
+            echo "skip=false" >> $GITHUB_OUTPUT
+          fi
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Wait for potential edits
+        if: steps.check_comment.outputs.skip == 'false'
+        run: sleep 300  # Wait for 5 minutes
+
+      - name: Generate PR description
+        if: steps.check_comment.outputs.skip == 'false'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          # Not a default Actions variable; the script reads the PR number from it.
+          GITHUB_EVENT_NUMBER: ${{ github.event.pull_request.number }}
+        run: python scripts/generate_pr_description.py
+
+      - name: Add success comment
+        if: steps.check_comment.outputs.skip == 'false'
+        run: gh issue comment "${{ github.event.pull_request.number }}" --body "Auto PR description generated successfully"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check for errors
+        if: failure()
+        run: echo "The PR description generation failed. Please check the logs for more information."
diff --git a/scripts/generate_pr_description.py b/scripts/generate_pr_description.py
@@ -0,0 +1,91 @@
+import os
+import requests
+import openai
+
+MAX_CHARS = 400000  # Character budget for the changes summary (enforced by truncate_changes)
+
+def truncate_changes(changes_summary):
+    """Truncates the changes summary to fit within MAX_CHARS."""
+    total_chars = 0
+    truncated_summary = []
+    for change in changes_summary:
+        change_chars = len(change)
+        if total_chars + change_chars > MAX_CHARS:
+            remaining_chars = MAX_CHARS - total_chars
+            if remaining_chars > 50:  # Ensure we're not adding just a few characters
+                truncated_change = change[:remaining_chars]
+                truncated_summary.append(truncated_change + "...")
+            break
+        total_chars += change_chars
+        truncated_summary.append(change)
+    return truncated_summary
+
+def generate_pr_description():
+    # GitHub API setup
+    token = os.environ['GITHUB_TOKEN']
+    repo = os.environ['GITHUB_REPOSITORY']
+    pr_number = os.environ['GITHUB_EVENT_NUMBER']
+    headers = {'Authorization': f'token {token}'}
+    api_url = f'https://api.github.com/repos/{repo}/pulls/{pr_number}'
+
+    # Get current PR description
+    pr_info = requests.get(api_url, headers=headers).json()
+    current_description = pr_info['body'] or ''
+
+    # Check if description matches the default template
+    default_template_indicator = "I implemented/fixed _ to achieve _."
+
+    if default_template_indicator in current_description:
+        # Get PR files
+        files_url = f'{api_url}/files'
+        files = requests.get(files_url, headers=headers).json()
+
+        # Process files
+        changes_summary = []
+        for file in files:
+            filename = file['filename']
+            status = file['status']
+
+            if status == 'added':
+                changes_summary.append(f"Added new file: {filename}")
+            elif status == 'removed':
+                changes_summary.append(f"Removed file: {filename}")
+            elif status == 'modified':
+                if file['binary']:
+                    changes_summary.append(f"Modified binary file: {filename}")
+                else:
+                    patch = file.get('patch', '')
+                    if patch:
+                        changes_summary.append(f"Modified {filename}:")
+                        changes_summary.append(patch)
+            elif status == 'renamed':
+                changes_summary.append(f"Renamed file from {file['previous_filename']} to {filename}")
+
+        # Truncate changes summary if it's too long
+        truncated_changes = truncate_changes(changes_summary)
+        changes_text = "\n".join(truncated_changes)
+
+        # Generate description using OpenAI
+        openai.api_key = os.environ['OPENAI_API_KEY']
+        response = openai.OpenAI().chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that generates concise pull request descriptions based on changes to files."},
+                {"role": "user", "content": f"Generate a brief, informative pull request description based on these changes:\n\n{changes_text}"}
+            ],
+            max_tokens=1000
+        )
+
+        generated_description = response.choices[0].message['content'].strip()
+
+        # Update PR description
+        data = {'body': generated_description}
+        requests.patch(api_url, json=data, headers=headers)
+        print(f"Updated PR description with generated content")
+        return True
+    else:
+        print("PR already has a non-default description. No action taken.")
+        return False
+
+if __name__ == "__main__":  # run only when executed directly as a script
+    generate_pr_description()