# CodeOptiX #1 — CI behavior-check workflow (.github/workflows/codeoptix.yml)
---
# Evaluates agent behavior on every push/PR to main or develop:
# runs `codeoptix eval`, generates a reflection report, uploads both as
# artifacts, fails the job on failed behaviors, and comments on the PR.
name: CodeOptiX Behavior Check

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main, develop]

# Default GITHUB_TOKEN is read-only on many repos; the PR-comment step
# needs pull-requests write access to post via the REST API.
permissions:
  contents: read
  pull-requests: write

jobs:
  codeoptix-eval:
    name: Evaluate Agent Behavior
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read it as the float 3.12
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Install CodeOptiX
        run: |
          uv sync --dev

      - name: Run CodeOptiX Evaluation
        env:
          # Use secrets for API keys
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        run: |
          # Run evaluation with specified behaviors
          codeoptix eval \
            --agent codex \
            --behaviors insecure-code,vacuous-tests,plan-drift \
            --llm-provider openai \
            --output .codeoptix/results.json \
            --context '{"plan": "Check PR for security issues and test quality"}'

      # always(): produce a report even when the eval step failed.
      - name: Generate Reflection Report
        if: always()
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          # Generate reflection from results
          codeoptix reflect \
            --input .codeoptix/results.json \
            --output .codeoptix/reflection.md

      - name: Upload Results Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: codeoptix-results
          path: |
            .codeoptix/results.json
            .codeoptix/reflection.md
          retention-days: 30

      - name: Check Behavior Results
        if: always()
        run: |
          # Fail the job iff any evaluated behavior did not pass.
          # A missing/unreadable results file is treated as non-fatal
          # (exit 0) so this step never masks the eval step's own failure.
          python << 'EOF'
          import json
          import sys

          try:
              with open('.codeoptix/results.json', 'r') as f:
                  results = json.load(f)
              failed_behaviors = []
              for behavior_name, behavior_data in results.get('behaviors', {}).items():
                  if not behavior_data.get('passed', True):
                      failed_behaviors.append(behavior_name)
              if failed_behaviors:
                  print(f"❌ Failed behaviors: {', '.join(failed_behaviors)}")
                  sys.exit(1)
              else:
                  print("✅ All behaviors passed")
                  sys.exit(0)
          except FileNotFoundError:
              print("⚠️ Results file not found")
              sys.exit(0)
          except Exception as e:
              print(f"⚠️ Error checking results: {e}")
              sys.exit(0)
          EOF

      - name: Comment PR with Results
        if: always() && github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            try {
              // Read results produced by the evaluation step; silently skip
              // the comment when the eval never wrote a results file.
              const resultsPath = '.codeoptix/results.json';
              const reflectionPath = '.codeoptix/reflection.md';
              if (!fs.existsSync(resultsPath)) {
                return;
              }
              const results = JSON.parse(fs.readFileSync(resultsPath, 'utf8'));
              // Build summary
              let summary = '## 🔍 CodeOptiX Behavior Check Results\n\n';
              const behaviors = results.behaviors || {};
              let allPassed = true;
              for (const [name, data] of Object.entries(behaviors)) {
                const passed = data.passed || false;
                const score = data.score || 0;
                const emoji = passed ? '✅' : '❌';
                summary += `${emoji} **${name}**: ${(score * 100).toFixed(0)}% ${passed ? 'PASSED' : 'FAILED'}\n`;
                if (!passed && data.evidence) {
                  summary += `  - Issues: ${data.evidence.slice(0, 2).join(', ')}\n`;
                }
                if (!passed) allPassed = false;
              }
              summary += `\n**Overall**: ${allPassed ? '✅ All checks passed' : '❌ Some checks failed'}\n`;
              // Add reflection link if available
              if (fs.existsSync(reflectionPath)) {
                summary += `\n📄 [View detailed reflection report](./.codeoptix/reflection.md)`;
              }
              // await so an API failure surfaces in this try/catch instead of
              // being dropped as an unhandled promise rejection.
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: summary
              });
            } catch (error) {
              console.log('Error posting comment:', error);
            }