diff --git a/.gitignore b/.gitignore
index e12568547..b70da4bdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 
+venv/
+.venv/
 __integration_*__/
 __pycache__/
 __test_fixtures__/
diff --git a/pipelines/azure-benchmarks.yml b/pipelines/azure-benchmarks.yml
index f07a300c3..e2f9ab13a 100644
--- a/pipelines/azure-benchmarks.yml
+++ b/pipelines/azure-benchmarks.yml
@@ -1,8 +1,13 @@
-trigger: none
+trigger:
+  branches:
+    include:
+      - main
+
 pr: none
 
 stages:
   - stage: RunBenchmark
+    displayName: 'Run Benchmarks'
     pool:
       name: $(LINUXPOOL)
       image: $(LINUXVMIMAGE)
@@ -14,6 +19,16 @@ stages:
       - job: RunBenchmark
         displayName: 'Run Copilot Benchmarks'
         steps:
+          - task: UsePythonVersion@0
+            displayName: 'Use Python 3.12'
+            inputs:
+              versionSpec: '3.12'
+
+          - task: PipAuthenticate@1
+            displayName: 'Authenticate pip with MicrosoftSweBench feed'
+            inputs:
+              artifactFeeds: 'internal/MicrosoftSweBench'
+
           - task: AzureCLI@2
             displayName: 'Run Copilot Benchmarks script'
             inputs:
@@ -22,5 +37,3 @@ stages:
               scriptType: 'pscore'
               scriptLocation: 'scriptPath'
               scriptPath: $(Build.SourcesDirectory)/pipelines/scripts/Invoke-CopilotBenchmarks.ps1
-              arguments: >
-                -BuildId $(OfficialBuildId)
diff --git a/pipelines/scripts/Invoke-CopilotBenchmarks.ps1 b/pipelines/scripts/Invoke-CopilotBenchmarks.ps1
index ff151e2dd..1cf03886e 100644
--- a/pipelines/scripts/Invoke-CopilotBenchmarks.ps1
+++ b/pipelines/scripts/Invoke-CopilotBenchmarks.ps1
@@ -1,12 +1,127 @@
-param(
-    [string]$BuildId
-)
+<#
+.SYNOPSIS
+    Installs MSBench CLI and runs a Copilot Azure benchmark.
 
+.DESCRIPTION
+    This script runs in Azure DevOps under an AzureCLI@2 task with federated authentication.
+    Feed authentication is handled by a preceding PipAuthenticate@1 task, which exports
+    PIP_INDEX_URL (or PIP_EXTRA_INDEX_URL when onlyAddExtraIndex is enabled) for the
+    azure-sdk/internal/MicrosoftSweBench feed.
+    The script retrieves a GitHub PAT from KeyVault, installs MSBench CLI, and invokes:
+        msbench-cli run --agent github-copilot-cli --benchmark <Benchmark> --model <Model>
 
+    MSBench CLI reference:
+        - https://github.com/devdiv-microsoft/MicrosoftSweBench/wiki
 
-# Install MSBench CLI
-Write-Host "Installing keyring"
-pip install keyring artifacts-keyring
+.PARAMETER Benchmark
+    Benchmark identifier. Default: azure
 
-Write-Host "Listing key vaults in the resource group"
-az keyvault list --resource-group rg-msbench-eval-kv-azure-mcp --query "[].name" -o tsv
+.PARAMETER Model
+    Model identifier. Default: claude-sonnet-4.5-autodev-test
+
+.PARAMETER NoWait
+    Whether to add --no-wait to the run command.
+
+.LINK
+    https://github.com/devdiv-microsoft/MicrosoftSweBench/wiki
+#>
+
+param(
+    [string]$Benchmark = "azure",
+    [string]$Model = "claude-sonnet-4.5-autodev-test",
+    [switch]$NoWait
+)
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = "Stop"
+
+if (!$Benchmark) {
+    throw "Benchmark parameter is required."
+}
+
+if (!$Model) {
+    throw "Model parameter is required."
+}
+
+$vaultName = "kv-msbench-eval-azuremcp"
+$secretName = "azure-eval-gh-pat"
+
+Write-Host "Benchmark: $Benchmark"
+Write-Host "Model: $Model"
+Write-Host "NoWait: $NoWait"
+
+$pipelineRun = $env:TF_BUILD -eq "True"
+
+# --- Retrieve GitHub PAT from KeyVault ---
+try {
+    Write-Host "Retrieving GitHub PAT from KeyVault $vaultName secret $secretName"
+    $pat = az keyvault secret show --vault-name $vaultName --name $secretName --query value -o tsv
+
+    # Native commands do not throw under $ErrorActionPreference; check the exit code explicitly.
+    if ($LASTEXITCODE -ne 0) {
+        throw "az keyvault secret show failed with exit code $LASTEXITCODE"
+    }
+
+    if (!$pat) {
+        throw "Secret $secretName not found in KeyVault $vaultName."
+    }
+
+    # Register the PAT as a pipeline secret first so Azure DevOps masks it in all later log output.
+    if ($pipelineRun) {
+        Write-Host "##vso[task.setsecret]$pat"
+    }
+
+    $env:GITHUB_MCP_SERVER_TOKEN = $pat
+}
+catch {
+    throw "Failed to retrieve GitHub PAT from KeyVault: $_"
+}
+
+# --- Feed auth is handled by the PipAuthenticate@1 pipeline task ---
+# With a single feed and default inputs the task exports PIP_INDEX_URL; it only uses
+# PIP_EXTRA_INDEX_URL for additional feeds or when onlyAddExtraIndex is true. Accept either.
+if ($env:PIP_INDEX_URL -or $env:PIP_EXTRA_INDEX_URL) {
+    Write-Host "pip index URL is set (feed auth configured by PipAuthenticate task)"
+} else {
+    Write-Warning "Neither PIP_INDEX_URL nor PIP_EXTRA_INDEX_URL is set. Feed authentication may fail. Ensure PipAuthenticate@1 runs before this script."
+}
+
+$pythonCommand = Get-Command python
+Write-Host "Using python from: $($pythonCommand.Path). Version: $(python --version 2>&1)"
+
+Write-Host "Install/upgrade pip"
+python -m pip install --upgrade pip
+if ($LASTEXITCODE -ne 0) {
+    throw "pip install/upgrade failed with exit code $LASTEXITCODE"
+}
+
+Write-Host "Installing/upgrading MSBench CLI"
+python -m pip install msbench-cli --no-input
+if ($LASTEXITCODE -ne 0) {
+    throw "pip install msbench-cli failed with exit code $LASTEXITCODE"
+}
+
+Write-Host "MSBench CLI version"
+& 'msbench-cli' version
+if ($LASTEXITCODE -ne 0) {
+    throw "msbench-cli version failed with exit code $LASTEXITCODE"
+}
+
+$runArgs = @(
+    "run",
+    "--agent", "github-copilot-cli",
+    "--benchmark", $Benchmark,
+    "--model", $Model,
+    "--env", "GITHUB_MCP_SERVER_TOKEN"
+)
+
+if ($NoWait) {
+    $runArgs += "--no-wait"
+}
+
+Write-Host "Running: msbench-cli $($runArgs -join ' ')"
+& 'msbench-cli' @runArgs
+
+if ($LASTEXITCODE -ne 0) {
+    throw "msbench-cli run failed with exit code $LASTEXITCODE"
+}