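"""update_data.py: pull recent Buildkite builds for the vllm organization's
main branch, cache each build's "Benchmarks" job log under .cache/, extract
latency/throughput metrics from the logs, and write the results to
buildkite_benchmarks.xlsx."""
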
import os
import re
import subprocess
from datetime import datetime, timedelta

import pandas as pd
import requests

def run_curl_command(token, url):
    """Download a raw Buildkite job log by shelling out to curl."""
    command = [
        "curl",
        "-H", f"Authorization: Bearer {token}",
        "-X", "GET",
        url
    ]
    try:
        result = subprocess.run(command, check=True, text=True, capture_output=True)
        return result.stdout  # The content of the log file
    except subprocess.CalledProcessError as e:
        # curl reports failures on stderr, so surface that in the error message.
        raise Exception(f"Command failed with return code {e.returncode}: {e.stderr}")
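# Illustrative usage (the URL is a placeholder, not a real log endpoint;
# API_TOKEN is defined further below):
#   log_text = run_curl_command(API_TOKEN, "https://api.buildkite.com/v2/.../log")
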
def get_builds(org_slug, branch, token, days=30):
    """Return all builds on `branch` created in the last `days` days, following pagination."""
    url = f"https://api.buildkite.com/v2/organizations/{org_slug}/builds"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }
    # Only request builds created within the last `days` days (default 30).
    date_from = (datetime.utcnow() - timedelta(days=days)).isoformat() + "Z"
    params = {
        "branch": branch,
        "created_from": date_from,
        "per_page": "100",
    }
    all_builds = []
    while url:
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            all_builds.extend(response.json())
            # Parse the Link header and look for a 'next' relation
            link_header = response.headers.get('Link', None)
            url = None
            if link_header:
                links = link_header.split(',')
                next_link = [link for link in links if 'rel="next"' in link]
                if next_link:
                    # Strip whitespace before the angle brackets: entries after
                    # a comma in a Link header start with a space.
                    next_url = next_link[0].split(';')[0].strip().strip('<>')
                    url = next_url
                    params = {}  # Clear params because the next URL already carries them
        else:
            raise Exception(f"Failed to get builds: {response.status_code} - {response.text}")
    return all_builds
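# Illustrative usage (API_TOKEN is read from the environment further below):
#   builds = get_builds("vllm", "main", API_TOKEN, days=7)
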
# Regex patterns for the metrics printed in the benchmark job logs; each entry
# maps a spreadsheet column name to the pattern that captures its value.
log_patterns = [
    {'key': 'Average Latency', 'pattern': re.compile(r"Avg latency: ([\d.]+) seconds")},
    {'key': '10% Percentile Latency', 'pattern': re.compile(r"10% percentile latency: ([\d.]+) seconds")},
    {'key': '25% Percentile Latency', 'pattern': re.compile(r"25% percentile latency: ([\d.]+) seconds")},
    {'key': '50% Percentile Latency', 'pattern': re.compile(r"50% percentile latency: ([\d.]+) seconds")},
    {'key': '75% Percentile Latency', 'pattern': re.compile(r"75% percentile latency: ([\d.]+) seconds")},
    {'key': '90% Percentile Latency', 'pattern': re.compile(r"90% percentile latency: ([\d.]+) seconds")},
    {'key': 'Throughput', 'pattern': re.compile(r"Throughput: ([\d.]+) requests/s")},
    {'key': 'Token Throughput', 'pattern': re.compile(r"Throughput: [\d.]+ requests/s, ([\d.]+) tokens/s")},
    {'key': 'Successful Requests', 'pattern': re.compile(r"Successful requests: +(\d+)")},
    {'key': 'Benchmark Duration', 'pattern': re.compile(r"Benchmark duration \(s\): +([\d.]+)")},
    {'key': 'Total Input Tokens', 'pattern': re.compile(r"Total input tokens: +(\d+)")},
    {'key': 'Total Generated Tokens', 'pattern': re.compile(r"Total generated tokens: +(\d+)")},
    {'key': 'Request Throughput', 'pattern': re.compile(r"Request throughput \(req/s\): +([\d.]+)")},
    {'key': 'Input Token Throughput', 'pattern': re.compile(r"Input token throughput \(tok/s\): +([\d.]+)")},
    {'key': 'Output Token Throughput', 'pattern': re.compile(r"Output token throughput \(tok/s\): +([\d.]+)")},
    {'key': 'Mean TTFT', 'pattern': re.compile(r"Mean TTFT \(ms\): +([\d.]+)")},
    {'key': 'Median TTFT', 'pattern': re.compile(r"Median TTFT \(ms\): +([\d.]+)")},
    {'key': 'P99 TTFT', 'pattern': re.compile(r"P99 TTFT \(ms\): +([\d.]+)")},
    {'key': 'Mean TPOT', 'pattern': re.compile(r"Mean TPOT \(ms\): +([\d.]+)")},
    {'key': 'Median TPOT', 'pattern': re.compile(r"Median TPOT \(ms\): +([\d.]+)")},
    {'key': 'P99 TPOT', 'pattern': re.compile(r"P99 TPOT \(ms\): +([\d.]+)")}
]
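# Illustrative log lines these patterns match (values made up):
#   Avg latency: 1.234 seconds
#   Throughput: 4.56 requests/s, 789.01 tokens/s
#   Mean TTFT (ms):   12.34
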
# Function to process log entries using the defined patterns
def extract_data_from_logs(logs, patterns=log_patterns):
    results = {}
    for line in logs.split('\n'):
        for pattern_dict in patterns:
            match = pattern_dict['pattern'].search(line)
            if match:
                results[pattern_dict['key']] = match.group(1)
    return results
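# For example (made-up values):
#   extract_data_from_logs("Avg latency: 1.5 seconds\n50% percentile latency: 1.2 seconds")
#   returns {'Average Latency': '1.5', '50% Percentile Latency': '1.2'}
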
# Set BUILDKIT_API_TOKEN in your environment to a Buildkite API token.
API_TOKEN = os.environ.get("BUILDKIT_API_TOKEN")
ORG_SLUG = "vllm"  # Replace with your organization's slug if different
BRANCH = "main"

# Downloaded logs are cached here so reruns skip already-fetched builds.
cache_dir = ".cache"
os.makedirs(cache_dir, exist_ok=True)
columns = [
    'commit',
    'commit_url',
    'build_datetime',
    'Average Latency',
    '10% Percentile Latency',
    '25% Percentile Latency',
    '50% Percentile Latency',
    '75% Percentile Latency',
    '90% Percentile Latency',
    'Throughput',
    'Token Throughput',
    'Successful Requests',
    'Benchmark Duration',
    'Total Input Tokens',
    'Total Generated Tokens',
    'Request Throughput',
    'Input Token Throughput',
    'Output Token Throughput',
    'Mean TTFT',
    'Median TTFT',
    'P99 TTFT',
    'Mean TPOT',
    'Median TPOT',
    'P99 TPOT'
]
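# The first three columns are build metadata; the remaining names must match
# the 'key' values in log_patterns, since results.get(col, "") fills them.
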
values = []
builds = get_builds(ORG_SLUG, BRANCH, API_TOKEN)
for build in builds:
    commit = build['commit']
    commit_url = f"{build['pipeline']['repository'].replace('.git', '')}/commit/{build['commit']}"
    # Find the raw log URL of this build's "Benchmarks" job, if it has one.
    raw_log_url = None
    for job in build.get('jobs', []):
        if 'name' in job and job['name'] == "Benchmarks":
            raw_log_url = job['raw_log_url']
            break
    if raw_log_url is None:
        continue
    build_datetime = build['created_at']
    filename = f"{build_datetime}_{commit}.log"
    filepath = os.path.join(cache_dir, filename)
    if os.path.exists(filepath):
        print(f"Skipping downloading {filepath} for commit {commit} because it already exists")
    else:
        data = run_curl_command(API_TOKEN, raw_log_url)
        if len(data) <= 100:  # A near-empty response means the log is effectively empty.
            print(f"Skipping processing {filepath} for commit {commit} because the log is empty")
            continue
        with open(filepath, "w") as f:
            f.write(data)
        print(f"Saved {filepath} for commit {commit}")
    with open(filepath, "r") as f:
        logs = f.read()
    results = extract_data_from_logs(logs)
    values.append([commit, commit_url, build_datetime] + [results.get(col, "") for col in columns[3:]])
df = pd.DataFrame(values, columns=columns)
df.to_excel("buildkite_benchmarks.xlsx", index=False)
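# To run (assumes pandas plus an xlsx writer such as openpyxl is installed):
#   BUILDKIT_API_TOKEN=<token> python update_data.py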