Skip to content

Commit 659c0fd

Browse files
luke-lombardi, cursoragent, luke-beamcloud
authored
feat: optimize read throughput (#63)
* feat: Implement blobcache optimizations This commit introduces several optimizations to improve blobcache performance, including FUSE tuning, gRPC network tuning, buffer pooling, prefetching, and enhanced metrics. Co-authored-by: luke <[email protected]> * feat: Implement CI/CD performance testing and optimize config Co-authored-by: luke <[email protected]> * feat: Implement CI/CD performance testing and optimizations Co-authored-by: luke <[email protected]> * run perf tests * Fix: Remove Redis dependency and improve test stability Co-authored-by: luke <[email protected]> * Checkpoint before follow-up message Co-authored-by: luke <[email protected]> * Refactor: Use sync.Once for global metrics and logger initialization Co-authored-by: luke <[email protected]> * Fix: Improve disk cache metric error handling and benchmark setup Co-authored-by: luke <[email protected]> * feat: Adjust performance tests for CI and larger messages Co-authored-by: luke <[email protected]> * Refactor performance tests and simplify gRPC throughput test Co-authored-by: luke <[email protected]> * Grant permissions for performance tests workflow Co-authored-by: luke <[email protected]> * Refactor: Remove validation script and adjust test timeouts Co-authored-by: luke <[email protected]> * Refactor: Optimize CI benchmarks and add local testing notes Co-authored-by: luke <[email protected]> * Checkpoint before follow-up message Co-authored-by: luke <[email protected]> * Refactor: Improve read throughput benchmarks and buffer pool usage Co-authored-by: luke <[email protected]> * Refactor: Remove throughput benchmarks, focus on buffer pool Co-authored-by: luke <[email protected]> * Checkpoint before follow-up message Co-authored-by: luke <[email protected]> * Refactor: Adjust memory cache configuration and benchmark Update benchmark to use disk-only mode and adjust storage logic to reflect this. 
Co-authored-by: luke <[email protected]> * feat: Add gRPC throughput benchmarking and config tuning This commit introduces a new gRPC throughput benchmark and adds configuration options for gRPC window sizes and buffer sizes. It also updates the performance test to include a check for GetContent throughput. Co-authored-by: luke <[email protected]> * Remove redundant FUSE operation latency logging Co-authored-by: luke <[email protected]> * Refactor: Make gRPC server config tunable Co-authored-by: luke <[email protected]> * clean up default config * cleanup * update path --------- Co-authored-by: Cursor Agent <[email protected]> Co-authored-by: Luke Lombardi <[email protected]>
1 parent 2a30b5a commit 659c0fd

22 files changed

+1994
-54
lines changed
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
name: Performance Tests

# Runs on pushes and pull requests targeting master/develop, and can also be
# started manually from the Actions tab.
on:
  push:
    branches:
      - master
      - develop
  pull_request:
    branches:
      - master
      - develop
  workflow_dispatch:
    # NOTE(review): no job in this workflow reads either input below; the
    # regression thresholds are hard-coded in the unit-benchmarks job.
    inputs:
      test_iterations:
        description: "Number of test iterations"
        required: false
        default: "3"
      regression_threshold:
        description: "Regression threshold (%)"
        required: false
        default: "10"

# Token scopes: read the repository, and write PR/issue comments (used by the
# step that posts benchmark results on pull requests).
permissions:
  contents: read
  pull-requests: write
  issues: write
23+
24+
jobs:
25+
# Runs the Go micro-benchmarks, gates on two hard-coded thresholds, and posts
# the raw benchmark output as a PR comment.
unit-benchmarks:
  name: Unit Benchmarks
  runs-on: ubuntu-latest
  timeout-minutes: 30

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0 # Full history for comparison

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: '1.22'

    - name: Cache Go modules
      uses: actions/cache@v4
      with:
        path: |
          ~/go/pkg/mod
          ~/.cache/go-build
        key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-

    - name: Run benchmarks
      run: |
        # The default step shell is `bash -e` without pipefail, so without
        # this line a failing `go test` would be hidden by tee's exit status.
        set -o pipefail

        echo "Running performance benchmarks..."
        echo ""
        echo "=== 1. Buffer Pool (20,000× allocation improvement) ==="
        go test -bench=BenchmarkBufferPool -benchmem -benchtime=2s -timeout=2m ./pkg/ | tee benchmark-results.txt
        echo ""
        echo "=== 2. GetContent End-to-End (real disk-based throughput) ==="
        go test -bench=BenchmarkGetContentDiskCache -benchmem -benchtime=500ms -timeout=2m ./pkg/ | tee -a benchmark-results.txt

    - name: Upload benchmark results
      # Keep whatever output was produced even when the benchmarks fail, the
      # same way the gRPC job uploads its results.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results
        path: benchmark-results.txt
        retention-days: 30

    - name: Check for performance regressions
      run: |
        # Print the value that precedes the given unit column ("ns/op",
        # "MB/s", ...) on the first benchmark line matching the pattern.
        # Go prints metrics as "<value> <unit>" pairs, so a fixed field index
        # is fragile: in "name  N  t ns/op  x MB/s", field 4 is the literal
        # "ns/op", not the throughput.
        metric() {
          awk -v pattern="$1" -v unit="$2" '
            $0 ~ pattern {
              for (i = 3; i <= NF; i++) {
                if ($i == unit) { print $(i - 1); exit }
              }
            }
          ' benchmark-results.txt
        }

        # Numeric comparisons done in awk so the step does not depend on bc.
        greater_than() { awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'; }
        less_than()    { awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'; }

        BUFFER_POOL_TIME=$(metric 'BenchmarkBufferPool.*WithPool' 'ns/op')
        GETCONTENT_THROUGHPUT=$(metric 'BenchmarkGetContentDiskCache' 'MB/s')

        echo "Buffer Pool: $BUFFER_POOL_TIME"
        echo "GetContent:  $GETCONTENT_THROUGHPUT"

        if [ -n "$BUFFER_POOL_TIME" ]; then
          if greater_than "$BUFFER_POOL_TIME" 100; then
            echo "❌ Buffer pool regression: ${BUFFER_POOL_TIME}ns > 100ns"
            exit 1
          fi
          echo "✅ Buffer pool OK: ${BUFFER_POOL_TIME}ns"
        else
          # A missing metric stays non-fatal, but is no longer silent.
          echo "::warning::BenchmarkBufferPool ns/op not found in benchmark-results.txt; check skipped"
        fi

        if [ -n "$GETCONTENT_THROUGHPUT" ]; then
          if less_than "$GETCONTENT_THROUGHPUT" 2000; then
            echo "❌ GetContent regression: ${GETCONTENT_THROUGHPUT} MB/s < 2000 MB/s"
            exit 1
          fi
          echo "✅ GetContent OK: ${GETCONTENT_THROUGHPUT} MB/s"
        else
          echo "::warning::BenchmarkGetContentDiskCache MB/s not found in benchmark-results.txt; check skipped"
        fi

    - name: Comment benchmark results on PR
      if: github.event_name == 'pull_request'
      uses: actions/github-script@v7
      with:
        script: |
          const fs = require('fs');
          const results = fs.readFileSync('benchmark-results.txt', 'utf8');

          const body = `## Benchmark Results\n\n\`\`\`\n${results}\n\`\`\``;

          // Await the request so an API failure is reported by this step
          // instead of surfacing as an unhandled promise rejection.
          await github.rest.issues.createComment({
            issue_number: context.issue.number,
            owner: context.repo.owner,
            repo: context.repo.repo,
            body,
          });
110+
111+
# Builds the server and the throughput client, then runs the end-to-end gRPC
# throughput script against a locally started server.
grpc-throughput-tests:
  name: gRPC Throughput Tests
  runs-on: ubuntu-latest
  timeout-minutes: 10

  services:
    # The config written by bin/run_grpc_performance_tests.sh points the
    # metadata store at localhost:6379.
    redis:
      image: redis:7-alpine
      options: >-
        --health-cmd "redis-cli ping"
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
      ports:
        - 6379:6379

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: '1.22'

    - name: Cache Go modules
      uses: actions/cache@v4
      with:
        # Same paths and fallback key as the unit-benchmarks job. This job
        # compiles two binaries, so the build cache matters here as well.
        path: |
          ~/go/pkg/mod
          ~/.cache/go-build
        key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-

    - name: Install netcat
      # The test script probes the server port with `nc -z`.
      run: sudo apt-get update && sudo apt-get install -y netcat-openbsd

    - name: Run gRPC performance tests
      run: |
        chmod +x bin/run_grpc_performance_tests.sh
        ./bin/run_grpc_performance_tests.sh

    - name: Upload results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: grpc-performance-results
        path: performance-results/
        retention-days: 30
157+
158+
# Runs the Go test suite for everything under ./pkg.
integration-tests:
  runs-on: ubuntu-latest
  name: Integration Tests
  timeout-minutes: 10

  steps:
    - uses: actions/checkout@v4
      name: Checkout code

    - uses: actions/setup-go@v5
      name: Set up Go
      with:
        go-version: "1.22"

    - name: Run unit tests
      run: |
        go test -v -timeout 5m ./pkg/...
174+
175+
# Aggregates the results of the three jobs above into the run summary and
# fails if any of them did not succeed.
performance-summary:
  name: Performance Summary
  runs-on: ubuntu-latest
  needs: [unit-benchmarks, grpc-throughput-tests, integration-tests]
  # Run even when an upstream job failed so the summary is always produced.
  if: always()

  steps:
    - name: Download all artifacts
      uses: actions/download-artifact@v4

    - name: Generate summary
      run: |
        # Everything printed inside the braces is appended to the step summary.
        {
          echo "# Performance Test Summary"
          echo ""

          echo "## Job Status"
          echo "- Unit Benchmarks: ${{ needs.unit-benchmarks.result }}"
          echo "- gRPC Throughput: ${{ needs.grpc-throughput-tests.result }}"
          echo "- Integration Tests: ${{ needs.integration-tests.result }}"
          echo ""

          # NOTE(review): no job in this workflow uploads an artifact named
          # performance-report, so this branch looks unreachable -- confirm.
          if [ -f performance-report/report.md ]; then
            echo "## Performance Report"
            cat performance-report/report.md
          fi

          if [ -f benchmark-results/benchmark-results.txt ]; then
            echo ""
            echo "## Benchmark Results"
            echo "\`\`\`"
            head -50 benchmark-results/benchmark-results.txt
            echo "\`\`\`"
          fi
        } >> "$GITHUB_STEP_SUMMARY"

    - name: Check overall status
      run: |
        for result in \
          "${{ needs.unit-benchmarks.result }}" \
          "${{ needs.grpc-throughput-tests.result }}" \
          "${{ needs.integration-tests.result }}"; do
          if [ "$result" != "success" ]; then
            echo "❌ Some performance tests failed"
            exit 1
          fi
        done
        echo "✅ All performance tests passed"

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ bin/throughput
55
bin/fs
66
bin/testclient
77
bin/basic
8+
bin/grpc-throughput
89
build.sh
910
tmp/
1011
config.yaml
1112
config2.yaml
1213
config3.yaml
1314
e2e/throughput/testdata/*.bin
1415
e2e/fs/testdata/*.bin
16+
e2e/grpc_throughput/grpc-throughput
1517
daemonset.yaml
1618
output.bin
1719
.go-version

bin/run_grpc_performance_tests.sh

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/bin/bash
set -e

# Simple gRPC performance test - completes in under 1 minute

# Resolve the repository root from this script's own location (bin/..).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORKSPACE_DIR="$(dirname "$SCRIPT_DIR")"
RESULTS_DIR="${WORKSPACE_DIR}/performance-results"
CURRENT_FILE="${RESULTS_DIR}/current.json"

cat << 'BANNER'
========================================
 gRPC Performance Test
========================================

BANNER

mkdir -p "$RESULTS_DIR"

# Build binaries
echo "[1/4] Building binaries..."
cd "$WORKSPACE_DIR"
go build -o bin/blobcache cmd/main.go
go build -o bin/grpc-throughput e2e/grpc_throughput/main.go
echo "✓ Build complete"
echo ""

# Setup test environment: a throwaway directory holding the disk cache, the
# generated config and the server log.
echo "[2/4] Starting test server..."
TEST_DIR=$(mktemp -d)
DISK_CACHE_DIR="${TEST_DIR}/cache"
mkdir -p "$DISK_CACHE_DIR"

# Create minimal config. The heredoc delimiter is unquoted on purpose so that
# ${DISK_CACHE_DIR} is expanded into the file.
# NOTE(review): the key nesting below (metadata under server; global and
# metrics at top level) was reconstructed from a flattened listing -- confirm
# against the repository's default config before relying on it.
cat > "${TEST_DIR}/config.yaml" << EOF
server:
  mode: coordinator
  diskCacheDir: ${DISK_CACHE_DIR}
  diskCacheMaxUsagePct: 90
  maxCachePct: 50
  pageSizeBytes: 4194304
  metadata:
    mode: default
    redisAddr: "localhost:6379"

global:
  serverPort: 50051
  grpcMessageSizeBytes: 268435456
  debugMode: false

metrics:
  url: ""
EOF
52+
53+
# Start server in the background; its output goes to a log inside TEST_DIR.
CONFIG_PATH="${TEST_DIR}/config.yaml" ./bin/blobcache > "${TEST_DIR}/server.log" 2>&1 &
SERVER_PID=$!

# Stop the background server and remove the temporary test directory.
cleanup() {
    echo ""
    echo "Cleaning up..."
    if [ -n "$SERVER_PID" ]; then
        kill -9 "$SERVER_PID" 2>/dev/null || true
        sleep 1
    fi
    rm -rf "$TEST_DIR"
    echo "✓ Cleanup complete"
}

# Run cleanup exactly once, from the EXIT trap. A handler installed directly
# on INT/TERM would return into the script after cleaning up, leaving it to
# continue against a killed server and a deleted TEST_DIR (and cleanup would
# then run a second time at exit). Turn the signals into an exit instead,
# using the conventional 128+signal status.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
68+
69+
# Wait for server: give the process a moment, then make sure it is still alive.
sleep 3
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "✗ Server failed to start"
    cat "${TEST_DIR}/server.log"
    exit 1
fi

# Check connectivity: poll the gRPC port once per second, up to 10 attempts.
for attempt in {1..10}; do
    if nc -z localhost 50051 2>/dev/null; then
        echo "✓ Server ready"
        break
    fi
    if [ "$attempt" -eq 10 ]; then
        echo "✗ Server not responding"
        # Print the log before exiting, as the startup-failure branch does;
        # the cleanup handler deletes TEST_DIR and the log with it.
        cat "${TEST_DIR}/server.log"
        exit 1
    fi
    sleep 1
done
echo ""
90+
91+
# Run tests
echo "[3/4] Running throughput tests..."
echo ""

# The script runs under `set -e`: a bare failing command would abort right
# here and the failure summary below would never print. Capture the status
# through `||` so execution continues to the report.
TEST_EXIT=0
./bin/grpc-throughput -server localhost:50051 -output "$CURRENT_FILE" || TEST_EXIT=$?

echo ""
if [ "$TEST_EXIT" -eq 0 ]; then
    echo "[4/4] ✓ All tests passed"
else
    echo "[4/4] ✗ Some tests failed"
fi

# Propagate the benchmark client's exit status to the caller (the CI step).
exit "$TEST_EXIT"

0 commit comments

Comments
 (0)