# benchmark.yml
# Forked from gptme/gptme.
# Startup benchmark workflow: runs on every push to master and on every
# pull request that targets master.
name: Benchmark

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

# Listing any scope here sets every scope that is NOT listed to `none`, so
# read access to repository contents has to be granted explicitly for
# actions/checkout to fetch the code with the workflow token.
permissions:
  contents: read
  # NOTE(review): no step in this workflow visibly writes to pull requests;
  # confirm this scope is still needed before keeping it.
  pull-requests: write
jobs:
  # Times `from gptme.cli.main import main` with hyperfine, once with caches
  # cleared (cold) and once after warmup runs (warm), then fails the job when
  # either median exceeds the threshold set in the "Check thresholds" step.
  benchmark:
    name: Startup Benchmark
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6

      - name: Install poetry
        run: pipx install poetry

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'poetry'

      - name: Install dependencies
        run: |
          poetry install

      - name: Install hyperfine
        run: |
          # Refresh the package index first so the install does not fail on a
          # stale index that references packages no longer on the mirror.
          sudo apt-get update
          sudo apt-get install -y hyperfine

      - name: Benchmark cold startup
        id: cold
        run: |
          # Get virtualenv path so we can clear .pyc files in dependencies too
          VENV_PATH=$(poetry env info --path)
          # Cold startup: --prepare clears .pyc in repo AND virtualenv, drops OS page cache.
          # The trailing `true` keeps a failed cleanup from aborting the benchmark.
          hyperfine \
            --runs 5 \
            --prepare "find . '$VENV_PATH' -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; sudo sh -c 'sync && echo 3 > /proc/sys/vm/drop_caches' 2>/dev/null; true" \
            --export-json cold.json \
            'poetry run python -c "from gptme.cli.main import main"'
          # Extract the median (seconds, 3 decimals) and expose it as a step output
          COLD_TIME=$(python3 -c "import json; print(f'{json.load(open(\"cold.json\"))[\"results\"][0][\"median\"]:.3f}')")
          echo "cold_time=$COLD_TIME" >> "$GITHUB_OUTPUT"
          echo "Cold startup median: ${COLD_TIME}s"

      - name: Benchmark warm startup
        id: warm
        run: |
          # Warm startup: --warmup ensures .pyc files are cached
          hyperfine \
            --runs 10 \
            --warmup 3 \
            --export-json warm.json \
            'poetry run python -c "from gptme.cli.main import main"'
          # Extract the median (seconds, 3 decimals) and expose it as a step output
          WARM_TIME=$(python3 -c "import json; print(f'{json.load(open(\"warm.json\"))[\"results\"][0][\"median\"]:.3f}')")
          echo "warm_time=$WARM_TIME" >> "$GITHUB_OUTPUT"
          echo "Warm startup median: ${WARM_TIME}s"

      - name: Check thresholds
        env:
          COLD_TIME: ${{ steps.cold.outputs.cold_time }}
          WARM_TIME: ${{ steps.warm.outputs.warm_time }}
          # Thresholds in seconds (with margin for CI variability)
          COLD_THRESHOLD: "6.0"
          WARM_THRESHOLD: "2.5"
        run: |
          echo "=== Startup Benchmark Results (hyperfine medians) ==="
          echo "Cold startup: ${COLD_TIME}s (threshold: ${COLD_THRESHOLD}s)"
          echo "Warm startup: ${WARM_TIME}s (threshold: ${WARM_THRESHOLD}s)"
          echo ""

          # A missing or malformed measurement must fail the job instead of
          # being treated as "within threshold".
          NUMERIC_RE='^[0-9]+([.][0-9]+)?$'
          for value in "$COLD_TIME" "$WARM_TIME"; do
            if ! [[ "$value" =~ $NUMERIC_RE ]]; then
              echo "::error::Benchmark produced a missing or non-numeric time: '${value}'"
              exit 1
            fi
          done

          # Succeeds when $1 > $2. Uses python3 (already relied on by this job)
          # so the comparison does not depend on bc being present on the runner.
          exceeds() {
            python3 -c 'import sys; sys.exit(0 if float(sys.argv[1]) > float(sys.argv[2]) else 1)' "$1" "$2"
          }

          # Parse stddev for reporting
          COLD_STDDEV=$(python3 -c "import json; print(f'{json.load(open(\"cold.json\"))[\"results\"][0][\"stddev\"]:.3f}')")
          WARM_STDDEV=$(python3 -c "import json; print(f'{json.load(open(\"warm.json\"))[\"results\"][0][\"stddev\"]:.3f}')")
          echo "Cold stddev: ${COLD_STDDEV}s"
          echo "Warm stddev: ${WARM_STDDEV}s"
          echo ""

          # Check both thresholds before exiting so a single run reports every regression
          FAILED=0
          if exceeds "$COLD_TIME" "$COLD_THRESHOLD"; then
            echo "::error::Cold startup (${COLD_TIME}s +/- ${COLD_STDDEV}s) exceeds threshold (${COLD_THRESHOLD}s)"
            FAILED=1
          else
            echo "✅ PASS: Cold startup within threshold"
          fi
          if exceeds "$WARM_TIME" "$WARM_THRESHOLD"; then
            echo "::error::Warm startup (${WARM_TIME}s +/- ${WARM_STDDEV}s) exceeds threshold (${WARM_THRESHOLD}s)"
            FAILED=1
          else
            echo "✅ PASS: Warm startup within threshold"
          fi
          if [ "$FAILED" -eq 1 ]; then
            echo ""
            echo "Performance regression detected! Please investigate."
            exit 1
          fi