Skip to content

Commit 322f0aa

Browse files
authored
feat: Add an action for running seeder (#1109)
1 parent 4a4e488 commit 322f0aa

File tree

2 files changed

+138
-1
lines changed

2 files changed

+138
-1
lines changed

.github/workflows/evals.yml

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# Evals workflow: started manually against a chosen commit, with a selectable
# test size.
name: Evals

on:
  workflow_dispatch:
    inputs:
      # Commit to check out and whose prebuilt artifacts are evaluated.
      commit_sha:
        type: string
        required: true
        description: "Git commit SHA to evaluate"
      # Size of the run; one of the listed options.
      test_size:
        description: "Number of charms to run"
        type: choice
        options:
          - one
          - ten
          - all
  # FOR DEVELOPMENT
  #pull_request:
  #  branches:
  #    - main
21+
22+
# Workflow-level variables, filled from the workflow_dispatch inputs and read
# by the job's steps.
env:
  # FOR DEVELOPMENT
  #target_sha: "4a4e488db80aa074a825923c757572f99137c957"
  #test_size: "one"
  target_sha: "${{ github.event.inputs.commit_sha }}"
  test_size: "${{ github.event.inputs.test_size }}"
28+
29+
# Single job: checks out the requested commit, downloads the binaries built
# for that commit from Google Cloud Storage, starts a local Toolshed server
# (with a Redis service container alongside), runs the seeder against it and
# uploads the seeder results as an artifact.
#
# Inputs and secrets are handed to shell scripts through step-level `env:`
# and referenced as quoted shell variables, never interpolated into the script
# text, so unusual characters cannot break or alter the commands.
jobs:
  evals:
    name: "Evals"
    runs-on: ubuntu-latest
    environment: seeder
    services:
      redis:
        image: redis
        ports:
          - "6379:6379"
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      # TODO Upload `seeder` as a deno executable to avoid
      # running from source.
      - name: 📥 Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ env.target_sha }}

      - name: 🦕 Setup Deno 2.2.2
        uses: denoland/setup-deno@v2
        with:
          deno-version: "2.2.2"

      # Same major version as the LLM cache step below.
      - name: 📦 Cache Deno dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.deno
            ~/.cache/deno
          key: ${{ runner.os }}-deno-${{ hashFiles('**/deno.json') }}

      - name: 🔑 Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      - name: ⚙️ Setup Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v1

      - name: 📥 Download built binaries from Google Cloud Storage
        env:
          TARGET_SHA: ${{ env.target_sha }}
        run: |
          artifact_base="gs://commontools-build-artifacts/workspace-artifacts/labs-${TARGET_SHA}"
          tarball="downloaded-artifacts/labs-${TARGET_SHA}.tar.gz"

          mkdir -p downloaded-artifacts

          # The download itself is part of the check: the step shell runs with
          # `-e`, so an unguarded failing `gsutil cp` would abort the step
          # before the explanatory error below could be printed.
          if ! gsutil cp "${artifact_base}.tar.gz" downloaded-artifacts/ || [ ! -f "${tarball}" ]; then
            echo "::error::Artifact tarball for commit ${TARGET_SHA} not found!"
            echo "Make sure this commit was successfully built and artifacts were uploaded."
            exit 1
          fi
          gsutil cp "${artifact_base}.hash.txt" hash.txt

          mkdir -p common-binaries
          tar -xzf "${tarball}" -C common-binaries

      # CTTS_AI_LLM_GOOGLE_APPLICATION_CREDENTIALS points to `/tmp/gcp-vertex-creds.json`
      - name: ⚙️ Setup Google Vertex Key
        env:
          VERTEX_CREDENTIALS_JSON: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
        run: |
          # Written from an environment variable so quotes inside the secret
          # cannot terminate a shell string.
          printf '%s\n' "${VERTEX_CREDENTIALS_JSON}" > /tmp/gcp-vertex-creds.json

      # We want to essentially use a persistent directory
      # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
      - name: 📦 Load LLM Cache
        id: load-llm-cache
        uses: actions/cache@v4
        with:
          path: ./llm_cache_dir
          key: llm-cache-${{ github.run_id }}
          restore-keys: llm-cache

      - name: 🚀 Start Toolshed server
        env:
          CACHE_DIR: ./llm_cache_dir
          CTTS_AI_LLM_ANTHROPIC_API_KEY: ${{ secrets.CTTS_AI_LLM_ANTHROPIC_API_KEY }}
          CTTS_AI_LLM_OPENAI_API_KEY: ${{ secrets.CTTS_AI_LLM_OPENAI_API_KEY }}
          CTTS_AI_LLM_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.CTTS_AI_LLM_GOOGLE_APPLICATION_CREDENTIALS }}
        run: |
          mkdir -p ./llm_cache_dir
          chmod +x ./common-binaries/toolshed
          ./common-binaries/toolshed &

          # Give the background server a bounded amount of time to start
          # answering on the URL the seeder step uses. A timeout only warns,
          # so a slow start never fails the run at this point.
          for _ in $(seq 1 30); do
            if curl --silent --output /dev/null http://localhost:8000/; then
              exit 0
            fi
            sleep 1
          done
          echo "::warning::Toolshed did not answer on http://localhost:8000/ within 30s; continuing anyway."

      # For Astral
      # https://github.com/lino-levan/astral/blob/f5ef833b2c5bde3783564a6b925073d5d46bb4b8/README.md#no-usable-sandbox-with-user-namespace-cloning-enabled
      - name: 🛡️ Disable AppArmor for browser tests
        run: echo 0 | sudo tee /proc/sys/kernel/apparmor_restrict_unprivileged_userns

      - name: 🧪 Run seeder
        working-directory: seeder
        env:
          TARGET_SHA: ${{ env.target_sha }}
          TEST_SIZE: ${{ env.test_size }}
          TOOLSHED_API_URL: "http://localhost:8000/"
          OPENAI_API_KEY: ${{ secrets.CTTS_AI_LLM_OPENAI_API_KEY }}
        run: |
          # Map the requested test size to a seeder tag; any other value
          # (including "all" or an empty input) adds no tag filter.
          flags=()
          case "${TEST_SIZE}" in
            one) flags=(--tag smol) ;;
            ten) flags=(--tag 10x10) ;;
          esac

          deno task start --name "${TARGET_SHA}" "${flags[@]}"

      # Runs even when the seeder step failed, so its results remain
      # available; skipped only when the run is cancelled.
      - name: 📤 Upload reports
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: seeder-results
          path: |
            ./seeder/results

seeder/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,5 @@ You can add new flows to `scenarios.ts`.
4747

4848
## Reading the report
4949

50-
A report will be generated in the `report` directory named `report/blue42.html`.
50+
A report will be generated in the `results` directory named `results/blue42.html`.
5151
You can open this in your browser to see the results.

0 commit comments

Comments
 (0)