Skip to content

Commit a9e18e5

Browse files
authored
feat(seeder): scenario-aware HTML reports, cookbook test set, misc clean-ups (#1105)
Seeder:
* Replace inline string-built report with template engine (`templates/*`)
* Summary bar, per-scenario sections, graceful no-screenshot fallback
* New `ExecutedScenario` type; CLI now tracks results per step & scenario
* Add 10-scenario “Family Cookbook” generator for wider import/extend coverage
* CLI hardening
* Per-step try/catch + skip-on-fail logic
* Longer wait before screenshot
* Misc fixes & polish (next round of fixes will ship in a separate PR)
1 parent dd26d77 commit a9e18e5

File tree

10 files changed

+530
-396
lines changed

10 files changed

+530
-396
lines changed

charm/src/commands.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ export const createDataCharm = (
5252
const schema = ${schemaString};
5353
5454
export default recipe(schema, schema, (data) => ({
55-
[NAME]: "${name ?? "data import"}",
55+
[NAME]: "${name ?? "Data Import"}",
5656
[UI]: <div><h2>Your data has this schema</h2><pre>${
5757
schemaString.replaceAll("{", "&#123;")
5858
.replaceAll("}", "&#125;")
@@ -64,7 +64,7 @@ export const createDataCharm = (
6464
return compileAndRunRecipe(
6565
charmManager,
6666
dataRecipeSrc,
67-
name ?? "data import",
67+
name ?? "Data Import",
6868
data,
6969
);
7070
};

seeder/cli.ts

Lines changed: 40 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,14 @@
11
import { parseArgs } from "@std/cli/parse-args";
2-
import {
3-
castNewRecipe,
4-
CharmManager,
5-
compileAndRunRecipe,
6-
} from "@commontools/charm";
7-
import { getEntityId, setBobbyServerUrl, storage } from "@commontools/runner";
8-
import { createSession, Identity } from "@commontools/identity";
9-
import { LLMClient, setLLMUrl } from "@commontools/llm";
10-
import { createDataCharm, processWorkflow } from "@commontools/charm";
11-
import { type CharmResult, CommandType, type Step } from "./interfaces.ts";
2+
import { setBobbyServerUrl, storage } from "@commontools/runner";
3+
import { setLLMUrl } from "@commontools/llm";
4+
import { processScenario } from "./processor.ts";
5+
import { type ExecutedScenario } from "./interfaces.ts";
126
import { scenarios } from "./scenarios.ts";
137
import { toolshedUrl } from "./env.ts";
14-
import { llmVerifyCharm } from "./judge.ts";
158
import { ensureReportDir, generateReport } from "./report.ts";
16-
import {
17-
addErrorListeners,
18-
browser,
19-
checkForErrors,
20-
goto,
21-
login,
22-
screenshot,
23-
} from "./jumble.ts";
24-
9+
import { browser, login } from "./jumble.ts";
10+
import { createSession, Identity } from "@commontools/identity";
11+
import { CharmManager } from "@commontools/charm";
2512
const {
2613
name,
2714
tag,
@@ -38,181 +25,59 @@ const {
3825
const cache = !noCache;
3926

4027
if (!name) {
28+
// FIXME(ja): if the name already exists, we should not use it!
4129
console.error("Error: Missing `--name`.");
4230
Deno.exit(1);
4331
}
4432

4533
storage.setRemoteStorage(new URL(toolshedUrl));
4634
setBobbyServerUrl(toolshedUrl);
4735
setLLMUrl(toolshedUrl);
48-
const llmClient = new LLMClient();
49-
50-
const charmManager = new CharmManager(
51-
await createSession({
52-
identity: await Identity.fromPassphrase("common user"),
53-
name,
54-
}),
55-
);
5636

57-
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
37+
// Track executed scenarios and steps
38+
const executedScenarios: ExecutedScenario[] = [];
5839

59-
async function processPrompts(tag: string | undefined) {
60-
let promptCount = 0;
61-
console.log(`Processing prompts...`);
40+
async function processScenarios({
41+
tag,
42+
name,
43+
}: {
44+
tag: string | undefined;
45+
name: string;
46+
}) {
47+
await ensureReportDir(name);
48+
const charmManager = new CharmManager(
49+
await createSession({
50+
identity: await Identity.fromPassphrase("common user"),
51+
name,
52+
}),
53+
);
54+
console.log(`Processing scenarios...`);
6255

6356
for (const scenario of scenarios) {
6457
if (tag && (scenario.tags === undefined || !scenario.tags.includes(tag))) {
6558
continue;
6659
}
67-
await goto(toolshedUrl);
68-
await sleep(1000);
69-
let lastCharmId: string | undefined = undefined;
70-
for (const step of scenario.steps) {
71-
promptCount++;
72-
const newCharmId = await processCommand(step, lastCharmId, cache);
73-
if (newCharmId) {
74-
lastCharmId = newCharmId;
75-
}
76-
}
77-
}
78-
console.log(`Successfully processed ${promptCount} prompts.`);
79-
}
80-
81-
async function processCommand(
82-
step: Step,
83-
lastCharmId: string | undefined,
84-
cache = true,
85-
): Promise<string | undefined> {
86-
const { type, prompt } = step;
87-
88-
switch (type) {
89-
case CommandType.New: {
90-
console.log(`Adding: "${prompt}"`);
91-
const form = await processWorkflow(prompt, charmManager, {
92-
cache,
93-
model,
94-
prefill: {
95-
classification: {
96-
workflowType: "imagine",
97-
confidence: 1.0,
98-
reasoning: "hard coded",
99-
},
100-
},
101-
});
102-
const charm = await castNewRecipe(charmManager, form);
103-
const id = getEntityId(charm);
104-
if (id) {
105-
console.log(`Charm added: ${id["/"]}`);
106-
await verifyCharm(id["/"], prompt);
107-
return id["/"];
108-
}
109-
break;
110-
}
111-
case CommandType.Extend: {
112-
console.log(`Extending: "${prompt}"`);
113-
if (!lastCharmId) {
114-
throw new Error("Last charm ID is undefined.");
115-
}
116-
const charm = await charmManager.get(lastCharmId);
117-
const form = await processWorkflow(prompt, charmManager, {
118-
existingCharm: charm,
119-
cache,
120-
model,
121-
prefill: {
122-
classification: {
123-
workflowType: "imagine",
124-
confidence: 1.0,
125-
reasoning: "hard coded",
126-
},
127-
},
128-
});
129-
130-
await castNewRecipe(charmManager, form);
131-
const id = getEntityId(charm);
132-
if (id) {
133-
console.log(`Charm added: ${id["/"]}`);
134-
await verifyCharm(id["/"], prompt);
135-
return id["/"];
136-
} else {
137-
console.error(`Charm not added: ${prompt}`);
138-
}
139-
break;
140-
}
141-
case CommandType.ImportJSON: {
142-
console.log(`Importing JSON for: "${prompt}"`);
143-
if (!step.data) {
144-
throw new Error("Missing data for JSON import.");
145-
}
146-
147-
const charm = await createDataCharm(
148-
charmManager,
149-
step.data,
150-
step.dataSchema,
151-
prompt,
152-
);
153-
154-
const id = getEntityId(charm);
155-
console.log(`Charm added from JSON import`, { id });
156-
if (id) {
157-
console.log(`Charm added from JSON import: ${id["/"]}`);
158-
await verifyCharm(id["/"], "shows a jsonschema for " + prompt);
159-
return id["/"];
160-
}
161-
break;
162-
}
163-
case CommandType.Other: {
164-
throw new Error("Unsupported command type.");
165-
}
166-
}
167-
}
168-
169-
const charmResults: CharmResult[] = [];
170-
171-
async function verifyCharm(id: string, prompt: string): Promise<string> {
172-
// FIXME(ja): can we navigate without causing a page reload?
173-
await goto(`/${name!}/${id}`);
174-
addErrorListeners();
175-
await sleep(5000);
176-
await ensureReportDir(name!);
177-
const screenshotPath = `results/${name}/${id}.png`;
178-
await screenshot(id, screenshotPath);
179-
const errors = await checkForErrors();
180-
if (errors.length > 0) {
181-
charmResults.push({
182-
id,
183-
prompt,
184-
screenshotPath,
185-
status: "FAIL",
186-
summary: `Errors: ${errors.join("\n")}`,
60+
const executedScenario = await processScenario({
61+
scenario,
62+
model,
63+
cache,
64+
name,
65+
charmManager,
18766
});
188-
return `Error: ${errors.join("\n")}`;
67+
executedScenarios.push(executedScenario);
18968
}
190-
191-
const verdict = await llmVerifyCharm(prompt, screenshotPath);
192-
console.log(`Charm verified: ${id} - ${verdict}`);
193-
194-
// Parse the verdict and add to results
195-
const parsedVerdict = JSON.parse(verdict);
196-
charmResults.push({
197-
id,
198-
prompt,
199-
screenshotPath,
200-
status: parsedVerdict.result,
201-
summary: parsedVerdict.summary,
202-
});
203-
204-
return verdict;
69+
console.log(`Processed ${executedScenarios.length} scenarios.`);
70+
return executedScenarios;
20571
}
20672

73+
// FIXME(ja): if the tag doesn't exist, we should error out with warning, show the tags
20774
try {
208-
await login(name);
209-
await processPrompts(tag);
210-
await ensureReportDir(name);
211-
await generateReport(name, charmResults, toolshedUrl, scenarios);
212-
} catch (e) {
213-
console.error(e);
75+
await login(name!);
76+
await processScenarios({ tag, name });
77+
await ensureReportDir(name!);
78+
await generateReport(name!, executedScenarios, toolshedUrl, scenarios);
21479
} finally {
215-
await sleep(100);
80+
await new Promise((resolve) => setTimeout(resolve, 100));
21681
await browser.close();
21782
Deno.exit(0);
21883
}

seeder/interfaces.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export type Command = {
2222
export type CharmResult = {
2323
id: string;
2424
prompt: string;
25-
screenshotPath: string;
25+
screenshotPath?: string;
2626
status: string;
2727
summary: string;
2828
};
@@ -39,3 +39,8 @@ export type Scenario = {
3939
steps: Step[];
4040
tags?: string[];
4141
};
42+
43+
export interface ExecutedScenario {
44+
scenario: Scenario;
45+
results: CharmResult[];
46+
}

seeder/jumble.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ import { browser, page } from "./browser.ts";
55
import { toolshedUrl } from "./env.ts";
66
export { browser };
77

8-
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
8+
export const sleep = (ms: number) =>
9+
new Promise((resolve) => setTimeout(resolve, ms));
910

1011
async function waitForSelectorClick(
1112
page: Page,

0 commit comments

Comments
 (0)