Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions charm/src/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export const createDataCharm = (
const schema = ${schemaString};

export default recipe(schema, schema, (data) => ({
[NAME]: "${name ?? "data import"}",
[NAME]: "${name ?? "Data Import"}",
[UI]: <div><h2>Your data has this schema</h2><pre>${
schemaString.replaceAll("{", "&#123;")
.replaceAll("}", "&#125;")
Expand All @@ -64,7 +64,7 @@ export const createDataCharm = (
return compileAndRunRecipe(
charmManager,
dataRecipeSrc,
name ?? "data import",
name ?? "Data Import",
data,
);
};
Expand Down
215 changes: 40 additions & 175 deletions seeder/cli.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,14 @@
import { parseArgs } from "@std/cli/parse-args";
import {
castNewRecipe,
CharmManager,
compileAndRunRecipe,
} from "@commontools/charm";
import { getEntityId, setBobbyServerUrl, storage } from "@commontools/runner";
import { createSession, Identity } from "@commontools/identity";
import { LLMClient, setLLMUrl } from "@commontools/llm";
import { createDataCharm, processWorkflow } from "@commontools/charm";
import { type CharmResult, CommandType, type Step } from "./interfaces.ts";
import { setBobbyServerUrl, storage } from "@commontools/runner";
import { setLLMUrl } from "@commontools/llm";
import { processScenario } from "./processor.ts";
import { type ExecutedScenario } from "./interfaces.ts";
import { scenarios } from "./scenarios.ts";
import { toolshedUrl } from "./env.ts";
import { llmVerifyCharm } from "./judge.ts";
import { ensureReportDir, generateReport } from "./report.ts";
import {
addErrorListeners,
browser,
checkForErrors,
goto,
login,
screenshot,
} from "./jumble.ts";

import { browser, login } from "./jumble.ts";
import { createSession, Identity } from "@commontools/identity";
import { CharmManager } from "@commontools/charm";
const {
name,
tag,
Expand All @@ -38,181 +25,59 @@ const {
const cache = !noCache;

if (!name) {
// FIXME(ja): if the name already exists, we should not use it!
console.error("Error: Missing `--name`.");
Deno.exit(1);
}

storage.setRemoteStorage(new URL(toolshedUrl));
setBobbyServerUrl(toolshedUrl);
setLLMUrl(toolshedUrl);
const llmClient = new LLMClient();

const charmManager = new CharmManager(
await createSession({
identity: await Identity.fromPassphrase("common user"),
name,
}),
);

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// Track executed scenarios and steps
const executedScenarios: ExecutedScenario[] = [];

async function processPrompts(tag: string | undefined) {
let promptCount = 0;
console.log(`Processing prompts...`);
async function processScenarios({
tag,
name,
}: {
tag: string | undefined;
name: string;
}) {
await ensureReportDir(name);
const charmManager = new CharmManager(
await createSession({
identity: await Identity.fromPassphrase("common user"),
name,
}),
);
console.log(`Processing scenarios...`);

for (const scenario of scenarios) {
if (tag && (scenario.tags === undefined || !scenario.tags.includes(tag))) {
continue;
}
await goto(toolshedUrl);
await sleep(1000);
let lastCharmId: string | undefined = undefined;
for (const step of scenario.steps) {
promptCount++;
const newCharmId = await processCommand(step, lastCharmId, cache);
if (newCharmId) {
lastCharmId = newCharmId;
}
}
}
console.log(`Successfully processed ${promptCount} prompts.`);
}

async function processCommand(
step: Step,
lastCharmId: string | undefined,
cache = true,
): Promise<string | undefined> {
const { type, prompt } = step;

switch (type) {
case CommandType.New: {
console.log(`Adding: "${prompt}"`);
const form = await processWorkflow(prompt, charmManager, {
cache,
model,
prefill: {
classification: {
workflowType: "imagine",
confidence: 1.0,
reasoning: "hard coded",
},
},
});
const charm = await castNewRecipe(charmManager, form);
const id = getEntityId(charm);
if (id) {
console.log(`Charm added: ${id["/"]}`);
await verifyCharm(id["/"], prompt);
return id["/"];
}
break;
}
case CommandType.Extend: {
console.log(`Extending: "${prompt}"`);
if (!lastCharmId) {
throw new Error("Last charm ID is undefined.");
}
const charm = await charmManager.get(lastCharmId);
const form = await processWorkflow(prompt, charmManager, {
existingCharm: charm,
cache,
model,
prefill: {
classification: {
workflowType: "imagine",
confidence: 1.0,
reasoning: "hard coded",
},
},
});

await castNewRecipe(charmManager, form);
const id = getEntityId(charm);
if (id) {
console.log(`Charm added: ${id["/"]}`);
await verifyCharm(id["/"], prompt);
return id["/"];
} else {
console.error(`Charm not added: ${prompt}`);
}
break;
}
case CommandType.ImportJSON: {
console.log(`Importing JSON for: "${prompt}"`);
if (!step.data) {
throw new Error("Missing data for JSON import.");
}

const charm = await createDataCharm(
charmManager,
step.data,
step.dataSchema,
prompt,
);

const id = getEntityId(charm);
console.log(`Charm added from JSON import`, { id });
if (id) {
console.log(`Charm added from JSON import: ${id["/"]}`);
await verifyCharm(id["/"], "shows a jsonschema for " + prompt);
return id["/"];
}
break;
}
case CommandType.Other: {
throw new Error("Unsupported command type.");
}
}
}

const charmResults: CharmResult[] = [];

async function verifyCharm(id: string, prompt: string): Promise<string> {
// FIXME(ja): can we navigate without causing a page reload?
await goto(`/${name!}/${id}`);
addErrorListeners();
await sleep(5000);
await ensureReportDir(name!);
const screenshotPath = `results/${name}/${id}.png`;
await screenshot(id, screenshotPath);
const errors = await checkForErrors();
if (errors.length > 0) {
charmResults.push({
id,
prompt,
screenshotPath,
status: "FAIL",
summary: `Errors: ${errors.join("\n")}`,
const executedScenario = await processScenario({
scenario,
model,
cache,
name,
charmManager,
});
return `Error: ${errors.join("\n")}`;
executedScenarios.push(executedScenario);
}

const verdict = await llmVerifyCharm(prompt, screenshotPath);
console.log(`Charm verified: ${id} - ${verdict}`);

// Parse the verdict and add to results
const parsedVerdict = JSON.parse(verdict);
charmResults.push({
id,
prompt,
screenshotPath,
status: parsedVerdict.result,
summary: parsedVerdict.summary,
});

return verdict;
console.log(`Processed ${executedScenarios.length} scenarios.`);
return executedScenarios;
}

// FIXME(ja): if the tag doesn't match any scenario, we should error out with a warning and list the available tags
try {
await login(name);
await processPrompts(tag);
await ensureReportDir(name);
await generateReport(name, charmResults, toolshedUrl, scenarios);
} catch (e) {
console.error(e);
await login(name!);
await processScenarios({ tag, name });
await ensureReportDir(name!);
await generateReport(name!, executedScenarios, toolshedUrl, scenarios);
} finally {
await sleep(100);
await new Promise((resolve) => setTimeout(resolve, 100));
await browser.close();
Deno.exit(0);
}
7 changes: 6 additions & 1 deletion seeder/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export type Command = {
export type CharmResult = {
id: string;
prompt: string;
screenshotPath: string;
screenshotPath?: string;
status: string;
summary: string;
};
Expand All @@ -39,3 +39,8 @@ export type Scenario = {
steps: Step[];
tags?: string[];
};

/**
 * Pairs a scenario with the charm results recorded for it.
 *
 * NOTE(review): presumably produced once per scenario run by the seeder and
 * later consumed by report generation — confirm against processor.ts and
 * report.ts.
 */
export interface ExecutedScenario {
/** The scenario definition these results belong to. */
scenario: Scenario;
/**
 * Charm results associated with the scenario.
 * Presumably one entry per step that produced a charm — TODO confirm.
 */
results: CharmResult[];
}
3 changes: 2 additions & 1 deletion seeder/jumble.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { browser, page } from "./browser.ts";
import { toolshedUrl } from "./env.ts";
export { browser };

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
/**
 * Creates a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 *
 * @param ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves after the timer elapses; it never rejects.
 */
export const sleep = (ms: number) =>
  new Promise((wake) => {
    // Hand the resolver straight to the timer so it is invoked with no arguments.
    setTimeout(wake, ms);
  });

async function waitForSelectorClick(
page: Page,
Expand Down
Loading