11import { parseArgs } from "@std/cli/parse-args" ;
2- import {
3- castNewRecipe ,
4- CharmManager ,
5- compileAndRunRecipe ,
6- } from "@commontools/charm" ;
7- import { getEntityId , setBobbyServerUrl , storage } from "@commontools/runner" ;
8- import { createSession , Identity } from "@commontools/identity" ;
9- import { LLMClient , setLLMUrl } from "@commontools/llm" ;
10- import { createDataCharm , processWorkflow } from "@commontools/charm" ;
11- import { type CharmResult , CommandType , type Step } from "./interfaces.ts" ;
2+ import { setBobbyServerUrl , storage } from "@commontools/runner" ;
3+ import { setLLMUrl } from "@commontools/llm" ;
4+ import { processScenario } from "./processor.ts" ;
5+ import { type ExecutedScenario } from "./interfaces.ts" ;
126import { scenarios } from "./scenarios.ts" ;
137import { toolshedUrl } from "./env.ts" ;
14- import { llmVerifyCharm } from "./judge.ts" ;
158import { ensureReportDir , generateReport } from "./report.ts" ;
16- import {
17- addErrorListeners ,
18- browser ,
19- checkForErrors ,
20- goto ,
21- login ,
22- screenshot ,
23- } from "./jumble.ts" ;
24-
9+ import { browser , login } from "./jumble.ts" ;
10+ import { createSession , Identity } from "@commontools/identity" ;
11+ import { CharmManager } from "@commontools/charm" ;
2512const {
2613 name,
2714 tag,
@@ -38,181 +25,59 @@ const {
// `cache` is simply the negation of the parsed `noCache` value.
const cache = !noCache;

// FIXME(ja): if the name already exists, we should not use it!
// A name is mandatory: it is used below for login, the session and the report.
if (!name) {
  console.error("Error: Missing `--name`.");
  Deno.exit(1);
}

// Remote storage, the Bobby server and the LLM endpoint all use `toolshedUrl`.
storage.setRemoteStorage(new URL(toolshedUrl));
setBobbyServerUrl(toolshedUrl);
setLLMUrl(toolshedUrl);

// Track executed scenarios and steps
const executedScenarios: ExecutedScenario[] = [];
5839
/**
 * Runs the configured scenarios one after another and records every result in
 * the module-level `executedScenarios` list.
 *
 * @param options.tag When set, a scenario is skipped if its `tags` are
 *   undefined or do not include this value.
 * @param options.name Passed to `ensureReportDir`, `createSession` and
 *   `processScenario`.
 * @returns The module-level `executedScenarios` array, after all selected
 *   scenarios have been appended to it.
 */
async function processScenarios(
  options: { tag: string | undefined; name: string },
): Promise<ExecutedScenario[]> {
  const { tag, name } = options;

  await ensureReportDir(name);

  // Build the session step by step, then hand it to the charm manager.
  const identity = await Identity.fromPassphrase("common user");
  const session = await createSession({ identity, name });
  const charmManager = new CharmManager(session);
  console.log(`Processing scenarios...`);

  for (const scenario of scenarios) {
    // With no tag every scenario runs; otherwise the scenario must carry it.
    const selected = !tag ||
      (scenario.tags !== undefined && scenario.tags.includes(tag));
    if (selected) {
      const result = await processScenario({
        scenario,
        model,
        cache,
        name,
        charmManager,
      });
      executedScenarios.push(result);
    }
  }

  console.log(`Processed ${executedScenarios.length} scenarios.`);
  return executedScenarios;
}
20672
// FIXME(ja): if the tag doesn't exist, we should error out with warning, show the tags

// Exit status handed to Deno.exit() once cleanup is done; set to 1 on failure.
let exitCode = 0;
try {
  // `name` is already checked by the `--name` guard earlier in the file.
  await login(name);
  await processScenarios({ tag, name });
  await ensureReportDir(name);
  await generateReport(name, executedScenarios, toolshedUrl, scenarios);
} catch (e: unknown) {
  // Without this handler the Deno.exit() call in `finally` would terminate the
  // process before the error is ever reported, and with a success status.
  console.error(e);
  exitCode = 1;
} finally {
  // Short pause before closing the browser.
  // NOTE(review): the purpose of the 100 ms delay is not evident here - confirm.
  await new Promise((resolve) => setTimeout(resolve, 100));
  await browser.close();
  Deno.exit(exitCode);
}
0 commit comments