diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml index 76bfdef1a..4f7545bac 100644 --- a/.github/workflows/deno.yml +++ b/.github/workflows/deno.yml @@ -367,6 +367,54 @@ jobs: FRONTEND_URL=http://localhost:8000/ \ deno task integration + seeder-integration-test: + name: "Seeder Integration Tests" + runs-on: ubuntu-latest + needs: ["build-binaries"] + environment: ci + services: + redis: + image: redis + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: ๐Ÿ“ฅ Checkout repository + uses: actions/checkout@v4 + + - name: ๐Ÿฆ• Setup Deno 2.2.2 + uses: denoland/setup-deno@v2 + with: + deno-version: "2.2.2" + + - name: ๐Ÿ“ฆ Cache Deno dependencies + uses: actions/cache@v3 + with: + path: | + ~/.deno + ~/.cache/deno + key: ${{ runner.os }}-deno-${{ hashFiles('**/deno.json') }} + + - name: ๐Ÿ“ฅ Download built binaries + uses: actions/download-artifact@v4 + + - name: ๐Ÿš€ Start Toolshed server for testing + run: | + chmod +x ./common-binaries/toolshed + CTTS_AI_LLM_ANTHROPIC_API_KEY=fake \ + CACHE_DIR=${GITHUB_WORKSPACE}/jumble/integration/cache \ + ./common-binaries/toolshed & + + - name: ๐Ÿงช Run seeder integration tests + working-directory: seeder + run: | + TOOLSHED_API_URL=http://localhost:8000/ \ + deno task start --tag smol --no-verify --no-report --name ${{ github.run_id }} + # Automatic deployment to staging (toolshed) deploy-toolshed: name: "Deploy to Toolshed (Staging)" diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml index 5ad0791d2..fcd97a511 100644 --- a/.github/workflows/evals.yml +++ b/.github/workflows/evals.yml @@ -106,7 +106,7 @@ jobs: key: llm-cache-${{ github.run_id }} restore-keys: llm-cache - - name: โœ๏ธ List LLM Cache Load + - name: โœ๏ธ List LLM Cache Load run: | mkdir -p ./llm_cache_dir/llm-api-cache ls -1 ./llm_cache_dir/llm-api-cache > ./cache-log-pre @@ -138,17 +138,24 @@ jobs: FLAGS="--tag 10x10" fi - TOOLSHED_API_URL=http://localhost:8000/ \ - OPENAI_API_KEY=${{ secrets.CTTS_AI_LLM_OPENAI_API_KEY }} \ - deno task start --name ${{ env.target_sha }} $FLAGS + # Wrap in an if/else, we don't want the job to fail yet + # until we can confidently expect passing results. + if + TOOLSHED_API_URL=http://localhost:8000/ \ + OPENAI_API_KEY=${{ secrets.CTTS_AI_LLM_OPENAI_API_KEY }} \ + deno task start --name ${{ env.target_sha }} $FLAGS ; then + echo "All charms have been successfully verified!" + else + echo "Some charms have failed verification." + fi - - name: โœ๏ธ List LLM Cache Save + - name: โœ๏ธ List LLM Cache Save run: | mkdir -p ./llm_cache_dir/llm-api-cache ls -1 ./llm_cache_dir/llm-api-cache > ./cache-log-post echo "$(cat ./cache-log-post | wc -l) items found in cache." cat ./cache-log-post - + - name: ๐Ÿ“ฆ LLM Cache Status run: | if diff ./cache-log-pre ./cache-log-post > ./cache-analysis-diff ; then @@ -158,7 +165,7 @@ jobs: ADDITIONS=$(cat ./cache-analysis-diff | wc -l) echo "LLM cache has added $((ADDITIONS / 2)) entries." fi - + - name: ๐Ÿ“ค Upload reports uses: actions/upload-artifact@v4 with: diff --git a/deno.jsonc b/deno.jsonc index a690f6578..a4d79c8f4 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -112,4 +112,4 @@ "zod-to-json-schema": "npm:zod-to-json-schema@^3.24.1", "zod": "npm:zod@^3.24.1" } -} +} \ No newline at end of file diff --git a/jumble/integration/cache/llm-api-cache/3d0fbce7fd401830cddcbafc815811755891a538c1c3ad8401399a51f353d7c8.json b/jumble/integration/cache/llm-api-cache/3d0fbce7fd401830cddcbafc815811755891a538c1c3ad8401399a51f353d7c8.json deleted file mode 100644 index 3dabb45e8..000000000 --- a/jumble/integration/cache/llm-api-cache/3d0fbce7fd401830cddcbafc815811755891a538c1c3ad8401399a51f353d7c8.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "model": "anthropic:claude-3-7-sonnet-latest", - "system": "# React Component Builder\n\nCreate an interactive React component that fulfills the user's request. Focus on delivering a clean, useful implementation with appropriate features.\n\n\n**This task is part of a 2-Phase Process.**\n\n1. First phase (already completed):\n - Analyzed the user's request\n - Created a detailed specification\n - Generated a structured data schema\n\n2. Your job (second phase):\n - Create a reactive UI component based on the provided specification and schema\n - Implement the UI exactly according to the specification\n - Strictly adhere to the data schema provided\n\n\n\n- Define a title with `const title = 'Your App Name';`\n- Implement both `onLoad` and `onReady` functions\n- Use Tailwind CSS for styling with tasteful defaults\n- Do not write inline, use emoji for icons\n- Carefully avoid infinite loops and recursion that may cause performance issues\n\n\n\n- React and ReactDOM are pre-imported - don't import them again\n- All React hooks must be namespaced (e.g., `React.useState`, `React.useEffect`)\n- Follow React hooks rules - never nest or conditionally call hooks\n- For form handling, use `onClick` handlers instead of `onSubmit`\n\n\n\n- **useReactiveCell(keyPath: string[])** - Persistent data storage with reactive updates\n- **generateText({ system, messages })** - Generate text via Large Language Model\n- **generateObject({ system, messages })** - Generate JSON object via Large Language Model\n- **readWebpage(url)** - Fetch and parse external web content\n- **generateImage(prompt)** - Create AI-generated images\n\n \n ## Important Note About useReactiveCell(keyPath: string[])\n - **useReactiveCell is a React Hook** and must follow all React hook rules\n - It should only be used for persistent state and must draw from the provided schema\n - For any ephemeral state, use `React.useState`\n - Only call useReactiveCell at the top level of your React function components or custom hooks\n - Do not call useReactiveCell inside loops, conditions, or nested functions\n - Do not call useReactiveCell directly inside the onReady function - it can only be used inside React components\n - The onReady function itself is not a React component and cannot use React hooks directly\n - Example of correct usage:\n ```jsx\n function onReady(mount, sourceData, modules) {\n // This is NOT a React component, so do NOT use hooks here!\n \n // Create a React component to use hooks properly:\n function MyApp() {\n // React hooks can be used here\n const [data, setData] = useReactiveCell('myData', defaultValue);\n return
{/* your JSX */}
;\n }\n \n // Render the React component to the mount point\n const root = ReactDOM.createRoot(mount);\n root.render();\n }\n ```\n
\n
\n\n\n- Request additional libraries in `onLoad` by returning an array of module names\n- Available libraries:\n - imports : [object Object]\n- Only use the explicitly provided libraries\n\n\n\n\n- Do not use browser dialog functions (`prompt()`, `alert()`, `confirm()`)\n- Avoid any methods that could compromise security or user experience\n\n\n\n\n{\n \"type\": \"object\",\n \"properties\": {\n \"simplevalue2\": {\n \"type\": \"object\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n }\n }\n },\n \"displayData\": {\n \"type\": \"object\",\n \"title\": \"Display Data\",\n \"description\": \"The data being displayed from simpleValue2\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"title\": \"Values\",\n \"description\": \"The string values from simpleValue2\",\n \"items\": {\n \"type\": \"string\"\n },\n \"default\": []\n },\n \"lastUpdated\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"title\": \"Last Updated\",\n \"description\": \"When the display was last refreshed\"\n }\n },\n \"required\": [\n \"values\"\n ]\n },\n \"displaySettings\": {\n \"type\": \"object\",\n \"title\": \"Display Settings\",\n \"description\": \"Configuration for how the data is displayed\",\n \"properties\": {\n \"showTimestamp\": {\n \"type\": \"boolean\",\n \"title\": \"Show Timestamp\",\n \"description\": \"Whether to show when the data was last updated\",\n \"default\": true\n },\n \"refreshInterval\": {\n \"type\": \"integer\",\n \"title\": \"Refresh Interval\",\n \"description\": \"How often to refresh the display (in seconds)\",\n \"default\": 30\n }\n }\n }\n },\n \"title\": \"SimpleValue2 Viewer\",\n \"description\": \"A component to display the current values stored in the simpleValue2 charm.\"\n}\n\n\n\n# SDK Usage Guide\n\n\n## 1. `useReactiveCell` Hook\n\nThe `useReactiveCell` hook binds to a reactive cell given a key path and returns a tuple `[doc, setDoc]`:\n\nAny keys from the view-model-schema are valid for useReactiveCell, any other keys will fail. Provide a default as the second argument, **do not set an initial value explicitly**.\n\nFor this schema:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"counter\": {\n \"type\": \"number\",\n },\n \"title\": {\n \"type\": \"string\",\n \"default\": \"My Counter App\"\n }\n }\n}\n```\n\n```jsx\nfunction CounterComponent() {\n // Correct: useReactiveCell called at top level of component\n const [counter, setCounter] = useReactiveCell(\"counter\");\n const [maxValue, setMaxValue] = useReactiveCell(['settings', 'maxValue']);\n\n const onIncrement = useCallback(() => {\n // writing to the cell automatically triggers a re-render\n setCounter(counter + 1);\n }, [counter]);\n\n return (\n \n );\n}\n```\n\n\n\nSeveral APIs exist for generating text, JSON or image urls.\n\n\n`generateText({ system, messages}): Promise`\n\n```jsx\nasync function fetchLLMResponse() {\n const result = await generateText({\n system: 'Translate all the messages to emojis, reply in JSON.',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke']\n })\n console.log('LLM responded:', result);\n}\n```\n\n\n\n\n`window.generateObject({ system, messages }): Promise`\n\nYou must give the exact schema of the response in the prompt.\n\nFor example: \"Generate a traditional Vietnamese recipe in JSON format, with the\nfollowing properties: name (string), ingredients (array of strings),\ninstructions (array of strings)\"\n\n`generateObject` returns a parsed object already, or `undefined`. Be defensive working with the response, the LLM may make mistakes.\n\n\n```jsx\nconst promptPayload = ;\nconst result = await generateObject({\n system: 'Translate all the messages to emojis, reply in JSON with the following properties: an array of objects, each with original_text (string), emoji_translation (string)',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke'],\n});\nconsole.log('JSON response from llm:', result);\n\n// [\n// {\n// \"original_text\": \"Hi\",\n// \"emoji_translation\": \"๐Ÿ‘‹\"\n// },\n// {\n// \"original_text\": \"How can I help you today?\",\n// \"emoji_translation\": \"๐Ÿค”โ“๐Ÿ™‹โ€โ™‚๏ธ๐Ÿ“…\"\n// },\n// {\n// \"original_text\": \"tell me a joke\",\n// \"emoji_translation\": \"๐Ÿ—ฃ๏ธ๐Ÿ‘‰๐Ÿ˜‚\"\n// }\n// ]\n```\n\n\n\nNOTE: Language model requests are globally cached based on your prompt.\nThis means that identical requests will return the same result. If your llm use\nrequires unique results on every request, make sure to introduce a cache-breaking\nstring such as a timestamp or incrementing number/id.\n\n```jsx\n// To avoid the cache we'll use a cache-busting string.\nconst cacheBreaker = Date.now();\n\nconst result = await generateObject({\n system: \"You are a professional chef specializing in Mexican cuisine. Generate a detailed, authentic Mexican recipe in JSON format with the following properties: title (string), ingredients (array of strings), instructions (array of strings), prepTime (integer in minutes), cookTime (integer in minutes)\",\n messages: [\"give me something spicy!\" + \" \" + cacheBreaker],\n});\nconsole.log('JSON response from llm:', result);\n\n// {\n// \"title\": \"Camarones a la Diabla (Devil Shrimp)\",\n// \"ingredients\": [\n// \"1.5 lbs Large Shrimp, peeled and deveined\",\n// \"4 tbsp Olive Oil\",\n// \"1 medium White Onion, finely chopped\",\n// \"4 cloves Garlic, minced\",\n// \"2-3 Habanero Peppers, finely chopped (adjust to your spice preference, remove seeds for less heat)\",\n// \"1 (28 oz) can Crushed Tomatoes\",\n// \"1/2 cup Chicken Broth\",\n// \"2 tbsp Tomato Paste\",\n// \"1 tbsp Apple Cider Vinegar\",\n// \"1 tbsp Dried Oregano\",\n// \"1 tsp Cumin\",\n// \"1/2 tsp Smoked Paprika\",\n// \"1/4 tsp Ground Cloves\",\n// \"Salt and Black Pepper to taste\",\n// \"Fresh Cilantro, chopped, for garnish\",\n// \"Lime wedges, for serving\"\n// ],\n// \"instructions\": [\n// \"In a large bowl, toss the shrimp with salt and pepper.\",\n// \"Heat the olive oil in a large skillet or Dutch oven over medium-high heat.\",\n// \"Add the onion and cook until softened, about 5 minutes.\",\n// \"Add the garlic and habanero peppers and cook for 1 minute more, until fragrant.\",\n// \"Stir in the crushed tomatoes, chicken broth, tomato paste, apple cider vinegar, oregano, cumin, smoked paprika, and cloves.\",\n// \"Bring the sauce to a simmer and cook for 15 minutes, stirring occasionally, until slightly thickened.\",\n// \"Add the shrimp to the sauce and cook for 3-5 minutes, or until the shrimp are pink and cooked through.\",\n// \"Taste and adjust seasoning with salt and pepper as needed.\",\n// \"Garnish with fresh cilantro and serve immediately with lime wedges. Serve with rice or tortillas.\"\n// ],\n// \"prepTime\": 20,\n// \"cookTime\": 30\n// }\n```\n\n\n\n\n\n\n\nSynchronous, generates a URL that will load the image.\n\n```jsx\nfunction ImageComponent() {\n return \"Generated;\n}\n```\n\n\n\n\n\n`readWebpage(url: string): Promise` Returns markdown format.\n\n```jsx\nasync function fetchFromUrl() {\n const url = 'https://twopm.studio';\n const result = await readWebpage(url);\n console.log('Markdown:', result.content);\n}\n```\n\n\n\nAll generated code must follow this pattern:\n\n```javascript\n// Import from modern ESM libraries:\n// - react\n// - react-dom\n// - react-dom/client\n// - d3\n// - moment\n// - marked\n// - @react-spring/web\n// - @use-gesture/react\n// - uuid\n// - tone\nfunction onLoad() {\n return ['@react-spring/web']; // Request the modules you need\n}\n\nconst title = 'My ESM App';\nfunction ImageComponent({ url }) {\n return \"Generated;\n}\n\nfunction MyComponent({ label, description }) {\n return (\n
\n

{label}

\n

{description}

\n \n
\n );\n}\n\nfunction TodoItem({ todo, onToggle, onDelete }) {\n return (\n
\n \n \n {todo.text}\n \n \n Delete\n \n
\n );\n}\n\nfunction TodoList({ todo, setTodos}) {\n const [newTodo, setNewTodo] = React.useState('');\n\n const addTodo = () => {\n if (newTodo.trim() === '') return;\n\n const newTodoItem = {\n id: Date.now(),\n text: newTodo,\n completed: false\n };\n\n setTodos([...todos, newTodoItem]);\n setNewTodo('');\n };\n\n const toggleTodo = (id) => {\n setTodos(todos.map(todo =>\n todo.id === id ? { ...todo, completed: !todo.completed } : todo\n ));\n };\n\n const deleteTodo = (id) => {\n setTodos(todos.filter(todo => todo.id !== id));\n };\n\n return (\n
\n

Todo List

\n\n
\n setNewTodo(e.target.value)}\n placeholder=\"Add a new todo\"\n className=\"flex-grow p-2 border rounded-l\"\n />\n \n Add\n \n
\n\n
\n {todos.length > 0 ? (\n todos.map(todo => (\n toggleTodo(todo.id)}\n onDelete={() => deleteTodo(todo.id)}\n />\n ))\n ) : (\n

No todos yet!

\n )}\n
\n
\n );\n}\n\n// Main application code with modules passed as third parameter\nfunction onReady(mount, sourceData, libs) {\n const { useState, useEffect } = React; // React is available globally\n const { useSpring, animated } = libs['@react-spring/web']; // Access imported module\n\n function MyApp() {\n const [count, setCount] = useReactiveCell('count');\n const [todos, setTodos] = useReactiveCell('todos');\n\n const props = useSpring({\n from: { opacity: 0 },\n to: { opacity: 1 }\n });\n\n return (\n
\n \n setCount(count + 1)}\n >\n Clicks: {count}\n \n \n \n
\n );\n }\n\n // Use the client API for React 18\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```\n\n", - "messages": [ - { - "role": "user", - "content": "The user asked you to create the source code with the following specification:\n```\n\nshow the data from `simpleValue2`\n\nShow the data from the `simpleValue2` charm, displaying its current value and any associated metadata.\n\n1. Access the simpleValue2 data from the existing schema\n2. Create a display component that renders the values array as a list\n3. Implement auto-refresh functionality to keep the display in sync with data changes\nThe SimpleValue2 Viewer will display all string values currently stored in the simpleValue2.values array. It will present the values in a clean, readable list format. The viewer will automatically update when the underlying data changes.\n```" - }, - { - "role": "assistant", - "content": "\nfunction onLoad() {\n return []; // No additional libraries needed\n}\n\nconst title = 'SimpleValue2 Viewer';\n\nfunction ValuesList({ values }) {\n return (\n
\n

Values

\n {values && values.length > 0 ? (\n
    \n {values.map((value, index) => (\n
  • \n {value}\n
  • \n ))}\n
\n ) : (\n

No values available

\n )}\n
\n );\n}\n\nfunction LastUpdatedInfo({ timestamp, showTimestamp }) {\n if (!showTimestamp || !timestamp) return null;\n \n const formattedDate = new Date(timestamp).toLocaleString();\n \n return (\n
\n Last updated: {formattedDate}\n
\n );\n}\n\nfunction RefreshButton({ onRefresh, isLoading }) {\n return (\n \n {isLoading ? 'Refreshing...' : '๐Ÿ”„ Refresh Now'}\n \n );\n}\n\nfunction SettingsPanel({ showTimestamp, setShowTimestamp, refreshInterval, setRefreshInterval }) {\n return (\n
\n

Display Settings

\n \n
\n setShowTimestamp(e.target.checked)}\n className=\"mr-2 h-4 w-4\"\n />\n \n
\n \n
\n \n setRefreshInterval(Math.max(5, parseInt(e.target.value) || 30))}\n min=\"5\"\n className=\"border border-gray-300 rounded px-3 py-2 w-32\"\n />\n
\n
\n );\n}\n\nfunction SimpleValue2Viewer() {\n // Access the data from simpleValue2\n const [simpleValue2Data, setSimpleValue2Data] = useReactiveCell(['simplevalue2', 'values'], []);\n \n // Access display data\n const [displayData, setDisplayData] = useReactiveCell(['displayData', 'values'], []);\n const [lastUpdated, setLastUpdated] = useReactiveCell(['displayData', 'lastUpdated'], '');\n \n // Access display settings\n const [showTimestamp, setShowTimestamp] = useReactiveCell(['displaySettings', 'showTimestamp'], true);\n const [refreshInterval, setRefreshInterval] = useReactiveCell(['displaySettings', 'refreshInterval'], 30);\n \n // Local state for UI\n const [isRefreshing, setIsRefreshing] = React.useState(false);\n \n // Function to update the display data\n const refreshData = React.useCallback(() => {\n setIsRefreshing(true);\n \n // Update the display data with the current simpleValue2 values\n setDisplayData(simpleValue2Data || []);\n setLastUpdated(new Date().toISOString());\n \n setTimeout(() => {\n setIsRefreshing(false);\n }, 500);\n }, [simpleValue2Data, setDisplayData, setLastUpdated]);\n \n // Set up auto-refresh\n React.useEffect(() => {\n refreshData(); // Initial refresh\n \n const intervalId = setInterval(() => {\n refreshData();\n }, refreshInterval * 1000);\n \n return () => clearInterval(intervalId);\n }, [refreshData, refreshInterval]);\n \n // Watch for changes in simpleValue2Data and refresh when it changes\n React.useEffect(() => {\n refreshData();\n }, [simpleValue2Data, refreshData]);\n \n return (\n
\n
\n

SimpleValue2 Viewer

\n \n
\n \n
\n \n \n
\n \n \n
\n );\n}\n\nfunction onReady(mount, sourceData) {\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```" - } - ], - "stop": "\n```", - "maxTokens": 64000 -} \ No newline at end of file diff --git a/jumble/integration/cache/llm-api-cache/4276002ae5287209ed4074671d4be9b326c9da271b39d0f198809db1756daa39.json b/jumble/integration/cache/llm-api-cache/4276002ae5287209ed4074671d4be9b326c9da271b39d0f198809db1756daa39.json new file mode 100644 index 000000000..55150d852 --- /dev/null +++ b/jumble/integration/cache/llm-api-cache/4276002ae5287209ed4074671d4be9b326c9da271b39d0f198809db1756daa39.json @@ -0,0 +1,16 @@ +{ + "model": "anthropic:claude-3-7-sonnet-20250219", + "system": "# React Component Builder\n\nCreate an interactive React component that fulfills the user's request. Focus on delivering a clean, useful implementation with appropriate features.\n\n\n**This task is part of a 2-Phase Process.**\n\n1. First phase (already completed):\n - Analyzed the user's request\n - Created a detailed specification\n - Generated a structured data schema\n\n2. Your job (second phase):\n - Create a reactive UI component based on the provided specification and schema\n - Implement the UI exactly according to the specification\n - Strictly adhere to the data schema provided\n\n\n\n- Define a title with `const title = 'Your App Name';`\n- Implement both `onLoad` and `onReady` functions\n- Use Tailwind CSS for styling with tasteful defaults\n- Do not write inline, use emoji for icons\n- Carefully avoid infinite loops and recursion that may cause performance issues\n\n\n\n- React and ReactDOM are pre-imported - don't import them again\n- All React hooks must be namespaced (e.g., `React.useState`, `React.useEffect`)\n- Follow React hooks rules - never nest or conditionally call hooks\n- For form handling, use `onClick` handlers instead of `onSubmit`\n\n\n\n- **useReactiveCell(keyPath: string[])** - Persistent data storage with reactive updates\n- **generateText({ system, messages })** - Generate text via Large Language Model\n- **generateObject({ system, messages })** - Generate JSON object via Large Language Model\n- **readWebpage(url)** - Fetch and parse external web content\n- **generateImage(prompt)** - Create AI-generated images\n\n \n ## Important Note About useReactiveCell(keyPath: string[])\n - **useReactiveCell is a React Hook** and must follow all React hook rules\n - It should only be used for persistent state and must draw from the provided schema\n - For any ephemeral state, use `React.useState`\n - Only call useReactiveCell at the top level of your React function components or custom hooks\n - Do not call useReactiveCell inside loops, conditions, or nested functions\n - Do not call useReactiveCell directly inside the onReady function - it can only be used inside React components\n - The onReady function itself is not a React component and cannot use React hooks directly\n - Example of correct usage:\n ```jsx\n function onReady(mount, sourceData, modules) {\n // This is NOT a React component, so do NOT use hooks here!\n \n // Create a React component to use hooks properly:\n function MyApp() {\n // React hooks can be used here\n const [data, setData] = useReactiveCell('myData', defaultValue);\n return
{/* your JSX */}
;\n }\n \n // Render the React component to the mount point\n const root = ReactDOM.createRoot(mount);\n root.render();\n }\n ```\n
\n
\n\n\n- Request additional libraries in `onLoad` by returning an array of module names\n- Available libraries:\n - imports : [object Object]\n- Only use the explicitly provided libraries\n\n\n\n\n- Do not use browser dialog functions (`prompt()`, `alert()`, `confirm()`)\n- Avoid any methods that could compromise security or user experience\n\n\n\n\n{\n \"type\": \"object\",\n \"properties\": {\n \"board\": {\n \"type\": \"array\",\n \"title\": \"Game Board\",\n \"description\": \"4x4 grid of number tiles (0 represents empty)\",\n \"default\": [\n [\n 0,\n 0,\n 0,\n 0\n ],\n [\n 0,\n 0,\n 0,\n 0\n ],\n [\n 0,\n 0,\n 0,\n 0\n ],\n [\n 0,\n 0,\n 0,\n 0\n ]\n ],\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"integer\",\n \"minimum\": 0\n }\n }\n },\n \"score\": {\n \"type\": \"integer\",\n \"title\": \"Current Score\",\n \"description\": \"Player's current score in this game\",\n \"default\": 0,\n \"minimum\": 0\n },\n \"highScore\": {\n \"type\": \"integer\",\n \"title\": \"High Score\",\n \"description\": \"Highest score achieved across games\",\n \"default\": 0,\n \"minimum\": 0\n },\n \"gameState\": {\n \"type\": \"string\",\n \"title\": \"Game State\",\n \"description\": \"Current state of the game\",\n \"enum\": [\n \"playing\",\n \"won\",\n \"lost\"\n ],\n \"default\": \"playing\"\n },\n \"highestTile\": {\n \"type\": \"integer\",\n \"title\": \"Highest Tile\",\n \"description\": \"Value of the highest tile on the board\",\n \"default\": 2,\n \"minimum\": 2\n }\n },\n \"title\": \"2048 Sliding Puzzle Game\",\n \"description\": \"A game charm to play the classic 2048 sliding tile puzzle with score tracking.\"\n}\n\n\n\n# SDK Usage Guide\n\n\n## 1. `useReactiveCell` Hook\n\nThe `useReactiveCell` hook binds to a reactive cell given a key path and returns a tuple `[doc, setDoc]`:\n\nAny keys from the view-model-schema are valid for useReactiveCell, any other keys will fail. Provide a default as the second argument, **do not set an initial value explicitly**.\n\nFor this schema:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"counter\": {\n \"type\": \"number\",\n },\n \"title\": {\n \"type\": \"string\",\n \"default\": \"My Counter App\"\n }\n }\n}\n```\n\n```jsx\nfunction CounterComponent() {\n // Correct: useReactiveCell called at top level of component\n const [counter, setCounter] = useReactiveCell(\"counter\");\n const [maxValue, setMaxValue] = useReactiveCell(['settings', 'maxValue']);\n\n const onIncrement = useCallback(() => {\n // writing to the cell automatically triggers a re-render\n setCounter(counter + 1);\n }, [counter]);\n\n return (\n \n );\n}\n```\n\n\n\nSeveral APIs exist for generating text, JSON or image urls.\n\n\n`generateText({ system, messages}): Promise`\n\n```jsx\nasync function fetchLLMResponse() {\n const result = await generateText({\n system: 'Translate all the messages to emojis, reply in JSON.',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke']\n })\n console.log('LLM responded:', result);\n}\n```\n\n\n\n\n`window.generateObject({ system, messages }): Promise`\n\nYou must give the exact schema of the response in the prompt.\n\nFor example: \"Generate a traditional Vietnamese recipe in JSON format, with the\nfollowing properties: name (string), ingredients (array of strings),\ninstructions (array of strings)\"\n\n`generateObject` returns a parsed object already, or `undefined`. Be defensive working with the response, the LLM may make mistakes.\n\n\n```jsx\nconst promptPayload = ;\nconst result = await generateObject({\n system: 'Translate all the messages to emojis, reply in JSON with the following properties: an array of objects, each with original_text (string), emoji_translation (string)',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke'],\n});\nconsole.log('JSON response from llm:', result);\n\n// [\n// {\n// \"original_text\": \"Hi\",\n// \"emoji_translation\": \"๐Ÿ‘‹\"\n// },\n// {\n// \"original_text\": \"How can I help you today?\",\n// \"emoji_translation\": \"๐Ÿค”โ“๐Ÿ™‹โ€โ™‚๏ธ๐Ÿ“…\"\n// },\n// {\n// \"original_text\": \"tell me a joke\",\n// \"emoji_translation\": \"๐Ÿ—ฃ๏ธ๐Ÿ‘‰๐Ÿ˜‚\"\n// }\n// ]\n```\n\n\n\nNOTE: Language model requests are globally cached based on your prompt.\nThis means that identical requests will return the same result. If your llm use\nrequires unique results on every request, make sure to introduce a cache-breaking\nstring such as a timestamp or incrementing number/id.\n\n```jsx\n// To avoid the cache we'll use a cache-busting string.\nconst cacheBreaker = Date.now();\n\nconst result = await generateObject({\n system: \"You are a professional chef specializing in Mexican cuisine. Generate a detailed, authentic Mexican recipe in JSON format with the following properties: title (string), ingredients (array of strings), instructions (array of strings), prepTime (integer in minutes), cookTime (integer in minutes)\",\n messages: [\"give me something spicy!\" + \" \" + cacheBreaker],\n});\nconsole.log('JSON response from llm:', result);\n\n// {\n// \"title\": \"Camarones a la Diabla (Devil Shrimp)\",\n// \"ingredients\": [\n// \"1.5 lbs Large Shrimp, peeled and deveined\",\n// \"4 tbsp Olive Oil\",\n// \"1 medium White Onion, finely chopped\",\n// \"4 cloves Garlic, minced\",\n// \"2-3 Habanero Peppers, finely chopped (adjust to your spice preference, remove seeds for less heat)\",\n// \"1 (28 oz) can Crushed Tomatoes\",\n// \"1/2 cup Chicken Broth\",\n// \"2 tbsp Tomato Paste\",\n// \"1 tbsp Apple Cider Vinegar\",\n// \"1 tbsp Dried Oregano\",\n// \"1 tsp Cumin\",\n// \"1/2 tsp Smoked Paprika\",\n// \"1/4 tsp Ground Cloves\",\n// \"Salt and Black Pepper to taste\",\n// \"Fresh Cilantro, chopped, for garnish\",\n// \"Lime wedges, for serving\"\n// ],\n// \"instructions\": [\n// \"In a large bowl, toss the shrimp with salt and pepper.\",\n// \"Heat the olive oil in a large skillet or Dutch oven over medium-high heat.\",\n// \"Add the onion and cook until softened, about 5 minutes.\",\n// \"Add the garlic and habanero peppers and cook for 1 minute more, until fragrant.\",\n// \"Stir in the crushed tomatoes, chicken broth, tomato paste, apple cider vinegar, oregano, cumin, smoked paprika, and cloves.\",\n// \"Bring the sauce to a simmer and cook for 15 minutes, stirring occasionally, until slightly thickened.\",\n// \"Add the shrimp to the sauce and cook for 3-5 minutes, or until the shrimp are pink and cooked through.\",\n// \"Taste and adjust seasoning with salt and pepper as needed.\",\n// \"Garnish with fresh cilantro and serve immediately with lime wedges. Serve with rice or tortillas.\"\n// ],\n// \"prepTime\": 20,\n// \"cookTime\": 30\n// }\n```\n\n\n\n\n\n\n\nSynchronous, generates a URL that will load the image.\n\n```jsx\nfunction ImageComponent() {\n return \"Generated;\n}\n```\n\n\n\n\n\n`readWebpage(url: string): Promise` Returns markdown format.\n\n```jsx\nasync function fetchFromUrl() {\n const url = 'https://twopm.studio';\n const result = await readWebpage(url);\n console.log('Markdown:', result.content);\n}\n```\n\n\n\nAll generated code must follow this pattern:\n\n```javascript\n// Import from modern ESM libraries:\n// - react\n// - react-dom\n// - react-dom/client\n// - d3\n// - moment\n// - marked\n// - @react-spring/web\n// - @use-gesture/react\n// - uuid\n// - tone\nfunction onLoad() {\n return ['@react-spring/web']; // Request the modules you need\n}\n\nconst title = 'My ESM App';\nfunction ImageComponent({ url }) {\n return \"Generated;\n}\n\nfunction MyComponent({ label, description }) {\n return (\n
\n

{label}

\n

{description}

\n \n
\n );\n}\n\nfunction TodoItem({ todo, onToggle, onDelete }) {\n return (\n
\n \n \n {todo.text}\n \n \n Delete\n \n
\n );\n}\n\nfunction TodoList({ todo, setTodos}) {\n const [newTodo, setNewTodo] = React.useState('');\n\n const addTodo = () => {\n if (newTodo.trim() === '') return;\n\n const newTodoItem = {\n id: Date.now(),\n text: newTodo,\n completed: false\n };\n\n setTodos([...todos, newTodoItem]);\n setNewTodo('');\n };\n\n const toggleTodo = (id) => {\n setTodos(todos.map(todo =>\n todo.id === id ? { ...todo, completed: !todo.completed } : todo\n ));\n };\n\n const deleteTodo = (id) => {\n setTodos(todos.filter(todo => todo.id !== id));\n };\n\n return (\n
\n

Todo List

\n\n
\n setNewTodo(e.target.value)}\n placeholder=\"Add a new todo\"\n className=\"flex-grow p-2 border rounded-l\"\n />\n \n Add\n \n
\n\n
\n {todos.length > 0 ? (\n todos.map(todo => (\n toggleTodo(todo.id)}\n onDelete={() => deleteTodo(todo.id)}\n />\n ))\n ) : (\n

No todos yet!

\n )}\n
\n
\n );\n}\n\n// Main application code with modules passed as third parameter\nfunction onReady(mount, sourceData, libs) {\n const { useState, useEffect } = React; // React is available globally\n const { useSpring, animated } = libs['@react-spring/web']; // Access imported module\n\n function MyApp() {\n const [count, setCount] = useReactiveCell('count');\n const [todos, setTodos] = useReactiveCell('todos');\n\n const props = useSpring({\n from: { opacity: 0 },\n to: { opacity: 1 }\n });\n\n return (\n
\n \n setCount(count + 1)}\n >\n Clicks: {count}\n \n \n \n
\n );\n }\n\n // Use the client API for React 18\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```\n\n", + "messages": [ + { + "role": "user", + "content": "The user asked you to create the source code with the following specification:\n```\n\n2048 game\n\nCreate a ๐ŸŽฎ 2048 game charm with sliding ๐Ÿ”ข number tiles, score tracking, and game state management. The charm will need a grid layout, tile merging logic, and keyboard/swipe controls. Could add a ๐Ÿ† high score feature and ๐Ÿ”„ restart button.\n\n1. Implement the core game board with grid representation and tile movement/merging logic\n2. Create the UI with responsive grid layout and keyboard/swipe controls\n3. Add game state management, scoring system, and restart functionality\nThe 2048 game will feature a 4x4 grid where numbered tiles can be slid in four directions (up, down, left, right). When two tiles with the same number collide, they merge into a single tile with their sum. The game tracks the current score, game state (playing, won, lost), and highest tile achieved. A high score feature will persist the best score across game sessions.\n```" + }, + { + "role": "assistant", + "content": "\nfunction onLoad() {\n return ['@react-spring/web'];\n}\n\nconst title = '2048 Puzzle Game';\n\n// Game logic functions\nconst getEmptyBoard = () => [\n [0, 0, 0, 0],\n [0, 0, 0, 0],\n [0, 0, 0, 0],\n [0, 0, 0, 0]\n];\n\nconst getRandomPosition = (board) => {\n const emptyCells = [];\n \n for (let i = 0; i < 4; i++) {\n for (let j = 0; j < 4; j++) {\n if (board[i][j] === 0) {\n emptyCells.push({ row: i, col: j });\n }\n }\n }\n \n if (emptyCells.length === 0) return null;\n \n const randomIndex = Math.floor(Math.random() * emptyCells.length);\n return emptyCells[randomIndex];\n};\n\nconst addRandomTile = (board) => {\n const newBoard = JSON.parse(JSON.stringify(board));\n const position = getRandomPosition(newBoard);\n \n if (!position) return newBoard;\n \n const value = Math.random() < 0.9 ? 2 : 4;\n newBoard[position.row][position.col] = value;\n \n return newBoard;\n};\n\nconst initializeBoard = () => {\n let board = getEmptyBoard();\n board = addRandomTile(board);\n board = addRandomTile(board);\n return board;\n};\n\nconst rotateBoard = (board) => {\n const newBoard = getEmptyBoard();\n \n for (let i = 0; i < 4; i++) {\n for (let j = 0; j < 4; j++) {\n newBoard[i][j] = board[3 - j][i];\n }\n }\n \n return newBoard;\n};\n\nconst moveLeft = (board) => {\n const newBoard = JSON.parse(JSON.stringify(board));\n let score = 0;\n let moved = false;\n \n for (let i = 0; i < 4; i++) {\n let row = newBoard[i].filter(cell => cell !== 0);\n let resultRow = [];\n \n for (let j = 0; j < row.length; j++) {\n if (j < row.length - 1 && row[j] === row[j + 1]) {\n const mergedValue = row[j] * 2;\n resultRow.push(mergedValue);\n score += mergedValue;\n j++;\n moved = true;\n } else {\n resultRow.push(row[j]);\n }\n }\n \n while (resultRow.length < 4) {\n resultRow.push(0);\n }\n \n if (JSON.stringify(newBoard[i]) !== JSON.stringify(resultRow)) {\n moved = true;\n }\n \n newBoard[i] = resultRow;\n }\n \n return { board: newBoard, score, moved };\n};\n\nconst moveRight = (board) => {\n const newBoard = JSON.parse(JSON.stringify(board));\n \n // Reverse each row\n for (let i = 0; i < 4; i++) {\n newBoard[i].reverse();\n }\n \n // Move left\n const { board: movedBoard, score, moved } = moveLeft(newBoard);\n \n // Reverse back\n for (let i = 0; i < 4; i++) {\n movedBoard[i].reverse();\n }\n \n return { board: movedBoard, score, moved };\n};\n\nconst moveUp = (board) => {\n // Rotate right\n let rotatedBoard = rotateBoard(board);\n \n // Move left\n const { board: movedBoard, score, moved } = moveLeft(rotatedBoard);\n \n // Rotate left (3 times right)\n rotatedBoard = rotateBoard(movedBoard);\n rotatedBoard = rotateBoard(rotatedBoard);\n rotatedBoard = rotateBoard(rotatedBoard);\n \n return { board: rotatedBoard, score, moved };\n};\n\nconst moveDown = (board) => {\n // Rotate left (3 times right)\n let rotatedBoard = rotateBoard(board);\n rotatedBoard = rotateBoard(rotatedBoard);\n rotatedBoard = rotateBoard(rotatedBoard);\n \n // Move left\n const { board: movedBoard, score, moved } = moveLeft(rotatedBoard);\n \n // Rotate right\n rotatedBoard = rotateBoard(movedBoard);\n \n return { board: rotatedBoard, score, moved };\n};\n\nconst hasAvailableMoves = (board) => {\n // Check for empty cells\n for (let i = 0; i < 4; i++) {\n for (let j = 0; j < 4; j++) {\n if (board[i][j] === 0) return true;\n }\n }\n \n // Check for possible merges horizontally\n for (let i = 0; i < 4; i++) {\n for (let j = 0; j < 3; j++) {\n if (board[i][j] === board[i][j + 1]) return true;\n }\n }\n \n // Check for possible merges vertically\n for (let i = 0; i < 3; i++) {\n for (let j = 0; j < 4; j++) {\n if (board[i][j] === board[i + 1][j]) return true;\n }\n }\n \n return false;\n};\n\nconst getHighestTile = (board) => {\n let highest = 0;\n \n for (let i = 0; i < 4; i++) {\n for (let j = 0; j < 4; j++) {\n highest = Math.max(highest, board[i][j]);\n }\n }\n \n return highest;\n};\n\n// React components\nfunction Tile({ value, animation }) {\n const { animated } = window.libs['@react-spring/web'];\n \n const getBackgroundColor = () => {\n const colors = {\n 0: '#cdc1b4',\n 2: '#eee4da',\n 4: '#ede0c8',\n 8: '#f2b179',\n 16: '#f59563',\n 32: '#f67c5f',\n 64: '#f65e3b',\n 128: '#edcf72',\n 256: '#edcc61',\n 512: '#edc850',\n 1024: '#edc53f',\n 2048: '#edc22e',\n 4096: '#3c3a32',\n 8192: '#3c3a32',\n };\n \n return colors[value] || '#3c3a32';\n };\n \n const getTextColor = () => {\n return value <= 4 ? '#776e65' : '#f9f6f2';\n };\n \n const getFontSize = () => {\n if (value < 100) return '2rem';\n if (value < 1000) return '1.75rem';\n return '1.5rem';\n };\n \n return (\n \n {value !== 0 ? value : ''}\n \n );\n}\n\nfunction GameBoard({ board, handleKeyDown }) {\n const { useSpring } = window.libs['@react-spring/web'];\n \n // Animation for tiles\n const tileAnimation = useSpring({\n from: { opacity: 0, scale: 0.8 },\n to: { opacity: 1, scale: 1 },\n config: { tension: 300, friction: 20 }\n });\n \n React.useEffect(() => {\n window.addEventListener('keydown', handleKeyDown);\n return () => {\n window.removeEventListener('keydown', handleKeyDown);\n };\n }, [handleKeyDown]);\n \n // Touch handling for mobile\n const [touchStart, setTouchStart] = React.useState({ x: 0, y: 0 });\n \n const handleTouchStart = (e) => {\n const touch = e.touches[0];\n setTouchStart({ x: touch.clientX, y: touch.clientY });\n };\n \n const handleTouchEnd = (e) => {\n if (!e.changedTouches || !e.changedTouches[0]) return;\n \n const touch = e.changedTouches[0];\n const deltaX = touch.clientX - touchStart.x;\n const deltaY = touch.clientY - touchStart.y;\n \n // Determine swipe direction\n if (Math.abs(deltaX) > Math.abs(deltaY)) {\n // Horizontal swipe\n if (deltaX > 30) {\n handleKeyDown({ key: 'ArrowRight' });\n } else if (deltaX < -30) {\n handleKeyDown({ key: 'ArrowLeft' });\n }\n } else {\n // Vertical swipe\n if (deltaY > 30) {\n handleKeyDown({ key: 'ArrowDown' });\n } else if (deltaY < -30) {\n handleKeyDown({ key: 'ArrowUp' });\n }\n }\n };\n \n return (\n
\n {board.flat().map((value, index) => (\n
\n \n
\n ))}\n
\n );\n}\n\nfunction ScoreBoard({ score, highScore }) {\n return (\n
\n
\n
SCORE
\n
{score}
\n
\n
\n
BEST
\n
{highScore}
\n
\n
\n );\n}\n\nfunction GameOverModal({ gameState, highestTile, onRestart }) {\n if (gameState === 'playing') return null;\n \n return (\n
\n
\n

\n {gameState === 'won' ? 'You Won! ๐ŸŽ‰' : 'Game Over! ๐Ÿ˜ข'}\n

\n

\n {gameState === 'won' \n ? `Congratulations! You reached the 2048 tile!` \n : `Your highest tile was ${highestTile}.`}\n

\n \n
\n
\n );\n}\n\nfunction Game2048() {\n const [board, setBoard] = useReactiveCell(['board']);\n const [score, setScore] = useReactiveCell(['score']);\n const [highScore, setHighScore] = useReactiveCell(['highScore']);\n const [gameState, setGameState] = useReactiveCell(['gameState']);\n const [highestTile, setHighestTile] = useReactiveCell(['highestTile']);\n \n const handleKeyDown = React.useCallback((e) => {\n if (gameState !== 'playing') return;\n \n let result;\n \n switch (e.key) {\n case 'ArrowUp':\n result = moveUp(board);\n break;\n case 'ArrowDown':\n result = moveDown(board);\n break;\n case 'ArrowLeft':\n result = moveLeft(board);\n break;\n case 'ArrowRight':\n result = moveRight(board);\n break;\n default:\n return;\n }\n \n if (result.moved) {\n const newBoard = addRandomTile(result.board);\n const newScore = score + result.score;\n const newHighestTile = getHighestTile(newBoard);\n \n setBoard(newBoard);\n setScore(newScore);\n setHighestTile(Math.max(highestTile, newHighestTile));\n \n if (newHighestTile >= 2048 && gameState !== 'won') {\n setGameState('won');\n } else if (!hasAvailableMoves(newBoard)) {\n setGameState('lost');\n }\n \n if (newScore > highScore) {\n setHighScore(newScore);\n }\n }\n }, [board, score, highScore, gameState, highestTile]);\n \n const restartGame = () => {\n const newBoard = initializeBoard();\n setBoard(newBoard);\n setScore(0);\n setGameState('playing');\n setHighestTile(getHighestTile(newBoard));\n };\n \n // Initialize game if board is empty\n React.useEffect(() => {\n if (board.flat().every(cell => cell === 0)) {\n restartGame();\n }\n }, [board]);\n \n return (\n
\n
\n
\n

2048

\n \n
\n \n

\n Join the tiles, get to 2048! Use arrow keys or swipe to move.\n

\n \n \n \n
\n \n \n
\n
\n
\n );\n}\n\nfunction onReady(mount, sourceData, libs) {\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```" + } + ], + "stop": "\n```", + "maxTokens": 64000 +} \ No newline at end of file diff --git a/jumble/integration/cache/llm-api-cache/ad89522faed2f90e9a5b20a094e939e7f8cb79cd5089a84002dae9a3ff4239ef.json b/jumble/integration/cache/llm-api-cache/ad89522faed2f90e9a5b20a094e939e7f8cb79cd5089a84002dae9a3ff4239ef.json new file mode 100644 index 000000000..b3c765ffe --- /dev/null +++ b/jumble/integration/cache/llm-api-cache/ad89522faed2f90e9a5b20a094e939e7f8cb79cd5089a84002dae9a3ff4239ef.json @@ -0,0 +1,16 @@ +{ + "model": "anthropic:claude-3-7-sonnet-20250219", + "system": "\nYou are creating a simple MVP based on the user's goal, using an existing data schema. Focus on the simplest implementation that works with the provided schema.\n\n\nGiven a user's feature request and an existing data schema, you will:\n1. Create a short title (2-5 words) that names the artifact\n2. Create a one-sentence description in the format \"A to \"\n3. Create a concise specification (3-5 sentences max) that works with the existing schema\n4. Generate a brief implementation plan (3 steps max)\n5. Design a minimal JSON schema that represents the core data model\n\n\n\nYour response must be structured as follows:\n\nFreeform reasoning for you to consider the solution.\n\n\n[Short title for the artifact, 2-5 words]\n\n\n\n[One-sentence description in the format \"A to \"]\n\n\n\n[Concise specification that captures only the essential requirements]\n\n\n\n[Brief 3-step implementation plan using the existing schema]\n\n\n\n[Minimal JSON Schema in valid JSON format that represents data created by the artifact]\n\n\n\n\n1. Keep it minimal:\n - Include only essential fields (5-7 properties max)\n - Focus on the core functionality\n - If user requested complex features, simplify for this first version\n\n2. Each property should have:\n - A descriptive \"title\" field\n - A brief \"description\" field\n - A sensible default value where appropriate\n\n3. Example of a simple schema:\n```json\n{\n \"type\": \"object\",\n \"title\": \"Note List\",\n \"description\": \"A simple note list for the user\",\n \"properties\": {\n \"notes\": {\n \"type\": \"array\",\n \"title\": \"Notes\",\n \"description\": \"List of user notes\",\n \"default\": [],\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"title\": {\n \"type\": \"string\",\n \"title\": \"Title\",\n \"description\": \"Title of the note\",\n \"default\": \"New Note\"\n },\n \"content\": {\n \"type\": \"string\",\n \"title\": \"Content\",\n \"description\": \"Content of the note\"\n },\n \"created\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"title\": \"Created Date\",\n \"description\": \"When the note was created\"\n }\n },\n \"required\": [\"title\", \"content\"]\n }\n }\n },\n \"required\": [\"title\", \"content\"]\n}\n```\n\nGUIDELINES:\n- Aim for the simplest possible solution that works with the existing schema\n- The specification should take into account the existing schema structure\n- Focus on what can be achieved quickly with the existing data model\n- Avoid suggesting modifications to the schema if possible\n\n\n\nIMPORTANT:\n- Focus on the simplest working version\n- Aim for fewer fields rather than more\n- But still capture all the important state the user is creating\n- The user can always iterate and improve the solution later\n\n", + "stream": false, + "messages": [ + { + "role": "user", + "content": "\n\n\n2048 game\n\nCreate a ๐ŸŽฎ 2048 game charm with sliding ๐Ÿ”ข number tiles, score tracking, and game state management. The charm will need a grid layout, tile merging logic, and keyboard/swipe controls. Could add a ๐Ÿ† high score feature and ๐Ÿ”„ restart button.\n\n\n\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {}\n}\n```\n\n\nBased on this goal and the existing schema, please provide a title, description, any additional schema, detailed specification, and implementation plan.\n" + }, + { + "role": "assistant", + "content": "\nThe user wants to create a 2048 game charm. The 2048 game is a sliding tile puzzle where players combine tiles with the same numbers to create a tile with the number 2048. \n\nKey components needed:\n1. A grid (typically 4x4) to hold the number tiles\n2. Logic for sliding tiles in four directions (up, down, left, right)\n3. Merging logic when two tiles with the same number collide\n4. Score tracking\n5. Game state management (in progress, won, lost)\n6. Optional: High score tracking and restart functionality\n\nThe existing schema is empty, so I'll need to design a complete schema that captures:\n- The game board (grid of numbers)\n- Current score\n- Game state\n- High score (as mentioned in optional features)\n\nFor the implementation plan, I'll focus on the core game mechanics first, then the UI, and finally the additional features.\n\n\n\n2048 Sliding Puzzle Game\n\n\n\nA game charm to play the classic 2048 sliding tile puzzle with score tracking.\n\n\n\nThe 2048 game will feature a 4x4 grid where numbered tiles can be slid in four directions (up, down, left, right). When two tiles with the same number collide, they merge into a single tile with their sum. The game tracks the current score, game state (playing, won, lost), and highest tile achieved. A high score feature will persist the best score across game sessions.\n\n\n\n1. Implement the core game board with grid representation and tile movement/merging logic\n2. Create the UI with responsive grid layout and keyboard/swipe controls\n3. Add game state management, scoring system, and restart functionality\n\n\n\n{\n \"type\": \"object\",\n \"title\": \"2048 Game State\",\n \"description\": \"Represents the current state of a 2048 game\",\n \"properties\": {\n \"board\": {\n \"type\": \"array\",\n \"title\": \"Game Board\",\n \"description\": \"4x4 grid of number tiles (0 represents empty)\",\n \"default\": [\n [0, 0, 0, 0],\n [0, 0, 0, 0],\n [0, 0, 0, 0],\n [0, 0, 0, 0]\n ],\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"integer\",\n \"minimum\": 0\n }\n }\n },\n \"score\": {\n \"type\": \"integer\",\n \"title\": \"Current Score\",\n \"description\": \"Player's current score in this game\",\n \"default\": 0,\n \"minimum\": 0\n },\n \"highScore\": {\n \"type\": \"integer\",\n \"title\": \"High Score\",\n \"description\": \"Highest score achieved across games\",\n \"default\": 0,\n \"minimum\": 0\n },\n \"gameState\": {\n \"type\": \"string\",\n \"title\": \"Game State\",\n \"description\": \"Current state of the game\",\n \"enum\": [\"playing\", \"won\", \"lost\"],\n \"default\": \"playing\"\n },\n \"highestTile\": {\n \"type\": \"integer\",\n \"title\": \"Highest Tile\",\n \"description\": \"Value of the highest tile on the board\",\n \"default\": 2,\n \"minimum\": 2\n }\n },\n \"required\": [\"board\", \"score\", \"gameState\"]\n}\n" + } + ], + "maxTokens": 64000 +} \ No newline at end of file diff --git a/jumble/integration/cache/llm-api-cache/b0a1a973797ea0e477e6bf4d1cc83cf9ec59127763bea39a7428ec03e7ca2313.json b/jumble/integration/cache/llm-api-cache/b0a1a973797ea0e477e6bf4d1cc83cf9ec59127763bea39a7428ec03e7ca2313.json index 05195068b..32d4df6a5 100644 --- a/jumble/integration/cache/llm-api-cache/b0a1a973797ea0e477e6bf4d1cc83cf9ec59127763bea39a7428ec03e7ca2313.json +++ b/jumble/integration/cache/llm-api-cache/b0a1a973797ea0e477e6bf4d1cc83cf9ec59127763bea39a7428ec03e7ca2313.json @@ -9,7 +9,7 @@ }, { "role": "assistant", - "content": "\nLet's analyze what we have:\n\n1. The user wants to display data from a `simpleValue2` charm.\n2. The existing schema shows that `simpleValue2` contains an object with a property called `values`, which is an array of strings.\n\nThe user wants a simple way to view this data. I'll create a simple viewer component that displays the current values in the array.\n\nFor the schema, I'll keep it minimal since we're just displaying existing data. The result schema will represent what the viewer component will display - the values from the array and potentially some metadata about the display.\n\n\n\nSimpleValue2 Viewer\n\n\n\nA component to display the current values stored in the simpleValue2 charm.\n\n\n\nThe SimpleValue2 Viewer will display all string values currently stored in the simpleValue2.values array. It will present the values in a clean, readable list format. The viewer will automatically update when the underlying data changes.\n\n\n\n1. Access the simpleValue2 data from the existing schema\n2. Create a display component that renders the values array as a list\n3. Implement auto-refresh functionality to keep the display in sync with data changes\n\n\n\n{\n \"type\": \"object\",\n \"title\": \"SimpleValue2 Viewer\",\n \"description\": \"Display configuration for simpleValue2 data\",\n \"properties\": {\n \"displayData\": {\n \"type\": \"object\",\n \"title\": \"Display Data\",\n \"description\": \"The data being displayed from simpleValue2\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"title\": \"Values\",\n \"description\": \"The string values from simpleValue2\",\n \"items\": {\n \"type\": \"string\"\n },\n \"default\": []\n },\n \"lastUpdated\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"title\": \"Last Updated\",\n \"description\": \"When the display was last refreshed\"\n }\n },\n \"required\": [\"values\"]\n },\n \"displaySettings\": {\n \"type\": \"object\",\n \"title\": \"Display Settings\",\n \"description\": \"Configuration for how the data is displayed\",\n \"properties\": {\n \"showTimestamp\": {\n \"type\": \"boolean\",\n \"title\": \"Show Timestamp\",\n \"description\": \"Whether to show when the data was last updated\",\n \"default\": true\n },\n \"refreshInterval\": {\n \"type\": \"integer\",\n \"title\": \"Refresh Interval\",\n \"description\": \"How often to refresh the display (in seconds)\",\n \"default\": 30\n }\n }\n }\n },\n \"required\": [\"displayData\"]\n}\n" + "content": "\nLet's analyze what we have:\n\n1. The user wants to display data from a `simpleValue2` charm.\n2. The existing schema shows that `simplevalue2` is an object with a property called `values`, which is an array of strings.\n3. The user wants to see the current value and any associated metadata.\n\nFrom the schema, it seems like the main data is in the `values` array. There doesn't appear to be explicit metadata in the schema, just the array of string values.\n\nFor this MVP, I'll create a simple viewer that displays the array of strings from the `simplevalue2.values` property. Since there's no explicit metadata in the schema, I'll focus on displaying the values themselves in a clean, readable format.\n\nThe artifact will be a viewer component that reads from the existing schema and presents the data in a user-friendly way.\n\n\n\nSimpleValue2 Viewer\n\n\n\nA viewer component to display the contents of the simpleValue2 charm.\n\n\n\nThe SimpleValue2 Viewer will read and display all string values stored in the simplevalue2.values array. It will present these values in a clean, readable list format. The viewer will automatically update when the underlying data changes.\n\n\n\n1. Create a component that accesses the simplevalue2.values array from the existing schema\n2. Implement a display layout that renders each string value as a list item\n3. Add basic styling and empty state handling for when the values array is empty\n\n\n\n{\n \"type\": \"object\",\n \"title\": \"SimpleValue2 Viewer\",\n \"description\": \"Display component for simpleValue2 data\",\n \"properties\": {\n \"displayData\": {\n \"type\": \"object\",\n \"title\": \"Display Data\",\n \"description\": \"Formatted data for display\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"title\": \"Values\",\n \"description\": \"List of string values from simpleValue2\",\n \"default\": [],\n \"items\": {\n \"type\": \"string\"\n }\n },\n \"count\": {\n \"type\": \"integer\",\n \"title\": \"Count\",\n \"description\": \"Number of values in the array\",\n \"default\": 0\n },\n \"lastUpdated\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"title\": \"Last Updated\",\n \"description\": \"When the viewer last refreshed the data\"\n }\n },\n \"required\": [\"values\", \"count\"]\n }\n },\n \"required\": [\"displayData\"]\n}\n" } ], "maxTokens": 64000 diff --git a/jumble/integration/cache/llm-api-cache/cacf4ca8ff310d75d00121bae9caec8c1030970e950d3333386e4e461ee47650.json b/jumble/integration/cache/llm-api-cache/cacf4ca8ff310d75d00121bae9caec8c1030970e950d3333386e4e461ee47650.json new file mode 100644 index 000000000..9661c1d4b --- /dev/null +++ b/jumble/integration/cache/llm-api-cache/cacf4ca8ff310d75d00121bae9caec8c1030970e950d3333386e4e461ee47650.json @@ -0,0 +1,16 @@ +{ + "model": "anthropic:claude-3-7-sonnet-latest", + "system": "# React Component Builder\n\nCreate an interactive React component that fulfills the user's request. Focus on delivering a clean, useful implementation with appropriate features.\n\n\n**This task is part of a 2-Phase Process.**\n\n1. First phase (already completed):\n - Analyzed the user's request\n - Created a detailed specification\n - Generated a structured data schema\n\n2. Your job (second phase):\n - Create a reactive UI component based on the provided specification and schema\n - Implement the UI exactly according to the specification\n - Strictly adhere to the data schema provided\n\n\n\n- Define a title with `const title = 'Your App Name';`\n- Implement both `onLoad` and `onReady` functions\n- Use Tailwind CSS for styling with tasteful defaults\n- Do not write inline, use emoji for icons\n- Carefully avoid infinite loops and recursion that may cause performance issues\n\n\n\n- React and ReactDOM are pre-imported - don't import them again\n- All React hooks must be namespaced (e.g., `React.useState`, `React.useEffect`)\n- Follow React hooks rules - never nest or conditionally call hooks\n- For form handling, use `onClick` handlers instead of `onSubmit`\n\n\n\n- **useReactiveCell(keyPath: string[])** - Persistent data storage with reactive updates\n- **generateText({ system, messages })** - Generate text via Large Language Model\n- **generateObject({ system, messages })** - Generate JSON object via Large Language Model\n- **readWebpage(url)** - Fetch and parse external web content\n- **generateImage(prompt)** - Create AI-generated images\n\n \n ## Important Note About useReactiveCell(keyPath: string[])\n - **useReactiveCell is a React Hook** and must follow all React hook rules\n - It should only be used for persistent state and must draw from the provided schema\n - For any ephemeral state, use `React.useState`\n - Only call useReactiveCell at the top level of your React function components or custom hooks\n - Do not call useReactiveCell inside loops, conditions, or nested functions\n - Do not call useReactiveCell directly inside the onReady function - it can only be used inside React components\n - The onReady function itself is not a React component and cannot use React hooks directly\n - Example of correct usage:\n ```jsx\n function onReady(mount, sourceData, modules) {\n // This is NOT a React component, so do NOT use hooks here!\n \n // Create a React component to use hooks properly:\n function MyApp() {\n // React hooks can be used here\n const [data, setData] = useReactiveCell('myData', defaultValue);\n return
{/* your JSX */}
;\n }\n \n // Render the React component to the mount point\n const root = ReactDOM.createRoot(mount);\n root.render();\n }\n ```\n
\n
\n\n\n- Request additional libraries in `onLoad` by returning an array of module names\n- Available libraries:\n - imports : [object Object]\n- Only use the explicitly provided libraries\n\n\n\n\n- Do not use browser dialog functions (`prompt()`, `alert()`, `confirm()`)\n- Avoid any methods that could compromise security or user experience\n\n\n\n\n{\n \"type\": \"object\",\n \"properties\": {\n \"simplevalue2\": {\n \"type\": \"object\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n }\n }\n },\n \"displayData\": {\n \"type\": \"object\",\n \"title\": \"Display Data\",\n \"description\": \"Formatted data for display\",\n \"properties\": {\n \"values\": {\n \"type\": \"array\",\n \"title\": \"Values\",\n \"description\": \"List of string values from simpleValue2\",\n \"default\": [],\n \"items\": {\n \"type\": \"string\"\n }\n },\n \"count\": {\n \"type\": \"integer\",\n \"title\": \"Count\",\n \"description\": \"Number of values in the array\",\n \"default\": 0\n },\n \"lastUpdated\": {\n \"type\": \"string\",\n \"format\": \"date-time\",\n \"title\": \"Last Updated\",\n \"description\": \"When the viewer last refreshed the data\"\n }\n },\n \"required\": [\n \"values\",\n \"count\"\n ]\n }\n },\n \"title\": \"SimpleValue2 Viewer\",\n \"description\": \"A viewer component to display the contents of the simpleValue2 charm.\"\n}\n\n\n\n# SDK Usage Guide\n\n\n## 1. `useReactiveCell` Hook\n\nThe `useReactiveCell` hook binds to a reactive cell given a key path and returns a tuple `[doc, setDoc]`:\n\nAny keys from the view-model-schema are valid for useReactiveCell, any other keys will fail. Provide a default as the second argument, **do not set an initial value explicitly**.\n\nFor this schema:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"counter\": {\n \"type\": \"number\",\n },\n \"title\": {\n \"type\": \"string\",\n \"default\": \"My Counter App\"\n }\n }\n}\n```\n\n```jsx\nfunction CounterComponent() {\n // Correct: useReactiveCell called at top level of component\n const [counter, setCounter] = useReactiveCell(\"counter\");\n const [maxValue, setMaxValue] = useReactiveCell(['settings', 'maxValue']);\n\n const onIncrement = useCallback(() => {\n // writing to the cell automatically triggers a re-render\n setCounter(counter + 1);\n }, [counter]);\n\n return (\n \n );\n}\n```\n\n\n\nSeveral APIs exist for generating text, JSON or image urls.\n\n\n`generateText({ system, messages}): Promise`\n\n```jsx\nasync function fetchLLMResponse() {\n const result = await generateText({\n system: 'Translate all the messages to emojis, reply in JSON.',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke']\n })\n console.log('LLM responded:', result);\n}\n```\n\n\n\n\n`window.generateObject({ system, messages }): Promise`\n\nYou must give the exact schema of the response in the prompt.\n\nFor example: \"Generate a traditional Vietnamese recipe in JSON format, with the\nfollowing properties: name (string), ingredients (array of strings),\ninstructions (array of strings)\"\n\n`generateObject` returns a parsed object already, or `undefined`. Be defensive working with the response, the LLM may make mistakes.\n\n\n```jsx\nconst promptPayload = ;\nconst result = await generateObject({\n system: 'Translate all the messages to emojis, reply in JSON with the following properties: an array of objects, each with original_text (string), emoji_translation (string)',\n messages: ['Hi', 'How can I help you today?', 'tell me a joke'],\n});\nconsole.log('JSON response from llm:', result);\n\n// [\n// {\n// \"original_text\": \"Hi\",\n// \"emoji_translation\": \"๐Ÿ‘‹\"\n// },\n// {\n// \"original_text\": \"How can I help you today?\",\n// \"emoji_translation\": \"๐Ÿค”โ“๐Ÿ™‹โ€โ™‚๏ธ๐Ÿ“…\"\n// },\n// {\n// \"original_text\": \"tell me a joke\",\n// \"emoji_translation\": \"๐Ÿ—ฃ๏ธ๐Ÿ‘‰๐Ÿ˜‚\"\n// }\n// ]\n```\n\n\n\nNOTE: Language model requests are globally cached based on your prompt.\nThis means that identical requests will return the same result. If your llm use\nrequires unique results on every request, make sure to introduce a cache-breaking\nstring such as a timestamp or incrementing number/id.\n\n```jsx\n// To avoid the cache we'll use a cache-busting string.\nconst cacheBreaker = Date.now();\n\nconst result = await generateObject({\n system: \"You are a professional chef specializing in Mexican cuisine. Generate a detailed, authentic Mexican recipe in JSON format with the following properties: title (string), ingredients (array of strings), instructions (array of strings), prepTime (integer in minutes), cookTime (integer in minutes)\",\n messages: [\"give me something spicy!\" + \" \" + cacheBreaker],\n});\nconsole.log('JSON response from llm:', result);\n\n// {\n// \"title\": \"Camarones a la Diabla (Devil Shrimp)\",\n// \"ingredients\": [\n// \"1.5 lbs Large Shrimp, peeled and deveined\",\n// \"4 tbsp Olive Oil\",\n// \"1 medium White Onion, finely chopped\",\n// \"4 cloves Garlic, minced\",\n// \"2-3 Habanero Peppers, finely chopped (adjust to your spice preference, remove seeds for less heat)\",\n// \"1 (28 oz) can Crushed Tomatoes\",\n// \"1/2 cup Chicken Broth\",\n// \"2 tbsp Tomato Paste\",\n// \"1 tbsp Apple Cider Vinegar\",\n// \"1 tbsp Dried Oregano\",\n// \"1 tsp Cumin\",\n// \"1/2 tsp Smoked Paprika\",\n// \"1/4 tsp Ground Cloves\",\n// \"Salt and Black Pepper to taste\",\n// \"Fresh Cilantro, chopped, for garnish\",\n// \"Lime wedges, for serving\"\n// ],\n// \"instructions\": [\n// \"In a large bowl, toss the shrimp with salt and pepper.\",\n// \"Heat the olive oil in a large skillet or Dutch oven over medium-high heat.\",\n// \"Add the onion and cook until softened, about 5 minutes.\",\n// \"Add the garlic and habanero peppers and cook for 1 minute more, until fragrant.\",\n// \"Stir in the crushed tomatoes, chicken broth, tomato paste, apple cider vinegar, oregano, cumin, smoked paprika, and cloves.\",\n// \"Bring the sauce to a simmer and cook for 15 minutes, stirring occasionally, until slightly thickened.\",\n// \"Add the shrimp to the sauce and cook for 3-5 minutes, or until the shrimp are pink and cooked through.\",\n// \"Taste and adjust seasoning with salt and pepper as needed.\",\n// \"Garnish with fresh cilantro and serve immediately with lime wedges. Serve with rice or tortillas.\"\n// ],\n// \"prepTime\": 20,\n// \"cookTime\": 30\n// }\n```\n\n\n\n\n\n\n\nSynchronous, generates a URL that will load the image.\n\n```jsx\nfunction ImageComponent() {\n return \"Generated;\n}\n```\n\n\n\n\n\n`readWebpage(url: string): Promise` Returns markdown format.\n\n```jsx\nasync function fetchFromUrl() {\n const url = 'https://twopm.studio';\n const result = await readWebpage(url);\n console.log('Markdown:', result.content);\n}\n```\n\n\n\nAll generated code must follow this pattern:\n\n```javascript\n// Import from modern ESM libraries:\n// - react\n// - react-dom\n// - react-dom/client\n// - d3\n// - moment\n// - marked\n// - @react-spring/web\n// - @use-gesture/react\n// - uuid\n// - tone\nfunction onLoad() {\n return ['@react-spring/web']; // Request the modules you need\n}\n\nconst title = 'My ESM App';\nfunction ImageComponent({ url }) {\n return \"Generated;\n}\n\nfunction MyComponent({ label, description }) {\n return (\n
\n

{label}

\n

{description}

\n \n
\n );\n}\n\nfunction TodoItem({ todo, onToggle, onDelete }) {\n return (\n
\n \n \n {todo.text}\n \n \n Delete\n \n
\n );\n}\n\nfunction TodoList({ todo, setTodos}) {\n const [newTodo, setNewTodo] = React.useState('');\n\n const addTodo = () => {\n if (newTodo.trim() === '') return;\n\n const newTodoItem = {\n id: Date.now(),\n text: newTodo,\n completed: false\n };\n\n setTodos([...todos, newTodoItem]);\n setNewTodo('');\n };\n\n const toggleTodo = (id) => {\n setTodos(todos.map(todo =>\n todo.id === id ? { ...todo, completed: !todo.completed } : todo\n ));\n };\n\n const deleteTodo = (id) => {\n setTodos(todos.filter(todo => todo.id !== id));\n };\n\n return (\n
\n

Todo List

\n\n
\n setNewTodo(e.target.value)}\n placeholder=\"Add a new todo\"\n className=\"flex-grow p-2 border rounded-l\"\n />\n \n Add\n \n
\n\n
\n {todos.length > 0 ? (\n todos.map(todo => (\n toggleTodo(todo.id)}\n onDelete={() => deleteTodo(todo.id)}\n />\n ))\n ) : (\n

No todos yet!

\n )}\n
\n
\n );\n}\n\n// Main application code with modules passed as third parameter\nfunction onReady(mount, sourceData, libs) {\n const { useState, useEffect } = React; // React is available globally\n const { useSpring, animated } = libs['@react-spring/web']; // Access imported module\n\n function MyApp() {\n const [count, setCount] = useReactiveCell('count');\n const [todos, setTodos] = useReactiveCell('todos');\n\n const props = useSpring({\n from: { opacity: 0 },\n to: { opacity: 1 }\n });\n\n return (\n
\n \n setCount(count + 1)}\n >\n Clicks: {count}\n \n \n \n
\n );\n }\n\n // Use the client API for React 18\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```\n\n", + "messages": [ + { + "role": "user", + "content": "The user asked you to create the source code with the following specification:\n```\n\nshow the data from `simpleValue2`\n\nShow the data from the `simpleValue2` charm, displaying its current value and any associated metadata.\n\n1. Create a component that accesses the simplevalue2.values array from the existing schema\n2. Implement a display layout that renders each string value as a list item\n3. Add basic styling and empty state handling for when the values array is empty\nThe SimpleValue2 Viewer will read and display all string values stored in the simplevalue2.values array. It will present these values in a clean, readable list format. The viewer will automatically update when the underlying data changes.\n```" + }, + { + "role": "assistant", + "content": "\nfunction onLoad() {\n return []; // No additional modules needed\n}\n\nconst title = 'SimpleValue2 Viewer';\n\nfunction ValueItem({ value, index }) {\n return (\n
\n
\n {index + 1}.\n {value}\n
\n
\n );\n}\n\nfunction EmptyState() {\n return (\n
\n ๐Ÿ“‹\n

No values found

\n

The simpleValue2 data source is empty

\n
\n );\n}\n\nfunction SimpleValue2Viewer() {\n // Access the values from simplevalue2\n const [simpleValues, setSimpleValues] = useReactiveCell(['simplevalue2', 'values'], []);\n \n // Update display data whenever simpleValues changes\n const [displayData, setDisplayData] = useReactiveCell(['displayData'], {\n values: [],\n count: 0,\n lastUpdated: new Date().toISOString()\n });\n\n // Update the display data when simpleValues changes\n React.useEffect(() => {\n setDisplayData({\n values: simpleValues || [],\n count: (simpleValues || []).length,\n lastUpdated: new Date().toISOString()\n });\n }, [simpleValues, setDisplayData]);\n\n return (\n
\n
\n

{title}

\n
\n {displayData.count} value{displayData.count !== 1 ? 's' : ''}\n Last updated: {new Date(displayData.lastUpdated).toLocaleString()}\n
\n
\n\n
\n {displayData.values && displayData.values.length > 0 ? (\n
\n {displayData.values.map((value, index) => (\n \n ))}\n
\n ) : (\n \n )}\n
\n
\n );\n}\n\nfunction onReady(mount, sourceData) {\n function App() {\n return ;\n }\n\n const root = ReactDOM.createRoot(mount);\n root.render();\n}\n```" + } + ], + "stop": "\n```", + "maxTokens": 64000 +} \ No newline at end of file diff --git a/jumble/integration/cache/llm-api-cache/f48dcd323e7984932c033667c18dc58849319ab6aba51970740e36d0bf43a9c5.json b/jumble/integration/cache/llm-api-cache/f48dcd323e7984932c033667c18dc58849319ab6aba51970740e36d0bf43a9c5.json new file mode 100644 index 000000000..ae3ca15c3 --- /dev/null +++ b/jumble/integration/cache/llm-api-cache/f48dcd323e7984932c033667c18dc58849319ab6aba51970740e36d0bf43a9c5.json @@ -0,0 +1,15 @@ +{ + "system": "\n This is Common Tools. Our system uses the concept of a `Charm`, a bundle of data + UI + functionality that can be composed, remixed and reused. Charms are smaller than apps, closer to a screen or panel of a web app, but can be networked together into complex systems. Charms communicate using a reactive graph database, any changes to referenced data automatically propagate. The 'behavior' of a Charm is defined by a Recipe, a data flow graph where the nodes are functions.\n\n Users will come to you with intentions, requests and vague ideas of their needs. Your task is to 'autocomplete' their thought, using your knowledge of the Common Tools system.\n\n Your Capabilities:\n\n 1. generate a charm\n - generate a spec, implementation plan and code for a charm that meets the user's requirements\n 2. search for and cast existing spells from the spellbook\n - a spell can be 'cast' on an existing charm to change its behavior, but re-use the data\n 3. connect referenced data to a new charm\n - multiple charms can be\n 4. edit an existing charm\n - code and schema can be modified independently\n - the user may ask you to fix bugs, add features or tweak the overall concept slightly\n 5. navigate to an existing charm\n - sometimes, the user will ask for something that already exists, and you should be able to find it quickly\n\n Generate an 'autocompletion' of the user's request, in terms of the system capabilities. No dot points. Work with the user's voice, do not say 'I' or 'me'. It's a joint project with the user.\n Do not generate code or plan the project. e.g. \"I need to buy groceries\" -> \"Make a ๐Ÿ›’ grocery list and determine ๐Ÿ“ where to buy everything\". \"Emails from my boss\" -> \"Find ๐Ÿ“ง emails from a particular sender ๐Ÿ‘ฉโ€๐Ÿ’ผ (boss)\". \"Plan a party\" -> \"Create an ๐Ÿ’ƒ event charm with a ๐Ÿ“‹ guest list, ๐Ÿ—บ๏ธ location, date, time and ๐Ÿ‘ฉโ€๐ŸŽค theme.\"\n Return your just the autocompletion between tags.\n\n User's request: (IMAGINE) \"{{ INPUT }}\"", + "messages": [ + { + "role": "user", + "content": "2048 game" + }, + { + "role": "assistant", + "content": "Create a ๐ŸŽฎ 2048 game charm with sliding ๐Ÿ”ข number tiles, score tracking, and game state management. The charm will need a grid layout, tile merging logic, and keyboard/swipe controls. Could add a ๐Ÿ† high score feature and ๐Ÿ”„ restart button." + } + ], + "model": "anthropic:claude-3-7-sonnet-20250219", + "maxTokens": 64000 +} \ No newline at end of file diff --git a/scripts/check-all.sh b/scripts/check-all.sh index 5f18db8f1..42c78a0b2 100755 --- a/scripts/check-all.sh +++ b/scripts/check-all.sh @@ -13,5 +13,6 @@ deno check \ jumble \ memory \ runner \ + seeder \ toolshed \ ui diff --git a/seeder/browser.ts b/seeder/browser.ts index 925272423..923e217f7 100644 --- a/seeder/browser.ts +++ b/seeder/browser.ts @@ -1,21 +1,81 @@ -// FIXME(ja): all this should be in the utils module +import { + Browser as AstralBrowser, + launch, + Page as AstralPage, +} from "@astral/astral"; +import { login } from "@commontools/utils/integration"; +import { sleep } from "@commontools/utils/sleep"; -import { ConsoleEvent, launch } from "@astral/astral"; +// Wrapper around `@astral/astral`'s `Browser`. +export class Browser { + private browser: AstralBrowser | null; + private page: AstralPage | null; + private apiUrl: string; + private constructor( + browser: AstralBrowser, + page: AstralPage, + apiUrl: string, + ) { + this.browser = browser; + this.page = page; + this.apiUrl = apiUrl; + } -const HEADLESS = (Deno.env.get("HEADLESS") ?? "true") === "true"; -export const browser = await launch({ - args: ["--window-size=1280,1024"], - headless: HEADLESS, -}); -export const page = await browser.newPage(); + static async launch( + { headless, apiUrl }: { headless: boolean; apiUrl: string }, + ): Promise { + const browser = await launch({ + args: ["--window-size=1280,1024"], + headless, + }); + const page = await browser.newPage(); + return new Browser(browser, page, apiUrl); + } -// export const logs: ConsoleEvent[] = []; + async screenshot(filename: string): Promise { + this.checkIsOk(); + const screenshot = await this.page!.screenshot(); + return Deno.writeFile(filename, screenshot); + } -// page.addEventListener("console", (e: ConsoleEvent) => { -// logs.push(e); -// }); + async goto(url: string) { + this.checkIsOk(); + await this.page!.goto(new URL(url, this.apiUrl).toString()); + await sleep(1000); + this.addErrorListeners(); + await login(this.page!); + } -// export const getLogs = () => [...logs]; -// export const clearLogs = () => { -// logs.length = 0; -// }; + async checkForErrors(): Promise { + this.checkIsOk(); + return await this.page!.evaluate(() => { + // @ts-ignore: this code is stringified and sent to browser context + return globalThis.charmRuntimeErrors; + }); + } + + async close(): Promise { + this.checkIsOk(); + const { page, browser } = this; + this.browser = null; + this.page = null; + await browser!.close(); + } + + private addErrorListeners() { + this.page!.evaluate(() => { + // @ts-ignore: this code is stringified and sent to browser context + globalThis.charmRuntimeErrors = []; + globalThis.addEventListener("common-iframe-error", (e) => { + // @ts-ignore: this code is stringified and sent to browser context + globalThis.charmRuntimeErrors.push(e.detail.description); + }); + }); + } + + private checkIsOk() { + if (!this.browser || !this.page) { + throw new Error("Browser is already closed."); + } + } +} diff --git a/seeder/cli.ts b/seeder/cli.ts index 3e15447b5..2b3fd37bd 100644 --- a/seeder/cli.ts +++ b/seeder/cli.ts @@ -1,28 +1,37 @@ import { parseArgs } from "@std/cli/parse-args"; import { setBlobbyServerUrl, storage } from "@commontools/runner"; import { setLLMUrl } from "@commontools/llm"; -import { processScenario } from "./processor.ts"; -import { type ExecutedScenario } from "./interfaces.ts"; -import { scenarios } from "./scenarios.ts"; -import { toolshedUrl } from "./env.ts"; -import { ensureReportDir, generateReport } from "./report.ts"; -import { browser, login } from "./jumble.ts"; import { createSession, Identity } from "@commontools/identity"; import { CharmManager } from "@commontools/charm"; +import { sleep } from "@commontools/utils/sleep"; +import { Processor } from "./processor.ts"; +import { scenarios } from "./scenarios.ts"; +import { ExecutedScenario } from "./interfaces.ts"; +import { ensureReportDir, generateReport } from "./report.ts"; +import { Verifier } from "./verifier.ts"; + const { name, tag, "no-cache": noCache, + "no-verify": noVerify, + "no-report": noReport, model = "anthropic:claude-3-7-sonnet-20250219", } = parseArgs( Deno.args, { string: ["name", "tag", "model"], - boolean: ["no-cache"], + boolean: ["no-cache", "no-verify", "no-report"], }, ); +if (!noVerify && !Deno.env.get("OPENAI_API_KEY")) { + throw new Error("OPENAI_API_KEY is not set"); +} +const headless = !(Deno.env.get("HEADLESS") === "false"); const cache = !noCache; +const apiUrl = Deno.env.get("TOOLSHED_API_URL") ?? + "https://toolshed.saga-castor.ts.net/"; if (!name) { // FIXME(ja): if the name already exists, we should not use it! @@ -30,54 +39,39 @@ if (!name) { Deno.exit(1); } -storage.setRemoteStorage(new URL(toolshedUrl)); -setBlobbyServerUrl(toolshedUrl); -setLLMUrl(toolshedUrl); - -// Track executed scenarios and steps -const executedScenarios: ExecutedScenario[] = []; +storage.setRemoteStorage(new URL(apiUrl)); +setBlobbyServerUrl(apiUrl); +setLLMUrl(apiUrl); -async function processScenarios({ - tag, - name, -}: { - tag: string | undefined; - name: string; -}) { - await ensureReportDir(name); - const charmManager = new CharmManager( - await createSession({ - identity: await Identity.fromPassphrase("common user"), - name, - }), - ); - console.log(`Processing scenarios...`); +const charmManager = new CharmManager( + await createSession({ + identity: await Identity.fromPassphrase("common user"), + name, + }), +); - for (const scenario of scenarios) { - if (tag && (scenario.tags === undefined || !scenario.tags.includes(tag))) { - continue; - } - const executedScenario = await processScenario({ - scenario, - model, - cache, - name, - charmManager, - }); - executedScenarios.push(executedScenario); - } - console.log(`Processed ${executedScenarios.length} scenarios.`); - return executedScenarios; -} +const verifier = + await (noVerify ? undefined : Verifier.initialize({ apiUrl, headless })); +const processor = new Processor({ name, model, cache, charmManager, verifier }); -// FIXME(ja): if the tag doesn't exist, we should error out with warning, show the tags +let processed: ExecutedScenario[] | undefined; try { - await login(name!); - await processScenarios({ tag, name }); - await ensureReportDir(name!); - await generateReport(name!, executedScenarios, toolshedUrl, scenarios); + if (!noReport) await ensureReportDir(name); + // FIXME(ja): if the tag doesn't exist, we should error out with warning, show the tags + processed = await processor.process(scenarios, tag); + if (!noReport) await generateReport(name!, processed, apiUrl); } finally { - await new Promise((resolve) => setTimeout(resolve, 100)); - await browser.close(); - Deno.exit(0); + await sleep(100); + if (verifier) await verifier.close(); + if (!processed) { + Deno.exit(1); + } + + const success = processed.every(({ results }) => + results.every(({ status }) => status === "PASS" || status === "NOTVERIFIED") + ); + if (success) { + Deno.exit(0); + } + Deno.exit(1); } diff --git a/seeder/env.ts b/seeder/env.ts deleted file mode 100644 index 2a6a95ef7..000000000 --- a/seeder/env.ts +++ /dev/null @@ -1,2 +0,0 @@ -export const toolshedUrl = Deno.env.get("TOOLSHED_API_URL") ?? - "https://toolshed.saga-castor.ts.net/"; diff --git a/seeder/interfaces.ts b/seeder/interfaces.ts index 2b64178fa..88b460f77 100644 --- a/seeder/interfaces.ts +++ b/seeder/interfaces.ts @@ -20,10 +20,10 @@ export type Command = { }; export type CharmResult = { - id: string; + id: string | null; prompt: string; screenshotPath?: string; - status: string; + status: "PASS" | "FAIL" | "NOTVERIFIED"; summary: string; }; diff --git a/seeder/judge.ts b/seeder/judge.ts deleted file mode 100644 index abaf71d94..000000000 --- a/seeder/judge.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { generateObject } from "ai"; -import { openai } from "@ai-sdk/openai"; -import { z } from "zod"; - -if (!Deno.env.get("OPENAI_API_KEY")) { - throw new Error("OPENAI_API_KEY is not set"); -} - -const model = openai("gpt-4o-mini"); - -export async function llmVerifyCharm( - prompt: string, - filename: string, -): Promise { - const system = `You are a helpful assistant that verifies charm screenshots. - - Your task is to evaluate how well the screenshot represents what the user asked for in the prompt. - - If the screenshot accurately represents the prompt, return a PASS result with a brief explanation. - If the screenshot does not match the prompt, return a FAIL result with a brief explanation of what's missing or incorrect.`; - - const schema = z.object({ - result: z.enum(["PASS", "FAIL"]), - summary: z.string().describe("A 1-sentence summary of your evaluation"), - }); - - const { object } = await generateObject({ - system: system, - model, - schema, - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: prompt, - }, - { type: "image", image: Deno.readFileSync(filename) }, - ], - }, - ], - }); - - const result = schema.parse(object); - console.log({ result }); - return JSON.stringify(result); -} diff --git a/seeder/jumble.ts b/seeder/jumble.ts deleted file mode 100644 index 900d94ae5..000000000 --- a/seeder/jumble.ts +++ /dev/null @@ -1,121 +0,0 @@ -// FIXME(ja): all this should be in the utils module - -import { Page } from "@astral/astral"; -import { browser, page } from "./browser.ts"; -import { toolshedUrl } from "./env.ts"; -export { browser }; - -export const sleep = (ms: number) => - new Promise((resolve) => setTimeout(resolve, ms)); - -async function waitForSelectorClick( - page: Page, - selector: string, -): Promise { - console.log(`Waiting for "${selector}"...`); - const el = await page.waitForSelector(selector); - console.log(`Found "${selector}"! Clicking...`); - await el.click(); -} - -// FIXME(ja): we need to move this and other browser helpers to utils -// in a way that allows it to share with the basic-flow integration tests -export async function login(name: string) { - await goto(`/${name}`); - await sleep(1000); - const avatar = await page.$("#user-avatar"); - if (avatar) { - console.log("Already logged in"); - return; - } - - // If not logged in, see if any credential data is - // persisting. If so, destroy local data. - await sleep(500); - const clearCredsButton = await page.$( - "button[aria-label='clear-credentials']", - ); - if (clearCredsButton) { - await clearCredsButton.click(); - } - - // Try log in - console.log("Logging in"); - - // Click the first button, "register" - await waitForSelectorClick(page, "button[aria-label='register']"); - - // Check if we need to select a method first (in case of multiple auth methods available) - const methodPassphraseButton = await page.$( - "button[aria-label='method-passphrase']", - ); - if (methodPassphraseButton) { - console.log("Multiple auth methods available, selecting passphrase method"); - await methodPassphraseButton.click(); - } else { - // Click the register with passphrase button - await waitForSelectorClick( - page, - "button[aria-label='register-with-passphrase']", - ); - } - - // Get the mnemonic from textarea. - let input = await page.waitForSelector("textarea[aria-label='mnemonic']"); - const mnemonic = await input!.evaluate((textarea: HTMLInputElement) => - textarea.value - ); - - // Click the SECOND button, "continue to login" - await waitForSelectorClick(page, "button[aria-label='continue-login']"); - - // Check if we need to select a method for login (in case of multiple auth methods) - const loginMethodPassphraseButton = await page.$( - "button[aria-label='method-passphrase']", - ); - if (loginMethodPassphraseButton) { - console.log( - "Multiple auth methods available for login, selecting passphrase method", - ); - await loginMethodPassphraseButton.click(); - } - - // Paste the mnemonic in the input. - input = await page.waitForSelector("input[aria-label='enter-passphrase']"); - await input!.evaluate( - (input: HTMLInputElement, mnemonic: string) => input.value = mnemonic, - { args: [mnemonic] }, - ); - - // Click the only button, "login" - await waitForSelectorClick(page, "button[aria-label='login']"); - - await page.waitForSelector("#user-avatar"); -} - -export function addErrorListeners() { - page.evaluate(() => { - // @ts-ignore: this code is stringified and sent to browser context - globalThis.charmRuntimeErrors = []; - globalThis.addEventListener("common-iframe-error", (e) => { - // @ts-ignore: this code is stringified and sent to browser context - globalThis.charmRuntimeErrors.push(e.detail.description); - }); - }); -} - -export async function checkForErrors() { - return await page.evaluate(() => { - // @ts-ignore: this code is stringified and sent to browser context - return globalThis.charmRuntimeErrors; - }); -} - -export async function screenshot(id: string, filename: string) { - const screenshot = await page.screenshot(); - return Deno.writeFile(filename, screenshot); -} - -export async function goto(url: string) { - await page.goto(new URL(url, toolshedUrl).toString()); -} diff --git a/seeder/processor.ts b/seeder/processor.ts index 4006edea8..87eb87e6a 100644 --- a/seeder/processor.ts +++ b/seeder/processor.ts @@ -4,12 +4,9 @@ import type { Scenario, Step, } from "./interfaces.ts"; -import { llmVerifyCharm } from "./judge.ts"; +import { Verifier } from "./verifier.ts"; import { CharmManager } from "@commontools/charm"; -import { goto, sleep } from "./jumble.ts"; -import { toolshedUrl } from "./env.ts"; import { CommandType } from "./interfaces.ts"; -import { addErrorListeners, checkForErrors, screenshot } from "./jumble.ts"; import { castNewRecipe, createDataCharm, @@ -17,187 +14,191 @@ import { } from "@commontools/charm"; import { getEntityId } from "@commontools/runner"; -export async function processScenario( - { scenario, model, cache, name, charmManager }: { - scenario: Scenario; - model: string; - cache: boolean; - name: string; - charmManager: CharmManager; - }, -): Promise { - const results: CharmResult[] = []; +export class Processor { + private cache: boolean; + private charmManager: CharmManager; + private name: string; + private model: string; + private verifier?: Verifier; - await goto(toolshedUrl); - await sleep(1000); - let lastCharmId: string | undefined; - let failed = false; - for (const step of scenario.steps) { - if (failed) { - results.push({ - id: "skip", - prompt: step.prompt, - status: "FAIL", - summary: "Failed to process step", - }); - continue; - } - let result: CharmResult; - try { - result = await processCommand({ - step, - lastCharmId, - cache, - name, - model, - charmManager, - }); - results.push(result); - } catch (error) { - console.error(`Error processing step`, { step, scenario, error }); - failed = true; - continue; - } - if (result.id !== "none") { - lastCharmId = result.id; + constructor( + { name, model, cache, charmManager, verifier }: { + name: string; + model: string; + cache: boolean; + charmManager: CharmManager; + verifier?: Verifier; + }, + ) { + this.cache = cache; + this.charmManager = charmManager; + this.model = model; + this.name = name; + this.verifier = verifier; + } + + async process( + scenarios: Scenario[], + tag: string | undefined, + ): Promise { + console.log(`Processing scenarios...`); + + const executed = []; + for (const scenario of scenarios) { + if ( + tag && (scenario.tags === undefined || !scenario.tags.includes(tag)) + ) { + continue; + } + executed.push(await this.processScenario(scenario)); } + console.log(`Processed ${executed.length} scenarios.`); + return executed; } - return { scenario, results }; -} -export async function processCommand({ - step, - lastCharmId, - cache, - name, - model, - charmManager, -}: { - step: Step; - lastCharmId: string | undefined; - cache: boolean; - name: string; - model: string; - charmManager: CharmManager; -}): Promise { - const { type, prompt } = step; + private async processScenario(scenario: Scenario): Promise { + const results: CharmResult[] = []; - switch (type) { - case CommandType.New: { - console.log(`Adding: "${prompt}"`); - const form = await processWorkflow(prompt, charmManager, { - cache, - model, - prefill: { - classification: { - workflowType: "imagine", - confidence: 1.0, - reasoning: "hard coded", - }, - }, - }); - const { cell: charm } = await castNewRecipe(charmManager, form); - const id = getEntityId(charm); - if (id) { - console.log(`Charm added: ${id["/"]}`); - return verifyCharm({ id: id["/"], prompt, name }); + let prevCharmId: string | undefined; + let failed = false; + for (const step of scenario.steps) { + if (failed) { + results.push({ + id: "skip", + prompt: step.prompt, + status: "FAIL", + summary: "Failed to process step", + }); + continue; + } + let result: CharmResult; + try { + result = await this.processCommand({ + step, + prevCharmId, + }); + results.push(result); + } catch (error: any) { + console.error(`Error processing step`, { step, scenario, error }); + failed = true; + results.push({ + id: "error", + prompt: step.prompt, + status: "FAIL", + summary: error && error.message ? error.message : error, + }); + continue; + } + if (result.id) { + prevCharmId = result.id; } - break; } - case CommandType.Extend: { - console.log(`Extending: "${prompt}"`); - if (!lastCharmId) { - throw new Error("Last charm ID is undefined."); + return { scenario, results }; + } + + private async processCommand({ + step, + prevCharmId, + }: { + step: Step; + prevCharmId: string | undefined; + }): Promise { + const { type, prompt } = step; + + switch (type) { + case CommandType.New: { + console.log(`Adding: "${prompt}"`); + const form = await processWorkflow(prompt, this.charmManager, { + cache: this.cache, + model: this.model, + prefill: { + classification: { + workflowType: "imagine", + confidence: 1.0, + reasoning: "hard coded", + }, + }, + }); + const { cell: charm } = await castNewRecipe(this.charmManager, form); + const id = getEntityId(charm); + if (id) { + console.log(`Charm added: ${id["/"]}`); + return this.verify({ id: id["/"], prompt, name: this.name }); + } + break; } - const charm = await charmManager.get(lastCharmId); - const form = await processWorkflow(prompt, charmManager, { - existingCharm: charm, - cache, - model, - prefill: { - classification: { - workflowType: "imagine", - confidence: 1.0, - reasoning: "hard coded", + case CommandType.Extend: { + console.log(`Extending: "${prompt}"`); + if (!prevCharmId) { + throw new Error("Previous charm ID is undefined."); + } + const charm = await this.charmManager.get(prevCharmId); + const form = await processWorkflow(prompt, this.charmManager, { + existingCharm: charm, + cache: this.cache, + model: this.model, + prefill: { + classification: { + workflowType: "imagine", + confidence: 1.0, + reasoning: "hard coded", + }, }, - }, - }); + }); - const extendedCharm = form.generation?.charm; - const id = getEntityId(extendedCharm); - if (id) { - console.log(`Charm added: ${id["/"]}`); - return verifyCharm({ id: id["/"], prompt, name }); - } else { - console.error(`Charm not added: ${prompt}`); - } - break; - } - case CommandType.ImportJSON: { - console.log(`Importing JSON for: "${prompt}"`); - if (!step.data) { - throw new Error("Missing data for JSON import."); + const extendedCharm = form.generation?.charm; + const id = getEntityId(extendedCharm); + if (id) { + console.log(`Charm added: ${id["/"]}`); + return this.verify({ id: id["/"], prompt, name: this.name }); + } else { + console.error(`Charm not added: ${prompt}`); + } + break; } + case CommandType.ImportJSON: { + console.log(`Importing JSON for: "${prompt}"`); + if (!step.data) { + throw new Error("Missing data for JSON import."); + } - const charm = await createDataCharm( - charmManager, - step.data, - step.dataSchema, - prompt, - ); + const charm = await createDataCharm( + this.charmManager, + step.data, + step.dataSchema, + prompt, + ); - const id = getEntityId(charm); - console.log(`Charm added from JSON import`, { id }); - if (id) { - console.log(`Charm added from JSON import: ${id["/"]}`); - return verifyCharm({ - id: id["/"], - prompt: "shows a jsonschema for " + prompt, - name, - }); + const id = getEntityId(charm); + console.log(`Charm added from JSON import`, { id }); + if (id) { + console.log(`Charm added from JSON import: ${id["/"]}`); + return this.verify({ + id: id["/"], + prompt: "shows a jsonschema for " + prompt, + name: this.name, + }); + } + break; } - break; } - } - return { - id: "none", - prompt, - status: "FAIL", - summary: `Unsupported command type: ${type}`, - }; -} - -export async function verifyCharm( - { id, prompt, name }: { id: string; prompt: string; name: string }, -) { - // FIXME(ja): can we navigate without causing a page reload? - await goto(`/${name!}/${id}`); - addErrorListeners(); - // FIXME(ja): perhaps charm can emit a "ready" event and we can wait for it? - await sleep(10000); - const screenshotPath = `results/${name}/${id}.png`; - await screenshot(id, screenshotPath); - const errors = await checkForErrors(); - if (errors.length > 0) { return { - id, + id: null, prompt, - screenshotPath, status: "FAIL", - summary: `Errors: ${errors.join("\n")}`, + summary: `Unsupported command type: ${type}`, }; } - const verdict = await llmVerifyCharm(prompt, screenshotPath); - console.log(`Charm verified: ${id} - ${verdict}`); - - const parsedVerdict = JSON.parse(verdict); - return { - id, - prompt, - screenshotPath, - status: parsedVerdict.result, - summary: parsedVerdict.summary, - }; + async verify( + { id, prompt, name }: { id: string; prompt: string; name: string }, + ): Promise { + return await (this.verifier ? this.verifier.verify({ id, prompt, name }) : { + id, + prompt, + status: "NOTVERIFIED", + summary: "Scenario was not verified.", + }); + } } diff --git a/seeder/report.ts b/seeder/report.ts index a2d4d94f7..d981521e1 100644 --- a/seeder/report.ts +++ b/seeder/report.ts @@ -1,4 +1,4 @@ -import type { CharmResult, ExecutedScenario, Scenario } from "./interfaces.ts"; +import type { CharmResult, ExecutedScenario } from "./interfaces.ts"; import { dirname, fromFileUrl, join } from "@std/path"; // FIXME(ja): we should just use handlebars or something... claude built a custom template engine for this... @@ -19,50 +19,6 @@ export const ensureReportDir = async (name: string) => { } }; -// Helper function to group results by scenario -function groupResultsByScenario( - results: CharmResult[], - scenarios: Scenario[], -): Map { - const groups = new Map(); - let currentScenario = 0; - - // Initialize the first scenario group - groups.set(currentScenario, { - name: scenarios[currentScenario]?.name || `Scenario ${currentScenario + 1}`, - results: [], - }); - - // Process each result - for (let i = 0; i < results.length; i++) { - const result = results[i]; - - // Check if we need to move to the next scenario - // We do this by checking if we've processed all steps in the current scenario - let stepsInCurrentScenario = 0; - for (let j = 0; j <= currentScenario; j++) { - if (j < scenarios.length) { - stepsInCurrentScenario += scenarios[j].steps.length; - } - } - - // If we've processed all steps in the current scenario, move to the next one - if (i >= stepsInCurrentScenario && currentScenario < scenarios.length - 1) { - currentScenario++; - groups.set(currentScenario, { - name: scenarios[currentScenario]?.name || - `Scenario ${currentScenario + 1}`, - results: [], - }); - } - - // Add the result to the current scenario group - groups.get(currentScenario)!.results.push(result); - } - - return groups; -} - // Helper function to load and replace placeholders in a template async function loadTemplate(templateName: string): Promise { const templatePath = join(__dirname, "templates", templateName); @@ -93,7 +49,6 @@ export async function generateReport( name: string, executedScenarios: ExecutedScenario[], toolshedUrl: string, - allScenarios: Scenario[], ) { // Calculate overall statistics const totalScenarios = executedScenarios.length; @@ -150,6 +105,7 @@ export async function generateReport( ? `Screenshot` : `

No screenshot available

`; + const id = result.id ?? "none"; return replaceTemplatePlaceholders(resultTemplate, { RESULT_DELAY: (groupIndex * 0.1) + (index * 0.05), SCREENSHOT_HTML: screenshotHtml, @@ -157,8 +113,8 @@ export async function generateReport( STATUS: result.status, TOOLSHED_URL: toolshedUrl, NAME: name, - RESULT_ID: result.id, - SHORT_ID: result.id.slice(-6), + RESULT_ID: id, + SHORT_ID: id.slice(-6), PROMPT: result.prompt, SUMMARY: result.summary, }); diff --git a/seeder/verifier.ts b/seeder/verifier.ts new file mode 100644 index 000000000..111a3d09d --- /dev/null +++ b/seeder/verifier.ts @@ -0,0 +1,104 @@ +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; +import { Browser } from "./browser.ts"; +import { CharmResult } from "./interfaces.ts"; +import { sleep } from "@commontools/utils/sleep"; + +const model = openai("gpt-4o-mini"); + +export class Verifier { + private browser: Browser; + + constructor({ browser }: { browser: Browser }) { + this.browser = browser; + } + + static async initialize( + { headless, apiUrl }: { headless: boolean; apiUrl: string }, + ) { + const browser = await Browser.launch({ + headless, + apiUrl, + }); + return new Verifier({ browser }); + } + + async verify( + { id, prompt, name }: { id: string; prompt: string; name: string }, + ): Promise { + // FIXME(ja): can we navigate without causing a page reload? + await this.browser.goto(`/${name!}/${id}`); + // FIXME(ja): perhaps charm can emit a "ready" event and we can wait for it? + await sleep(10000); + const screenshotPath = `results/${name}/${id}.png`; + await this.browser.screenshot(screenshotPath); + const errors = await this.browser.checkForErrors(); + if (errors.length > 0) { + return { + id, + prompt, + screenshotPath, + status: "FAIL", + summary: `Errors: ${errors.join("\n")}`, + }; + } + + const verdict = await llmVerifyCharm(prompt, screenshotPath); + console.log(`Charm verified: ${id} - ${verdict}`); + + const parsedVerdict = JSON.parse(verdict); + return { + id, + prompt, + screenshotPath, + status: parsedVerdict.result, + summary: parsedVerdict.summary, + }; + } + + async close(): Promise { + await this.browser.close(); + } +} + +async function llmVerifyCharm( + prompt: string, + filename: string, +): Promise { + // Lazily load so we don't need an API key when not verifying + const { generateObject } = await import("ai"); + + const system = `You are a helpful assistant that verifies charm screenshots. + + Your task is to evaluate how well the screenshot represents what the user asked for in the prompt. + + If the screenshot accurately represents the prompt, return a PASS result with a brief explanation. + If the screenshot does not match the prompt, return a FAIL result with a brief explanation of what's missing or incorrect.`; + + const schema = z.object({ + result: z.enum(["PASS", "FAIL"]), + summary: z.string().describe("A 1-sentence summary of your evaluation"), + }); + + const { object } = await generateObject({ + system: system, + model, + schema, + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: prompt, + }, + { type: "image", image: Deno.readFileSync(filename) }, + ], + }, + ], + }); + + const result = schema.parse(object); + console.log({ result }); + return JSON.stringify(result); +} diff --git a/utils/src/integration.ts b/utils/src/integration.ts index 3fa9f19a4..8b08aa72f 100644 --- a/utils/src/integration.ts +++ b/utils/src/integration.ts @@ -71,11 +71,20 @@ export const login = async (page: Page) => { // Click the first button, "register" await waitForSelectorClick(page, "button[aria-label='register']"); - // Click the first button, "register with passphrase" - await waitForSelectorClick( - page, - "button[aria-label='register-with-passphrase']", + // Check if we need to select a method first (in case of multiple auth methods available) + const methodPassphraseButton = await page.$( + "button[aria-label='method-passphrase']", ); + if (methodPassphraseButton) { + console.log("Multiple auth methods available, selecting passphrase method"); + await methodPassphraseButton.click(); + } else { + // Click the register with passphrase button + await waitForSelectorClick( + page, + "button[aria-label='register-with-passphrase']", + ); + } // Get the mnemonic from textarea. let input = await page.waitForSelector("textarea[aria-label='mnemonic']"); @@ -86,6 +95,17 @@ export const login = async (page: Page) => { // Click the SECOND button, "continue to login" await waitForSelectorClick(page, "button[aria-label='continue-login']"); + // Check if we need to select a method for login (in case of multiple auth methods) + const loginMethodPassphraseButton = await page.$( + "button[aria-label='method-passphrase']", + ); + if (loginMethodPassphraseButton) { + console.log( + "Multiple auth methods available for login, selecting passphrase method", + ); + await loginMethodPassphraseButton.click(); + } + // Paste the mnemonic in the input. input = await page.waitForSelector("input[aria-label='enter-passphrase']"); await input!.evaluate(