Commit 45d2c25

Fixing duplicate message in cache artifacts, and cleaning up caching logic

1 parent 523e0bc · commit 45d2c25

3 files changed (+38 −30 lines)

jumble/integration/basic-flow.test.ts

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ import {
 const TOOLSHED_API_URL = Deno.env.get("TOOLSHED_API_URL") ??
   "http://localhost:8000/";
 const FRONTEND_URL = Deno.env.get("FRONTEND_URL") ?? "http://localhost:5173/";
-const HEADLESS = false;
+const HEADLESS = true;
 const ASTRAL_TIMEOUT = 60_000;

 console.log(`TOOLSHED_API_URL=${TOOLSHED_API_URL}`);
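
The only change here flips the integration test's browser to headless mode. The neighboring constants are already overridable via environment variables; a hypothetical variant (not part of this commit) could make the flag env-driven in the same style:

// Hypothetical sketch, not in the commit: derive HEADLESS from an env
// var with a headless-by-default fallback, mirroring the
// Deno.env.get(...) ?? pattern used by TOOLSHED_API_URL and FRONTEND_URL.
const HEADLESS = (Deno.env.get("HEADLESS") ?? "true") !== "false";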

toolshed/routes/ai/llm/generateText.ts

Lines changed: 18 additions & 8 deletions

@@ -23,14 +23,17 @@ export interface GenerateTextParams {
   abortSignal?: AbortSignal;
   max_tokens?: number;
   mode?: "json";
-  // Optional callback for when streaming is complete (used for caching)
-  onStreamComplete?: (
-    finalMessage: { role: "user" | "assistant"; content: string },
-  ) => void;
+  // Updated callback to receive complete data for caching
+  onStreamComplete?: (result: {
+    message: { role: "user" | "assistant"; content: string };
+    messages: { role: "user" | "assistant"; content: string }[];
+    originalRequest: GenerateTextParams;
+  }) => void;
 }

 export interface GenerateTextResult {
   message: { role: "user" | "assistant"; content: string };
+  messages: { role: "user" | "assistant"; content: string }[];
   stream?: ReadableStream;
 }

@@ -225,7 +228,10 @@ export async function generateText(
     messages[messages.length - 1].content = result;
   }

-  return { message: messages[messages.length - 1] };
+  return {
+    message: messages[messages.length - 1],
+    messages: [...messages],
+  };
 }

 // Create streaming response
@@ -271,10 +277,13 @@
       messages[messages.length - 1].content = result;
     }

-    // Call the onStreamComplete callback with the final message, if provided
-    // This is used to save the completed stream response to the cache
+    // Call the onStreamComplete callback with all the data needed for caching
     if (params.onStreamComplete) {
-      params.onStreamComplete(messages[messages.length - 1]);
+      params.onStreamComplete({
+        message: messages[messages.length - 1],
+        messages: [...messages],
+        originalRequest: params,
+      });
     }

     controller.close();
@@ -283,6 +292,7 @@

   return {
     message: messages[messages.length - 1],
+    messages: [...messages],
     stream,
   };
 }
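
A minimal sketch of how a caller might consume the new object-style callback; generateText and its params come from this file, while the surrounding wiring and requestParams are assumed for illustration:

// Sketch only: the callback now delivers the final message, the full
// conversation, and the original request in one object, so a caller
// can destructure whatever it needs.
const result = await generateText({
  ...requestParams, // assumed to carry model, messages, etc.
  onStreamComplete: ({ message, messages, originalRequest }) => {
    console.log("final assistant message:", message.content);
    console.log("conversation length:", messages.length);
    console.log("max_tokens requested:", originalRequest.max_tokens);
  },
});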

toolshed/routes/ai/llm/llm.handlers.ts

Lines changed: 19 additions & 21 deletions

@@ -92,17 +92,27 @@ export const getModels: AppRouteHandler<GetModelsRoute> = (c) => {
 export const generateText: AppRouteHandler<GenerateTextRoute> = async (c) => {
   const payload = await c.req.json();

-  console.log("LLM HASH", await cache.hashKey(JSON.stringify(payload)));
-  console.log("LLM PAYLOAD", JSON.stringify(payload, null, 2));
-
-  // Check cache for existing response
+  // First, check whether the request is cached, if so return the cached result
   const cacheKey = await cache.hashKey(JSON.stringify(payload));
   const cachedResult = await cache.loadItem(cacheKey);
   if (cachedResult) {
     const lastMessage = cachedResult.messages[cachedResult.messages.length - 1];
     return c.json(lastMessage);
   }

+  const persistCache = async (
+    messages: { role: string; content: string }[],
+  ) => {
+    try {
+      await cache.saveItem(cacheKey, {
+        ...payload,
+        messages,
+      });
+    } catch (e) {
+      console.error("Error saving response to cache:", e);
+    }
+  };
+
   const validationError = validateModelAndJsonMode(
     c,
     payload.model,
@@ -120,29 +130,17 @@ export const generateText: AppRouteHandler<GenerateTextRoute> = async (c) => {
     ...payload,
     abortSignal: c.req.raw.signal,
     max_tokens: payload.max_tokens || modelDefaultMaxTokens,
+    // If response is streaming, save to cache after the stream is complete
     onStreamComplete: payload.stream
-      ? (finalMessage) => {
-        // Save the completed stream response to the cache
-        cache.saveItem(cacheKey, {
-          ...payload,
-          messages: [...payload.messages, finalMessage],
-        }).catch((e) => {
-          console.error("Error saving streamed response to cache:", e);
-        });
+      ? async (result) => {
+        await persistCache(result.messages);
       }
       : undefined,
   });

+  // If response is not streaming, save to cache and return the message
   if (!payload.stream) {
-    // Save to cache
-    console.log("messagesssssss", [...payload.messages, result.message]);
-    console.log("=======================");
-    console.log("minus one", [...payload.messages]);
-    await cache.saveItem(cacheKey, {
-      ...payload,
-      // FIXME(jake): I believe this is persisting duplicate messages to the cached json blobs.
-      messages: [...payload.messages, result.message],
-    });
+    await persistCache(result.messages);
     return c.json(result.message);
   }
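
The heart of the cleanup: both code paths now persist the messages array that generateText itself assembled, instead of re-appending result.message to payload.messages, which is what produced the duplicate entries flagged in the old FIXME. A standalone sketch of the consolidated pattern, with hashKey and saveItem as assumed stand-ins for the cache module's real functions:

type Message = { role: string; content: string };

// Assumed stand-in for cache.hashKey; one plausible implementation is
// a hex-encoded SHA-256 digest of the serialized request payload.
async function hashKey(input: string): Promise<string> {
  const digest = await crypto.subtle.digest(
    "SHA-256",
    new TextEncoder().encode(input),
  );
  return Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}

// A single persist helper keeps the streaming and non-streaming paths
// from diverging in what they write to the cache.
function makePersistCache(
  saveItem: (key: string, value: unknown) => Promise<void>, // assumed signature
  cacheKey: string,
  payload: Record<string, unknown>,
) {
  return async (messages: Message[]) => {
    try {
      await saveItem(cacheKey, { ...payload, messages });
    } catch (e) {
      console.error("Error saving response to cache:", e);
    }
  };
}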
