Skip to content

Commit fe0e36b

Browse files
seefeldb and claude committed
fix(runner): use encodeURIComponent for UTF-8 safe data URIs
Changed from base64 encoding to URL encoding (encodeURIComponent) for data URIs to handle UTF-8 characters properly. This matches the pattern already used in runtime.ts and avoids the btoa() Latin1 limitation.

Changes:
- createDataCellURI() now uses encodeURIComponent instead of base64
- getJSONFromDataURI() continues to handle both formats (base64 and URL-encoded); its base64 branch now decodes the bytes with TextDecoder so UTF-8 payloads are read correctly
- Update tests to use getJSONFromDataURI() instead of manual atob() decoding
- Add comprehensive test for UTF-8 character support

This fixes integration test failures where recipes containing non-ASCII characters (emojis, Chinese, Arabic, etc.) would cause data URI creation to fail with a "characters outside of the Latin1 range" error.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 6c94cfe commit fe0e36b

File tree

3 files changed

+46
-19
lines changed

3 files changed

+46
-19
lines changed

packages/runner/src/link-utils.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -549,8 +549,8 @@ export function createDataCellURI(
549549
const json = JSON.stringify({
550550
value: traverseAndAddBaseIdToRelativeLinks(data, new Set()),
551551
});
552-
const base64 = btoa(json);
553-
return `data:application/json;charset=utf-8;base64,${base64}`;
552+
// Use encodeURIComponent for UTF-8 safe encoding (matches runtime.ts pattern)
553+
return `data:application/json,${encodeURIComponent(json)}` as URI;
554554
}
555555

556556
/**

packages/runner/src/uri-utils.ts

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,19 @@ export function getJSONFromDataURI(uri: URI | string): any {
9090
// Check if data is base64 encoded
9191
const isBase64 = headerParts.some((part) => part === "base64");
9292

93-
const decodedData = isBase64 ? atob(data) : decodeURIComponent(data);
93+
let decodedData: string;
94+
if (isBase64) {
95+
// Use TextDecoder to properly decode UTF-8 bytes from base64
96+
const binaryString = atob(data);
97+
const bytes = new Uint8Array(binaryString.length);
98+
for (let i = 0; i < binaryString.length; i++) {
99+
bytes[i] = binaryString.charCodeAt(i);
100+
}
101+
const decoder = new TextDecoder();
102+
decodedData = decoder.decode(bytes);
103+
} else {
104+
decodedData = decodeURIComponent(data);
105+
}
94106

95107
return decodedData.length > 0 ? JSON.parse(decodedData) : undefined;
96108
}

packages/runner/test/link-utils.test.ts

Lines changed: 31 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import {
1313
parseLinkOrThrow,
1414
sanitizeSchemaForLinks,
1515
} from "../src/link-utils.ts";
16+
import { getJSONFromDataURI } from "../src/uri-utils.ts";
1617
import { Identity } from "@commontools/identity";
1718
import { StorageManager } from "@commontools/runner/storage/cache.deno";
1819
import type { JSONSchema } from "../src/builder/types.ts";
@@ -773,10 +774,8 @@ describe("link-utils", () => {
773774
baseCell,
774775
);
775776

776-
// Decode the data URI
777-
const base64 = dataURI.split(",")[1];
778-
const json = atob(base64);
779-
const parsed = JSON.parse(json);
777+
// Decode the data URI using getJSONFromDataURI
778+
const parsed = getJSONFromDataURI(dataURI);
780779

781780
expect(parsed.value.link["/"][LINK_V1_TAG].path).toEqual([
782781
"nested",
@@ -814,10 +813,8 @@ describe("link-utils", () => {
814813

815814
const dataURI = createDataCellURI(data, baseCell);
816815

817-
// Decode the data URI
818-
const base64 = dataURI.split(",")[1];
819-
const json = atob(base64);
820-
const parsed = JSON.parse(json);
816+
// Decode the data URI using getJSONFromDataURI
817+
const parsed = getJSONFromDataURI(dataURI);
821818

822819
expect(parsed.value.items[0]["/"][LINK_V1_TAG].id).toBe(baseId);
823820
expect(parsed.value.items[1].nested.link["/"][LINK_V1_TAG].id).toBe(
@@ -841,10 +838,8 @@ describe("link-utils", () => {
841838

842839
const dataURI = createDataCellURI({ link: absoluteLink }, baseCell);
843840

844-
// Decode the data URI
845-
const base64 = dataURI.split(",")[1];
846-
const json = atob(base64);
847-
const parsed = JSON.parse(json);
841+
// Decode the data URI using getJSONFromDataURI
842+
const parsed = getJSONFromDataURI(dataURI);
848843

849844
// Should remain unchanged
850845
expect(parsed.value.link["/"][LINK_V1_TAG].id).toBe(otherId);
@@ -867,14 +862,34 @@ describe("link-utils", () => {
867862
// Should not throw even though sharedObject is referenced multiple times
868863
const dataURI = createDataCellURI(data);
869864

870-
// Decode and verify
871-
const base64 = dataURI.split(",")[1];
872-
const json = atob(base64);
873-
const parsed = JSON.parse(json);
865+
// Decode and verify using getJSONFromDataURI
866+
const parsed = getJSONFromDataURI(dataURI);
874867

875868
expect(parsed.value.first.value).toBe(42);
876869
expect(parsed.value.second.value).toBe(42);
877870
expect(parsed.value.nested.third.value).toBe(42);
878871
});
872+
873+
it("should handle UTF-8 characters (emojis, special characters)", () => {
874+
const data = {
875+
emoji: "🚀 Hello World! 🌍",
876+
chinese: "你好世界",
877+
arabic: "مرحبا بالعالم",
878+
special: "Ñoño™©®",
879+
mixed: "Test 🎉 with ñ and 中文",
880+
};
881+
882+
// Should not throw with UTF-8 characters
883+
const dataURI = createDataCellURI(data);
884+
885+
// Decode and verify using getJSONFromDataURI
886+
const parsed = getJSONFromDataURI(dataURI);
887+
888+
expect(parsed.value.emoji).toBe("🚀 Hello World! 🌍");
889+
expect(parsed.value.chinese).toBe("你好世界");
890+
expect(parsed.value.arabic).toBe("مرحبا بالعالم");
891+
expect(parsed.value.special).toBe("Ñoño™©®");
892+
expect(parsed.value.mixed).toBe("Test 🎉 with ñ and 中文");
893+
});
879894
});
880895
});

0 commit comments

Comments
 (0)