Skip to content

Commit e817c4c

Browse files
authored
feat(runner): create data URIs for array element objects not marked asCell (#1920)
* feat(runner): create data URIs for array element objects not marked asCell

  When getting array elements as cells, plain objects (not marked with asCell or asStream) now receive immutable data URIs instead of array index paths. This provides stable references while preventing mutation loops when reassigning array elements.

  Changes:
  - Array element objects get data URIs with empty paths
  - Nested documents (marked asCell) continue to use of: URIs as before
  - Only create array index references when element is already a link
  - Update tests to reflect new data URI behavior for plain objects
  - Change createDataCellURI return type from string to URI for consistency

* make anyOf.every vs .some consistent with the rest (though actually currently incorrectly so)
* fix setRaw not automatically converting the cell
* fix(runner): use encodeURIComponent for UTF-8 safe data URIs
1 parent 70a7227 commit e817c4c

File tree

5 files changed

+123
-56
lines changed

5 files changed

+123
-56
lines changed

packages/runner/src/link-utils.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ export function findAndInlineDataURILinks(value: any): any {
511511
export function createDataCellURI(
512512
data: any,
513513
base?: Cell | NormalizedLink,
514-
): string {
514+
): URI {
515515
const baseId = isCell(base) ? base.getAsNormalizedFullLink().id : base?.id;
516516

517517
function traverseAndAddBaseIdToRelativeLinks(
@@ -549,8 +549,8 @@ export function createDataCellURI(
549549
const json = JSON.stringify({
550550
value: traverseAndAddBaseIdToRelativeLinks(data, new Set()),
551551
});
552-
const base64 = btoa(json);
553-
return `data:application/json;charset=utf-8;base64,${base64}`;
552+
// Use encodeURIComponent for UTF-8 safe encoding (matches runtime.ts pattern)
553+
return `data:application/json,${encodeURIComponent(json)}` as URI;
554554
}
555555

556556
/**

packages/runner/src/schema.ts

Lines changed: 49 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ import { createCell, isCell, isStream } from "./cell.ts";
88
import { readMaybeLink, resolveLink } from "./link-resolution.ts";
99
import { type IExtendedStorageTransaction } from "./storage/interface.ts";
1010
import { type IRuntime } from "./runtime.ts";
11-
import { type NormalizedFullLink } from "./link-utils.ts";
11+
import {
12+
createDataCellURI,
13+
type NormalizedFullLink,
14+
parseLink,
15+
} from "./link-utils.ts";
1216
import {
1317
createQueryResultProxy,
1418
isQueryResultForDereferencing,
@@ -737,20 +741,6 @@ export function validateAndTransform(
737741

738742
// Now process elements after adding the array to seen
739743
for (let i = 0; i < value.length; i++) {
740-
// If the element on the array is a link, we follow that link so the
741-
// returned object is the current item at that location (otherwise the
742-
// link would refer to "Nth element"). This is important when turning
743-
// returned objects back into cells: We want to then refer to the actual
744-
// object by default, not the array location.
745-
//
746-
// This makes
747-
// ```ts
748-
// const array = [...cell.get()];
749-
// array.splice(index, 1);
750-
// cell.set(array);
751-
// ```
752-
// work as expected.
753-
// Handle boolean items values for element schema
754744
let elementSchema: JSONSchema;
755745
if (resolvedSchema.items === true) {
756746
// items: true means allow any item type
@@ -772,13 +762,56 @@ export function validateAndTransform(
772762
path: [...link.path, String(i)],
773763
schema: elementSchema,
774764
};
775-
const maybeLink = readMaybeLink(tx ?? runtime.edit(), elementLink);
765+
766+
// If the element on the array is a link, we follow that link so the
767+
// returned object is the current item at that location (otherwise the
768+
// link would refer to "Nth element"). This is important when turning
769+
// returned objects back into cells: We want to then refer to the actual
770+
// object by default, not the array location.
771+
//
772+
// If the element is an object, but not a link, we create an immutable
773+
// cell to hold the object, except when it is requested as Cell. While
774+
// this means updates aren't propagated, it seems like the right trade-off
775+
// for stability of links and the ability to mutate them without creating
776+
// loops (see below).
777+
//
778+
// This makes
779+
// ```ts
780+
// const array = [...cell.get()];
781+
// array.splice(index, 1);
782+
// cell.set(array);
783+
// ```
784+
// work as expected. (Boolean `items` values for the element schema are handled above, where elementSchema is computed.)
785+
const maybeLink = parseLink(value[i], link);
776786
if (maybeLink) {
777787
elementLink = {
778788
...maybeLink,
779789
schema: elementLink.schema,
780790
rootSchema: elementLink.rootSchema,
781791
};
792+
} else if (
793+
isRecord(value[i]) &&
794+
// TODO(seefeld): Should factor this out, but we should just fully
795+
// normalize schemas, etc.
796+
!(isObject(elementSchema) &&
797+
(elementSchema.asCell || elementSchema.asStream ||
798+
(Array.isArray(elementSchema?.anyOf) &&
799+
elementSchema.anyOf.every((option) =>
800+
option.asCell || option.asStream
801+
)) ||
802+
(Array.isArray(elementSchema?.oneOf) &&
803+
elementSchema.oneOf.every((option) =>
804+
option.asCell || option.asStream
805+
))))
806+
) {
807+
elementLink = {
808+
id: createDataCellURI(value[i], link),
809+
path: [],
810+
schema: elementSchema,
811+
rootSchema: elementLink.rootSchema,
812+
space: link.space,
813+
type: "application/json",
814+
} satisfies NormalizedFullLink;
782815
}
783816

784817
result[i] = validateAndTransform(

packages/runner/src/uri-utils.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,19 @@ export function getJSONFromDataURI(uri: URI | string): any {
9090
// Check if data is base64 encoded
9191
const isBase64 = headerParts.some((part) => part === "base64");
9292

93-
const decodedData = isBase64 ? atob(data) : decodeURIComponent(data);
93+
let decodedData: string;
94+
if (isBase64) {
95+
// Use TextDecoder to properly decode UTF-8 bytes from base64
96+
const binaryString = atob(data);
97+
const bytes = new Uint8Array(binaryString.length);
98+
for (let i = 0; i < binaryString.length; i++) {
99+
bytes[i] = binaryString.charCodeAt(i);
100+
}
101+
const decoder = new TextDecoder();
102+
decodedData = decoder.decode(bytes);
103+
} else {
104+
decodedData = decodeURIComponent(data);
105+
}
94106

95107
return decodedData.length > 0 ? JSON.parse(decodedData) : undefined;
96108
}

packages/runner/test/link-utils.test.ts

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
parseLinkOrThrow,
1414
sanitizeSchemaForLinks,
1515
} from "../src/link-utils.ts";
16+
import { getJSONFromDataURI } from "../src/uri-utils.ts";
1617
import { Identity } from "@commontools/identity";
1718
import { StorageManager } from "@commontools/runner/storage/cache.deno";
1819
import type { JSONSchema } from "../src/builder/types.ts";
@@ -773,10 +774,8 @@ describe("link-utils", () => {
773774
baseCell,
774775
);
775776

776-
// Decode the data URI
777-
const base64 = dataURI.split(",")[1];
778-
const json = atob(base64);
779-
const parsed = JSON.parse(json);
777+
// Decode the data URI using getJSONFromDataURI
778+
const parsed = getJSONFromDataURI(dataURI);
780779

781780
expect(parsed.value.link["/"][LINK_V1_TAG].path).toEqual([
782781
"nested",
@@ -814,10 +813,8 @@ describe("link-utils", () => {
814813

815814
const dataURI = createDataCellURI(data, baseCell);
816815

817-
// Decode the data URI
818-
const base64 = dataURI.split(",")[1];
819-
const json = atob(base64);
820-
const parsed = JSON.parse(json);
816+
// Decode the data URI using getJSONFromDataURI
817+
const parsed = getJSONFromDataURI(dataURI);
821818

822819
expect(parsed.value.items[0]["/"][LINK_V1_TAG].id).toBe(baseId);
823820
expect(parsed.value.items[1].nested.link["/"][LINK_V1_TAG].id).toBe(
@@ -841,10 +838,8 @@ describe("link-utils", () => {
841838

842839
const dataURI = createDataCellURI({ link: absoluteLink }, baseCell);
843840

844-
// Decode the data URI
845-
const base64 = dataURI.split(",")[1];
846-
const json = atob(base64);
847-
const parsed = JSON.parse(json);
841+
// Decode the data URI using getJSONFromDataURI
842+
const parsed = getJSONFromDataURI(dataURI);
848843

849844
// Should remain unchanged
850845
expect(parsed.value.link["/"][LINK_V1_TAG].id).toBe(otherId);
@@ -867,14 +862,34 @@ describe("link-utils", () => {
867862
// Should not throw even though sharedObject is referenced multiple times
868863
const dataURI = createDataCellURI(data);
869864

870-
// Decode and verify
871-
const base64 = dataURI.split(",")[1];
872-
const json = atob(base64);
873-
const parsed = JSON.parse(json);
865+
// Decode and verify using getJSONFromDataURI
866+
const parsed = getJSONFromDataURI(dataURI);
874867

875868
expect(parsed.value.first.value).toBe(42);
876869
expect(parsed.value.second.value).toBe(42);
877870
expect(parsed.value.nested.third.value).toBe(42);
878871
});
872+
873+
it("should handle UTF-8 characters (emojis, special characters)", () => {
874+
const data = {
875+
emoji: "🚀 Hello World! 🌍",
876+
chinese: "你好世界",
877+
arabic: "مرحبا بالعالم",
878+
special: "Ñoño™©®",
879+
mixed: "Test 🎉 with ñ and 中文",
880+
};
881+
882+
// Should not throw with UTF-8 characters
883+
const dataURI = createDataCellURI(data);
884+
885+
// Decode and verify using getJSONFromDataURI
886+
const parsed = getJSONFromDataURI(dataURI);
887+
888+
expect(parsed.value.emoji).toBe("🚀 Hello World! 🌍");
889+
expect(parsed.value.chinese).toBe("你好世界");
890+
expect(parsed.value.arabic).toBe("مرحبا بالعالم");
891+
expect(parsed.value.special).toBe("Ñoño™©®");
892+
expect(parsed.value.mixed).toBe("Test 🎉 with ñ and 中文");
893+
});
879894
});
880895
});

packages/runner/test/schema.test.ts

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,7 @@ describe("Schema Support", () => {
13971397
undefined,
13981398
tx,
13991399
);
1400-
plain.set({
1400+
plain.setRaw({
14011401
type: "vnode",
14021402
name: "div",
14031403
props: { style: { color: "red" } },
@@ -1417,23 +1417,23 @@ describe("Schema Support", () => {
14171417
undefined,
14181418
tx,
14191419
);
1420-
styleCell.set({ color: "red" });
1420+
styleCell.setRaw({ color: "red" });
14211421

14221422
const innerTextCell = runtime.getCell<{ type: string; value: string }>(
14231423
space,
14241424
"should work for the vdom schema with $ref 4",
14251425
undefined,
14261426
tx,
14271427
);
1428-
innerTextCell.set({ type: "text", value: "world" });
1428+
innerTextCell.setRaw({ type: "text", value: "world" });
14291429

14301430
const childrenArrayCell = runtime.getCell<any[]>(
14311431
space,
14321432
"should work for the vdom schema with $ref 5",
14331433
undefined,
14341434
tx,
14351435
);
1436-
childrenArrayCell.set([
1436+
childrenArrayCell.setRaw([
14371437
{ type: "text", value: "hello" },
14381438
innerTextCell.getAsLink(),
14391439
]);
@@ -1451,15 +1451,15 @@ describe("Schema Support", () => {
14511451
undefined,
14521452
tx,
14531453
);
1454-
withLinks.set({
1454+
withLinks.setRaw({
14551455
type: "vnode",
14561456
name: "div",
14571457
props: {
14581458
style: styleCell,
14591459
},
14601460
children: [
14611461
{ type: "text", value: "single" },
1462-
childrenArrayCell,
1462+
childrenArrayCell.getAsLink(),
14631463
"or just text",
14641464
],
14651465
});
@@ -2441,7 +2441,7 @@ describe("Schema Support", () => {
24412441
expect(links[0].id).not.toBe(links[2].id);
24422442
});
24432443

2444-
it("should resolve to array indices when elements are not nested documents", () => {
2444+
it("should create data URIs for plain objects not marked asCell", () => {
24452445
const schema = {
24462446
type: "object",
24472447
properties: {
@@ -2482,14 +2482,17 @@ describe("Schema Support", () => {
24822482
const itemCells = result.items.map((item: any) => item[toCell]());
24832483
const links = itemCells.map((cell) => cell.getAsNormalizedFullLink());
24842484

2485-
// Without nested documents, links should point to array indices
2486-
expect(links[0].path).toEqual(["items", "0"]);
2487-
expect(links[1].path).toEqual(["items", "1"]);
2488-
expect(links[2].path).toEqual(["items", "2"]);
2485+
// Plain objects now get data URIs with empty paths
2486+
expect(links[0].id).toMatch(/^data:/);
2487+
expect(links[1].id).toMatch(/^data:/);
2488+
expect(links[2].id).toMatch(/^data:/);
2489+
expect(links[0].path).toEqual([]);
2490+
expect(links[1].path).toEqual([]);
2491+
expect(links[2].path).toEqual([]);
24892492

2490-
// They should all have the same ID (the parent cell)
2491-
expect(links[0].id).toBe(links[1].id);
2492-
expect(links[1].id).toBe(links[2].id);
2493+
// Each should have unique data URIs
2494+
expect(links[0].id).not.toBe(links[1].id);
2495+
expect(links[1].id).not.toBe(links[2].id);
24932496
});
24942497

24952498
it("should support array splice operations with nested documents", () => {
@@ -2614,15 +2617,19 @@ describe("Schema Support", () => {
26142617
expect(links[0].path).toEqual([]);
26152618
expect(links[2].path).toEqual([]);
26162619

2617-
// Plain objects have array index paths
2618-
expect(links[1].path).toEqual(["items", "1"]);
2619-
expect(links[3].path).toEqual(["items", "3"]);
2620+
// Plain objects now also have empty paths (data URIs)
2621+
expect(links[1].path).toEqual([]);
2622+
expect(links[3].path).toEqual([]);
26202623

2621-
// Nested documents should have unique IDs
2624+
// Nested documents should have unique IDs (of: format)
26222625
expect(links[0].id).not.toBe(links[2].id);
2626+
expect(links[0].id).toMatch(/^of:/);
2627+
expect(links[2].id).toMatch(/^of:/);
26232628

2624-
// Plain objects should share the parent cell's ID
2625-
expect(links[1].id).toBe(links[3].id);
2629+
// Plain objects should have data URIs
2630+
expect(links[1].id).toMatch(/^data:/);
2631+
expect(links[3].id).toMatch(/^data:/);
2632+
expect(links[1].id).not.toBe(links[3].id); // Different data URIs
26262633
});
26272634

26282635
it("should preserve nested document references when reordering arrays", () => {

0 commit comments

Comments
 (0)