Skip to content

Commit 65dd1a2

Browse files
authored
feat: update schema removing need for coalesce (#525)
1 parent 5d41cd6 commit 65dd1a2

File tree

3 files changed

+161
-59
lines changed

3 files changed

+161
-59
lines changed

typescript/packages/common-memory/migrate.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
import {
2-
Database,
3-
SqliteError,
4-
Transaction as DBTransaction,
5-
} from "@db/sqlite";
1+
import { Database } from "@db/sqlite";
62

73
const MIGRATIONS = new URL("./migrations/", import.meta.url);
84

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
-- First we update all the `fact` records so that we don't have any rows
2+
-- where `is` is NULL (retractions now reference the 'undefined' datum).
3+
UPDATE fact
4+
SET "is" = 'undefined'
5+
WHERE "is" IS NULL; -- "is" must be double-quoted: 'is' in an expression is a string literal, never NULL
6+
7+
-- Then we archive the `fact` table so we can create an altered version
8+
ALTER TABLE fact RENAME TO fact_archive;
9+
10+
-- Create new `fact` table where `is` has NOT NULL constraint.
11+
CREATE TABLE fact (
12+
this TEXT NOT NULL PRIMARY KEY,
13+
the TEXT NOT NULL,
14+
of TEXT NOT NULL,
15+
'is' TEXT NOT NULL, -- 👈 is can no longer be `NULL`
16+
cause TEXT,
17+
since INTEGER NOT NULL,
18+
FOREIGN KEY('is') REFERENCES datum(this)
19+
);
20+
21+
-- Drop `fact_since` index so we can re-create it for the fact table.
22+
DROP INDEX IF EXISTS fact_since;
23+
-- Recreate `fact_since` index on a new `fact` table.
24+
CREATE INDEX fact_since ON fact (since);
25+
26+
-- Migrate data from the archived `fact` table to the new one.
27+
INSERT INTO fact (this, the, of, 'is', cause, since)
28+
SELECT archive.this, archive.the, archive.of, archive.'is', archive.cause, archive.since
29+
FROM fact_archive archive;
30+
31+
-- Next we need to recreate memory table so it has a foreign key into
32+
-- the new fact table, therefore we archive the old one
33+
ALTER TABLE memory RENAME TO memory_archive;
34+
-- Now create an exact replica pointing to the new `fact` table
35+
CREATE TABLE memory (
36+
the TEXT NOT NULL,
37+
of TEXT NOT NULL,
38+
fact TEXT NOT NULL,
39+
FOREIGN KEY(fact) REFERENCES fact(this),
40+
PRIMARY KEY (the, of)
41+
);
42+
-- Drop indexes for the archived `memory` table
43+
DROP INDEX memory_the;
44+
DROP INDEX memory_of;
45+
-- Recreate those indexes for the new `memory` table
46+
CREATE INDEX memory_the ON memory (the);
47+
CREATE INDEX memory_of ON memory (of);
48+
49+
-- Migrate records from archived memory table into new one.
50+
INSERT INTO memory (the, of, fact)
51+
SELECT archive.the, archive.of, archive.fact
52+
FROM memory_archive archive;
53+
54+
-- Now we need to recreate `state` view
55+
-- So first we drop the old one
56+
DROP VIEW state;
57+
-- And then create a new `state` view without `coalesce`
58+
-- Create new 'state' view
59+
CREATE VIEW state AS
60+
SELECT
61+
memory.the AS the,
62+
memory.of AS of,
63+
datum.source AS 'is',
64+
fact.cause AS cause,
65+
memory.fact AS fact,
66+
datum.this AS proof,
67+
fact.since AS since
68+
FROM
69+
memory
70+
JOIN
71+
fact ON memory.fact = fact.this
72+
JOIN
73+
datum ON datum.this = fact.'is'; -- 👈 coalesce is gone.
74+
75+
76+
-- Now we can drop all the archived tables
77+
DROP TABLE memory_archive;
78+
DROP TABLE fact_archive;

typescript/packages/common-memory/space.ts

Lines changed: 82 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -50,35 +50,47 @@ CREATE TABLE IF NOT EXISTS datum (
5050
source JSON -- Source for this JSON
5151
);
5252
53-
-- We create special record to represent undefined which does not exist in JSON.
54-
-- This allows us to join fact with datum table and cover retractions where
55-
-- fact.is is set to NULL
53+
-- We create a special record to represent "undefined" which is not a valid
54+
-- JSON data type. We need this record to join fact.is on datum.this
5655
INSERT OR IGNORE INTO datum (this, source) VALUES ('undefined', NULL);
5756
58-
57+
-- Fact table holds complete history of assertions and retractions. It has
58+
-- n:1 mapping with datum table implying that we could have multiple entity
59+
-- assertions with a same JSON value. Claimed n:1 mapping is guaranteed through
60+
-- a foreign key constraint.
5961
CREATE TABLE IF NOT EXISTS fact (
6062
this TEXT NOT NULL PRIMARY KEY, -- Merkle reference for { the, of, is, cause }
6163
the TEXT NOT NULL, -- Kind of a fact e.g. "application/json"
6264
of TEXT NOT NULL, -- Entity identifier fact is about
63-
'is' TEXT, -- Value entity is claimed to have
64-
cause TEXT, -- Causal reference to prior fact
65+
'is' TEXT NOT NULL, -- Merkle reference of asserted value or "undefined" if retraction
66+
cause TEXT, -- Causal reference to prior fact (It is NULL for a first assertion)
6567
since INTEGER NOT NULL, -- Lamport clock since when this fact was in effect
6668
FOREIGN KEY('is') REFERENCES datum(this)
6769
);
70+
-- Index via "since" field to allow for efficient time queries
71+
CREATE INDEX IF NOT EXISTS fact_since ON fact (since); -- Index to query by "since" field
6872
73+
-- Memory table holds latest assertion / retraction for each entity. In theory
74+
-- it has n:1 mapping with fact table, but in practice it is 1:1 mapping because
75+
-- initial cause is derived from {the, of} seed and therefore it is practically
76+
-- guaranteed to be unique if we disregard astronomically tiny chance of hash
77+
-- collision. Claimed n:1 mapping is guaranteed through a foreign key constraint.
6978
CREATE TABLE IF NOT EXISTS memory (
7079
the TEXT NOT NULL, -- Kind of a fact e.g. "application/json"
7180
of TEXT NOT NULL, -- Entity identifier fact is about
72-
fact TEXT NOT NULL, -- Link to the fact,
81+
fact TEXT NOT NULL, -- Reference to the fact
7382
FOREIGN KEY(fact) REFERENCES fact(this),
7483
PRIMARY KEY (the, of) -- Ensure that we have only one fact per entity
7584
);
7685
77-
CREATE INDEX IF NOT EXISTS memory_the ON memory (the); -- Index to filter by "the" field
78-
CREATE INDEX IF NOT EXISTS memory_of ON memory (of); -- Index to query by "of" field
79-
CREATE INDEX IF NOT EXISTS fact_since ON fact (since); -- Index to query by "since" field
86+
-- Index so we can efficiently search by "the" field.
87+
CREATE INDEX IF NOT EXISTS memory_the ON memory (the);
88+
-- Index so we can efficiently search by "of" field.
89+
CREATE INDEX IF NOT EXISTS memory_of ON memory (of);
8090
81-
-- Create the updated 'state' view
91+
-- State view is effectively a memory table with all the foreign keys resolved
92+
-- Note we use a view because we have 1:n:m relation among memory:fact:datum
93+
-- in order to deduplicate data.
8294
CREATE VIEW IF NOT EXISTS state AS
8395
SELECT
8496
memory.the AS the,
@@ -90,19 +102,15 @@ SELECT
90102
fact.since AS since
91103
FROM
92104
memory
93-
-- We use inner join because we memory.fact can not be NULL and as foreign
94-
-- key into fact.this which is also primary key. This guarantees that we will
95-
-- not have any memory record with corresponding fact record
96105
JOIN
106+
-- We use inner join because we have 1:n mapping between memory:fact tables
107+
-- guaranteed through foreign key constraint.
97108
fact ON memory.fact = fact.this
98-
-- We use inner join here because fact.is || 'undefined' is guaranteed to have
99-
-- corresponding record in datum through a foreign key constraint and inner
100-
-- joins are generally more efficient that left joins.
101-
-- ⚠️ Also note that we use COALESCE operator to use 'undefined' in case where
102-
-- there fact.is NULL (retractions), which is important because SQLite never
103-
-- matches over fact.is = NULL.
109+
-- We use inner join here because we have 1:n mapping between fact:datum
110+
-- tables guaranteed through a foreign key constraint. We also prefer inner
111+
-- join because it's generally more efficient than outer join.
104112
JOIN
105-
datum ON datum.this = COALESCE(fact.'is', 'undefined');
113+
datum ON datum.this = fact.'is';
106114
107115
COMMIT;
108116
`;
@@ -119,9 +127,9 @@ const IMPORT_MEMORY =
119127
const SWAP = `UPDATE memory
120128
SET fact = :fact
121129
WHERE
122-
fact = :cause
123-
AND the = :the
124-
AND of = :of;
130+
the = :the
131+
AND of = :of
132+
AND fact = :cause;
125133
`;
126134

127135
const EXPORT = `SELECT
@@ -348,16 +356,28 @@ const select = <Space extends MemorySpace>(
348356
return selection;
349357
};
350358

359+
/**
360+
* Imports datum into the `datum` table. If `datum` is undefined we return
361+
* special `"undefined"` for which `datum` table will have row with `NULL`
362+
* source. If `datum` already contains row for matching `datum` insert is
363+
* ignored because existing record will parse to same `datum` since primary
364+
* key is merkle-reference for it or an "undefined" for the `undefined`.
365+
*/
351366
const importDatum = <Space extends MemorySpace>(
352367
session: Session<Space>,
353-
source: Assertion,
354-
): Reference<JSONValue> => {
355-
const is = refer(source.is);
356-
session.store.run(IMPORT_DATUM, {
357-
this: is.toString(),
358-
source: JSON.stringify(source.is),
359-
});
360-
return is;
368+
datum: JSONValue | undefined,
369+
): string => {
370+
if (datum === undefined) {
371+
return "undefined";
372+
} else {
373+
const is = refer(datum).toString();
374+
session.store.run(IMPORT_DATUM, {
375+
this: is,
376+
source: JSON.stringify(datum),
377+
});
378+
379+
return is;
380+
}
361381
};
362382

363383
const iterate = function* (
@@ -380,12 +400,20 @@ const iterate = function* (
380400
}
381401
};
382402

403+
/**
404+
* Performs memory update with compare and swap (CAS) semantics. It will import
405+
* new data into `datum`, `fact` tables and update `memory` table to point to
406+
* new fact. All the updates occur in a single transaction to guarantee that
407+
* either all changes are made or no changes are. Function can also be passed
408+
* `Claim` in which case provided invariant is upheld, meaning no updates will
409+
* take place but error will be raised if claimed memory state is not current.
410+
*/
383411
const swap = <Space extends MemorySpace>(
384412
session: Session<Space>,
385413
source: Retract | Assert | Claim,
386414
{ since, transaction }: { since: number; transaction: Transaction<Space> },
387415
) => {
388-
const [{ the, of }, expect] = source.assert
416+
const [{ the, of, is }, expect] = source.assert
389417
? [source.assert, source.assert.cause]
390418
: source.retract
391419
? [source.retract, source.retract.cause]
@@ -403,40 +431,40 @@ const swap = <Space extends MemorySpace>(
403431
? refer(source.retract).toString()
404432
: source.claim.fact.toString();
405433

406-
// If this is an assertion we need to import asserted data and then insert
407-
// fact referencing it. If it is retraction we don't have data to import
408-
// but we do still need to create fact record.
434+
// If this is an assertion we need to import asserted datum and then insert
435+
// fact referencing it.
409436
if (source.assert || source.retract) {
410-
// First we import JSON value in the `is` field into the `datum` table and
411-
// then we import the fact into the `factor` table. If `datum` already exists
412-
// we ignore as we key those by the merkle reference. Same is true for the
413-
// `factor` table where we key by the merkle reference of the fact so if
414-
// conflicting record exists it is the same record and we ignore.
437+
// First we import datum and then use its primary key as `is` field
438+
// in the `fact` table upholding foreign key constraint.
415439
session.store.run(IMPORT_FACT, {
416440
this: fact,
417441
the,
418442
of,
419-
is: source.assert ? importDatum(session, source.assert).toString() : null,
443+
is: importDatum(session, is),
420444
cause,
421445
since,
422446
});
423447
}
424448

425-
// Now if referenced cause is for an implicit fact, we will not have a record
426-
// for it in the memory table to update in the next step. We also can not
427-
// create such record as we don't have corresponding records in the `fact`
428-
// or `datum` tables. Therefore instead we try to create a record for the
429-
// desired update. If conflicting record exists this will be ignored, but that
430-
// is fine as update in the next step will update it to the desired state.
449+
// First assertion has a causal reference to the `type Unclaimed = { the, of }`
450+
// implicit fact for which no record in the memory table exists which is why
451+
// we simply insert into the memory table. However such memory record may
452+
// already exist in which case insert will be ignored. This can happen if
453+
// say we had assertions `a, b, c, a` last `a` will not create any new
454+
// records and will be ignored. You may be wondering why do insert with an
455+
// ignore as opposed to do insert in if clause and update in the else block,
456+
// that is because we may also have assertions in this order `a, b, c, c`
457+
// where second `c` insert is redundant yet we do not want to fail transaction,
458+
// therefore we insert or ignore here to ensure fact record exists and then
459+
// use update afterwards to update to desired state from expected `cause` state.
431460
if (expected == null) {
432461
session.store.run(IMPORT_MEMORY, { the, of, fact });
433462
}
434463

435-
// Here we finally perform a memory swap. Note that update is conditional and
436-
// will only update if current record has the same `cause` reference. If that
437-
// is not the case 0 records will be updated indicating a conflict handled
438-
// below. Note that passing `the` and `of` is required, if omitted we may
439-
// update another memory which has passed `cause`.
464+
// Finally we perform a memory swap, using conditional update so it only
465+
// updates memory if the `cause` references expected state. We use return
466+
// value to figure out whether update took place, if it is `0` no records
467+
// were updated indicating potential conflict which we handle below.
440468
const updated = session.store.run(SWAP, { fact, cause, the, of });
441469

442470
// If no records were updated it implies that there was no record with
@@ -448,7 +476,7 @@ const swap = <Space extends MemorySpace>(
448476
if (updated === 0) {
449477
const { fact: actual } = recall(session, { the, of });
450478

451-
// If actual state matches desired state it is either was inserted by the
479+
// If actual state matches desired state it either was inserted by the
452480
// `IMPORT_MEMORY` or this was a duplicate call. Either way we do not treat
453481
// it as a conflict as current state is the asserted one.
454482
if (refer(actual).toString() !== fact) {

0 commit comments

Comments
 (0)