@@ -50,35 +50,47 @@ CREATE TABLE IF NOT EXISTS datum (
5050 source JSON -- Source for this JSON
5151);
5252
53- -- We create special record to represent undefined which does not exist in JSON.
54- -- This allows us to join fact with datum table and cover retractions where
55- -- fact.is is set to NULL
53+ -- We create special record to represent "undefined" which is not a valid
54+ -- JSON data type. We need this record to join fact.is on datum.this
5655INSERT OR IGNORE INTO datum (this, source) VALUES ('undefined', NULL);
5756
58-
57+ -- Fact table holds complete history of assertions and retractions. It has
58+ -- n:1 mapping with datum table implying that we could have multiple entity
59+ -- assertions with the same JSON value. Claimed n:1 mapping is guaranteed through
60+ -- a foreign key constraint.
5961CREATE TABLE IF NOT EXISTS fact (
6062 this TEXT NOT NULL PRIMARY KEY, -- Merkle reference for { the, of, is, cause }
6163 the TEXT NOT NULL, -- Kind of a fact e.g. "application/json"
6264 of TEXT NOT NULL, -- Entity identifier fact is about
63- 'is' TEXT, -- Value entity is claimed to have
64- cause TEXT, -- Causal reference to prior fact
65+ 'is' TEXT NOT NULL, -- Merkle reference of asserted value or "undefined" if retraction
66+ cause TEXT, -- Causal reference to prior fact (It is NULL for a first assertion)
6567 since INTEGER NOT NULL, -- Lamport clock since when this fact was in effect
6668 FOREIGN KEY('is') REFERENCES datum(this)
6769);
70+ -- Index via "since" field to allow for efficient time queries
71+ CREATE INDEX IF NOT EXISTS fact_since ON fact (since); -- Index to query by "since" field
6872
73+ -- Memory table holds latest assertion / retraction for each entity. In theory
74+ -- it has n:1 mapping with fact table, but in practice it is 1:1 mapping because
75+ -- initial cause is derived from {the, of} seed and therefore it is practically
76+ -- guaranteed to be unique if we disregard astronomically tiny chance of hash
77+ -- collision. Claimed n:1 mapping is guaranteed through a foreign key constraint.
6978CREATE TABLE IF NOT EXISTS memory (
7079 the TEXT NOT NULL, -- Kind of a fact e.g. "application/json"
7180 of TEXT NOT NULL, -- Entity identifier fact is about
72- fact TEXT NOT NULL, -- Link to the fact,
81+ fact TEXT NOT NULL, -- Reference to the fact
7382 FOREIGN KEY(fact) REFERENCES fact(this),
7483 PRIMARY KEY (the, of) -- Ensure that we have only one fact per entity
7584);
7685
77- CREATE INDEX IF NOT EXISTS memory_the ON memory (the); -- Index to filter by "the" field
78- CREATE INDEX IF NOT EXISTS memory_of ON memory (of); -- Index to query by "of" field
79- CREATE INDEX IF NOT EXISTS fact_since ON fact (since); -- Index to query by "since" field
86+ -- Index so we can efficiently search by "the" field.
87+ CREATE INDEX IF NOT EXISTS memory_the ON memory (the);
88+ -- Index so we can efficiently search by "of" field.
89+ CREATE INDEX IF NOT EXISTS memory_of ON memory (of);
8090
81- -- Create the updated 'state' view
91+ -- State view is effectively a memory table with all the foreign keys resolved.
92+ -- Note we use a view because data is split across memory, fact and datum
93+ -- tables (memory n:1 fact, fact n:1 datum) in order to deduplicate it.
8294CREATE VIEW IF NOT EXISTS state AS
8395SELECT
8496 memory.the AS the,
@@ -90,19 +102,15 @@ SELECT
90102 fact.since AS since
91103FROM
92104 memory
93- -- We use inner join because we memory.fact can not be NULL and as foreign
94- -- key into fact.this which is also primary key. This guarantees that we will
95- -- not have any memory record with corresponding fact record
96105JOIN
106+ -- We use inner join because we have n:1 mapping between memory:fact tables
107+ -- guaranteed through foreign key constraint.
97108 fact ON memory.fact = fact.this
98- -- We use inner join here because fact.is || 'undefined' is guaranteed to have
99- -- corresponding record in datum through a foreign key constraint and inner
100- -- joins are generally more efficient that left joins.
101- -- ⚠️ Also note that we use COALESCE operator to use 'undefined' in case where
102- -- there fact.is NULL (retractions), which is important because SQLite never
103- -- matches over fact.is = NULL.
109+ -- We use inner join here because we have n:1 mapping between fact:datum
110+ -- tables guaranteed through a foreign key constraint. We also prefer inner
111+ -- join because it's generally more efficient than outer join.
104112JOIN
105- datum ON datum.this = COALESCE( fact.'is', 'undefined') ;
113+ datum ON datum.this = fact.'is';
106114
107115COMMIT;
108116` ;
@@ -119,9 +127,9 @@ const IMPORT_MEMORY =
119127const SWAP = `UPDATE memory
120128 SET fact = :fact
121129WHERE
122- fact = :cause
123- AND the = :the
124- AND of = :of ;
130+ the = :the
131+ AND of = :of
132+ AND fact = :cause ;
125133` ;
126134
127135const EXPORT = `SELECT
@@ -348,16 +356,28 @@ const select = <Space extends MemorySpace>(
348356 return selection ;
349357} ;
350358
359+ /**
360+ * Imports datum into the `datum` table. If `datum` is undefined we return the
361+ * special `"undefined"` key, for which the `datum` table has a row with `NULL`
362+ * source. If the table already contains a row for the given `datum`, the insert
363+ * is ignored because the existing record will parse to the same `datum`, since
364+ * the primary key is the merkle-reference for it (or "undefined" for `undefined`).
365+ */
351366const importDatum = < Space extends MemorySpace > (
352367 session : Session < Space > ,
353- source : Assertion ,
354- ) : Reference < JSONValue > => {
355- const is = refer ( source . is ) ;
356- session . store . run ( IMPORT_DATUM , {
357- this : is . toString ( ) ,
358- source : JSON . stringify ( source . is ) ,
359- } ) ;
360- return is ;
368+ datum : JSONValue | undefined ,
369+ ) : string => {
370+ if ( datum === undefined ) {
371+ return "undefined" ;
372+ } else {
373+ const is = refer ( datum ) . toString ( ) ;
374+ session . store . run ( IMPORT_DATUM , {
375+ this : is ,
376+ source : JSON . stringify ( datum ) ,
377+ } ) ;
378+
379+ return is ;
380+ }
361381} ;
362382
363383const iterate = function * (
@@ -380,12 +400,20 @@ const iterate = function* (
380400 }
381401} ;
382402
403+ /**
404+ * Performs memory update with compare and swap (CAS) semantics. It will import
405+ * new data into `datum`, `fact` tables and update `memory` table to point to
406+ * new fact. All the updates occur in a single transaction to guarantee that
407+ * either all changes are made or no changes are. Function can also be passed
408+ * `Claim` in which case provided invariant is upheld, meaning no updates will
409+ * take place but error will be raised if claimed memory state is not current.
410+ */
383411const swap = < Space extends MemorySpace > (
384412 session : Session < Space > ,
385413 source : Retract | Assert | Claim ,
386414 { since, transaction } : { since : number ; transaction : Transaction < Space > } ,
387415) => {
388- const [ { the, of } , expect ] = source . assert
416+ const [ { the, of, is } , expect ] = source . assert
389417 ? [ source . assert , source . assert . cause ]
390418 : source . retract
391419 ? [ source . retract , source . retract . cause ]
@@ -403,40 +431,40 @@ const swap = <Space extends MemorySpace>(
403431 ? refer ( source . retract ) . toString ( )
404432 : source . claim . fact . toString ( ) ;
405433
406- // If this is an assertion we need to import asserted data and then insert
407- // fact referencing it. If it is retraction we don't have data to import
408- // but we do still need to create fact record.
434+ // If this is an assertion or a retraction we import its datum ("undefined"
435+ // for a retraction) and then insert a fact referencing it.
409436 if ( source . assert || source . retract ) {
410- // First we import JSON value in the `is` field into the `datum` table and
411- // then we import the fact into the `factor` table. If `datum` already exists
412- // we ignore as we key those by the merkle reference. Same is true for the
413- // `factor` table where we key by the merkle reference of the fact so if
414- // conflicting record exists it is the same record and we ignore.
437+ // First we import datum and then use its primary key as `is` field
438+ // in the `fact` table upholding foreign key constraint.
415439 session . store . run ( IMPORT_FACT , {
416440 this : fact ,
417441 the,
418442 of,
419- is : source . assert ? importDatum ( session , source . assert ) . toString ( ) : null ,
443+ is : importDatum ( session , is ) ,
420444 cause,
421445 since,
422446 } ) ;
423447 }
424448
425- // Now if referenced cause is for an implicit fact, we will not have a record
426- // for it in the memory table to update in the next step. We also can not
427- // create such record as we don't have corresponding records in the `fact`
428- // or `datum` tables. Therefore instead we try to create a record for the
429- // desired update. If conflicting record exists this will be ignored, but that
430- // is fine as update in the next step will update it to the desired state.
449+ // First assertion has a causal reference to the `type Unclaimed = { the, of }`
450+ // implicit fact for which no record in the memory table exists, which is why
451+ // we simply insert into the memory table. However such a memory record may
452+ // already exist, in which case the insert will be ignored. This can happen if
453+ // say we had assertions `a, b, c, a`: the last `a` will not create any new
454+ // records and will be ignored. You may be wondering why we insert with an
455+ // ignore as opposed to inserting in an if clause and updating in the else block;
456+ // that is because we may also have assertions in this order `a, b, c, c`
457+ // where second `c` insert is redundant yet we do not want to fail transaction,
458+ // therefore we insert or ignore here to ensure memory record exists and then
459+ // use update afterwards to update to desired state from expected `cause` state.
431460 if ( expected == null ) {
432461 session . store . run ( IMPORT_MEMORY , { the, of, fact } ) ;
433462 }
434463
435- // Here we finally perform a memory swap. Note that update is conditional and
436- // will only update if current record has the same `cause` reference. If that
437- // is not the case 0 records will be updated indicating a conflict handled
438- // below. Note that passing `the` and `of` is required, if omitted we may
439- // update another memory which has passed `cause`.
464+ // Finally we perform a memory swap, using conditional update so it only
465+ // updates memory if the `cause` references expected state. We use return
466+ // value to figure out whether update took place, if it is `0` no records
467+ // were updated indicating potential conflict which we handle below.
440468 const updated = session . store . run ( SWAP , { fact, cause, the, of } ) ;
441469
442470 // If no records were updated it implies that there was no record with
@@ -448,7 +476,7 @@ const swap = <Space extends MemorySpace>(
448476 if ( updated === 0 ) {
449477 const { fact : actual } = recall ( session , { the, of } ) ;
450478
451- // If actual state matches desired state it is either was inserted by the
479+ // If actual state matches desired state it either was inserted by the
452480 // `IMPORT_MEMORY` or this was a duplicate call. Either way we do not treat
453481 // it as a conflict as current state is the asserted one.
454482 if ( refer ( actual ) . toString ( ) !== fact ) {
0 commit comments