From 892c498f9c37b148d73eabc29495accc97641a37 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Tue, 25 Sep 2018 11:58:32 -0700 Subject: [PATCH] mirror: query and process own-data updates (#883) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This commit adds internal functions to (a) emit a GraphQL query to fetch data for own-data of an object, and (b) ingest the results of said query back into the database. The API and implementation differ from the connection-updating analogues introduced in #878 in that the query for own data is independent of an object’s ID: it depends only on the object’s type. This affords us more flexibility in composing queries. As described in a internal documentation comment, values are stored in the database in JSON-stringified form: we cannot use the obvious raw SQL values, because there is no native encoding of booleans (`0`/`1` is conventional), and we need to distinguish them from other data types. There are other ways to solve this problem. Notably: 1. We could take inspiration from OCaml: encode stronger static types and a simpler runtime representation. That is, we could change the schema field types from simply “primitive” to the various specific primitive types. Then, when reading data out from the database, we could reinterpret the values appropriately. 2. We could take advantage of the fact that we are not using all of SQLite’s data types. In particular, we do not store anything as a binary blob, so we could encode `false` as a length-0 zeroblob and `true` as a length-1 zeroblob, for instance. Again, when reading data out from the database, we would reinterpret the values—but in this approach we would not need an explicit schema from the user. For now, we take the easiest and simplest approach just to get ourselves off the ground. We can easily move to the second option described above later. This commit makes progress toward #622. Test Plan: Unit tests included, with full coverage. While these tests check that the GraphQL queries are as expected, they cannot check that they are actually valid in production. To check this, follow the instructions in the added snapshot test. wchargin-branch: mirror-own-data-updates --- src/graphql/__snapshots__/mirror.test.js.snap | 29 ++ src/graphql/mirror.js | 253 ++++++++++++++ src/graphql/mirror.test.js | 309 ++++++++++++++++++ 3 files changed, 591 insertions(+) diff --git a/src/graphql/__snapshots__/mirror.test.js.snap b/src/graphql/__snapshots__/mirror.test.js.snap index d7d74ae..a05c651 100644 --- a/src/graphql/__snapshots__/mirror.test.js.snap +++ b/src/graphql/__snapshots__/mirror.test.js.snap @@ -232,3 +232,32 @@ exports[`graphql/mirror Mirror _queryConnection snapshot test for actual GitHub } }" `; + +exports[`graphql/mirror Mirror _updateOwnData snapshot test for actual GitHub queries 1`] = ` +"query TestQuery { + node(id: \\"MDU6SXNzdWUzNDg1NDA0NjE=\\") { + ... on Issue { + __typename + id + url + author { + __typename + ... on User { + id + } + ... on Bot { + id + } + ... on Organization { + id + } + } + repository { + __typename + id + } + title + } + } +}" +`; diff --git a/src/graphql/mirror.js b/src/graphql/mirror.js index d4ff84d..a330a68 100644 --- a/src/graphql/mirror.js +++ b/src/graphql/mirror.js @@ -69,6 +69,7 @@ export class Mirror { * the corresponding table. * * In more detail: + * * - The `connections` table has a row for each `(id, fieldname)` * pair, where `fieldname` is the name of a connection field on the * object with the given ID. This stores metadata about the @@ -76,13 +77,25 @@ export class Mirror { * does not store the actual entries in the connection (the nodes * that the connection points to); `connection_entries` stores * these. + * * - The `links` table has a row for each `(id, fieldname)` pair, * where `fieldname` is the name of a link field on the object * with the given ID. This simply points to the referenced object. + * * - For each type `T`, the `primitives_T` table has one row for * each object of type `T`, storing the primitive data of the * object. * + * All values are stored as stringified JSON values: so, for + * instance, the JSON value `null` is represented as the SQL + * string 'null', _not_ SQL NULL, while the JSON string "null" is + * represented as the SQL string '"null"'. This is primarily to + * accommodate storing booleans: SQLite encodes `true` and `false` + * as `1` and `0`, but we need to be able to distinguish between + * these respective values. There are other ways to do this more + * efficiently in both space and time (see discussion on #883 for + * some options). + * * We refer to node and primitive data together as "own data", because * this is the data that can be queried uniformly for all elements of * a type; querying connection data, by contrast, requires the @@ -704,6 +717,230 @@ export class Mirror { addEntry.run({connectionId, idx, childId}); } } + + /** + * Create a GraphQL selection set required to fetch the own-data + * (primitives and node references) of an object, but not its + * connection entries. The result depends only on the (concrete) type + * of the object, not its ID. + * + * The provided typename must correspond to an object type, not a + * union type; otherwise, an error will be thrown. + * + * The resulting GraphQL can be embedded into the context of any node + * with the provided typename. For instance, if there are issues with + * IDs "#1" and "#2", then `_queryOwnData("Issue")` emits GraphQL + * that may replace the `?` in any of the following queries: + * + * repository(owner: "foo", name: "bar") { + * issues(first: 1) { ? } + * } + * nodes(ids: ["#1", "#2") { ... on Issue { ? } } + * node(id: "#1") { ... on Issue { ? } } + * + * The result of this query has type `E`, where `E` is the element + * type of `OwnDataUpdateResult`. That is, it is an object with shape + * that depends on the provided typename: the name of each ID or + * primitive field is a key mapping to a primitive value, and the name + * of each node field is a key mapping to a `NodeFieldResult`. + * Additionally, the attribute "__typename" maps to the node's + * typename. + * + * This function is pure: it does not interact with the database. + */ + _queryOwnData(typename: Schema.Typename): Queries.Selection[] { + const type = this._schema[typename]; + if (type == null) { + throw new Error(`No such type: ${JSON.stringify(typename)}`); + } + if (type.type !== "OBJECT") { + throw new Error( + `Not an object type: ${JSON.stringify(typename)} (${type.type})` + ); + } + const b = Queries.build; + return [ + b.field("__typename"), + ...Object.keys(type.fields) + .map((fieldname) => { + const field = type.fields[fieldname]; + switch (field.type) { + case "ID": + return b.field(fieldname); + case "PRIMITIVE": + return b.field(fieldname); + case "NODE": + return b.field( + fieldname, + {}, + this._queryShallow(field.elementType) + ); + case "CONNECTION": + // Not handled by this function. + return null; + // istanbul ignore next + default: + throw new Error((field.type: empty)); + } + }) + .filter(Boolean), + ]; + } + + /** + * Ingest own-data (primitive and link) updates for many objects of a + * fixed concrete type. Every object in the input list must have the + * same `__typename` attribute, which must be the name of a valid + * object type. + * + * See: `_queryOwnData`. + */ + _updateOwnData(updateId: UpdateId, queryResult: OwnDataUpdateResult): void { + _inTransaction(this._db, () => { + this._nontransactionallyUpdateOwnData(updateId, queryResult); + }); + } + + /** + * As `_updateOwnData`, but do not enter any transactions. Other + * methods may call this method as a subroutine in a larger + * transaction. + */ + _nontransactionallyUpdateOwnData( + updateId: UpdateId, + queryResult: OwnDataUpdateResult + ): void { + if (queryResult.length === 0) { + return; + } + const typename = queryResult[0].__typename; + if (this._schema[typename] == null) { + throw new Error("Unknown type: " + JSON.stringify(typename)); + } + if (this._schema[typename].type !== "OBJECT") { + throw new Error( + "Cannot update data for non-object type: " + + `${JSON.stringify(typename)} (${this._schema[typename].type})` + ); + } + + const db = this._db; + const objectType = this._schemaInfo.objectTypes[typename]; + + // First, make sure that all objects for which we're given own data + // actually exist and have the correct typename. + { + const doesObjectExist = db + .prepare("SELECT COUNT(1) FROM objects WHERE id = ?") + .pluck(); + for (const entry of queryResult) { + if (!doesObjectExist.get(entry.id)) { + throw new Error( + "Cannot update data for nonexistent node: " + + JSON.stringify(entry.id) + ); + } + if (entry.__typename !== typename) { + const s = JSON.stringify; + throw new Error( + "Result set has inconsistent typenames: " + + `${s(typename)} vs. ${s(entry.__typename)}` + ); + } + } + } + + // Set each node's `lastUpdate` time. + { + const setLastUpdate: (objectId: Schema.ObjectId) => void = (() => { + const stmt = db.prepare( + dedent`\ + UPDATE objects SET last_update = :updateId + WHERE id = :objectId + ` + ); + const update = _makeSingleUpdateFunction(stmt); + return (objectId) => update({objectId, updateId}); + })(); + for (const entry of queryResult) { + setLastUpdate(entry.id); + } + } + + // Update each node's primitive data. + { + const updatePrimitives: ({| + +id: Schema.ObjectId, + +[primitiveFieldName: Schema.Fieldname]: string, + |}) => void = (() => { + if (objectType.primitiveFieldNames.length === 0) { + return () => {}; + } + const tableName = _primitivesTableName(typename); + const updates = objectType.primitiveFieldNames + .map((f) => `"${f}" = :${f}`) + .join(", "); + const stmt = db.prepare( + `UPDATE ${tableName} SET ${updates} WHERE id = :id` + ); + return _makeSingleUpdateFunction(stmt); + })(); + for (const entry of queryResult) { + const primitives: {| + +id: Schema.ObjectId, + [primitiveFieldName: Schema.Fieldname]: string, + |} = {id: entry.id}; + for (const fieldname of objectType.primitiveFieldNames) { + const value: PrimitiveResult | NodeFieldResult = entry[fieldname]; + const primitive: PrimitiveResult = (value: any); + if (primitive === undefined) { + const s = JSON.stringify; + throw new Error( + `Missing primitive ${s(fieldname)} on ${s(entry.id)} ` + + `of type ${s(typename)} (got ${(primitive: empty)})` + ); + } + primitives[fieldname] = JSON.stringify(primitive); + } + updatePrimitives(primitives); + } + } + + // Update each node's links. + { + const updateLink: ({| + +parentId: string, + +fieldname: string, + +childId: string | null, + |}) => void = (() => { + const stmt = db.prepare( + dedent`\ + UPDATE links SET child_id = :childId + WHERE parent_id = :parentId AND fieldname = :fieldname + ` + ); + return _makeSingleUpdateFunction(stmt); + })(); + for (const entry of queryResult) { + for (const fieldname of objectType.linkFieldNames) { + const value: PrimitiveResult | NodeFieldResult = entry[fieldname]; + const link: NodeFieldResult = (value: any); + if (link === undefined) { + const s = JSON.stringify; + throw new Error( + `Missing node reference ${s(fieldname)} on ${s(entry.id)} ` + + `of type ${s(typename)} (got ${(link: empty)})` + ); + } + const childId = this._nontransactionallyRegisterNodeFieldResult(link); + const parentId = entry.id; + updateLink({parentId, fieldname, childId}); + } + } + } + + // Last-updates, primitives, and links all updated: we're done. + } } /** @@ -823,6 +1060,7 @@ type QueryPlan = {| */ type EndCursor = string | null; +type PrimitiveResult = string | number | boolean | null; type NodeFieldResult = {| +__typename: Schema.Typename, +id: Schema.ObjectId, @@ -833,6 +1071,21 @@ type ConnectionFieldResult = {| +nodes: $ReadOnlyArray, |}; +/** + * Result describing own-data for many nodes of a given type. Whether a + * value is a `PrimitiveResult` or a `NodeFieldResult` is determined by + * the schema. + * + * This type would be exact but for facebook/flow#2977, et al. + */ +type OwnDataUpdateResult = $ReadOnlyArray<{ + +__typename: Schema.Typename, // the same for all entries + +id: Schema.ObjectId, + +[nonConnectionFieldname: Schema.Fieldname]: + | PrimitiveResult + | NodeFieldResult, +}>; + /** * Execute a function inside a database transaction. * diff --git a/src/graphql/mirror.test.js b/src/graphql/mirror.test.js index 8fa8a1c..36265da 100644 --- a/src/graphql/mirror.test.js +++ b/src/graphql/mirror.test.js @@ -1065,6 +1065,315 @@ describe("graphql/mirror", () => { }); }); }); + + describe("_queryOwnData", () => { + it("fails given a nonexistent typename", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + expect(() => { + mirror._queryOwnData("Wat"); + }).toThrow('No such type: "Wat"'); + }); + it("fails given a non-OBJECT type", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + expect(() => { + mirror._queryOwnData("Actor"); + }).toThrow('Not an object type: "Actor" (UNION)'); + }); + it("generates a query with ID, primitives, and nodes only", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const query = mirror._queryOwnData("Issue"); + const b = Queries.build; + // Note: The actual selections could permissibly be permuted + // with respect to these expected selections, causing a spurious + // failure. If that happens, we can choose make the test more + // robust. + expect(query).toEqual([ + b.field("__typename"), + b.field("id"), + b.field("url"), + b.field("author", {}, [ + b.field("__typename"), + b.inlineFragment("User", [b.field("id")]), + b.inlineFragment("Bot", [b.field("id")]), + b.inlineFragment("Organization", [b.field("id")]), + ]), + b.field("repository", {}, [b.field("__typename"), b.field("id")]), + b.field("title"), + // no `comments` + // no `timeline` + ]); + }); + }); + + describe("_updateOwnData", () => { + it("fails given a nonexistent typename", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + expect(() => { + mirror._updateOwnData(updateId, [{__typename: "Wat", id: "wot"}]); + }).toThrow('Unknown type: "Wat"'); + }); + it("fails given a non-object typename", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + expect(() => { + mirror._updateOwnData(updateId, [{__typename: "Actor", id: "wut"}]); + }).toThrow('Cannot update data for non-object type: "Actor" (UNION)'); + }); + it("fails given a nonexistent object with a link to itself", () => { + // A naive implementation might register the link targets as + // objects before verifying that the target object actually + // exists. This test would catch such an implementation. + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + expect(() => { + mirror._updateOwnData(updateId, [ + { + __typename: "Issue", + id: "issue:#1", + url: "url://issue/1", + author: {__typename: "User", id: "alice"}, + parent: {__typename: "Issue", id: "issue:#1"}, + title: "hello", + }, + ]); + }).toThrow('Cannot update data for nonexistent node: "issue:#1"'); + }); + it("fails given a nonexistent object referenced in another node", () => { + // A naive implementation might fail here similar to above. + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + mirror.registerObject({typename: "Issue", id: "issue:#1"}); + const updateId = mirror._createUpdate(new Date(123)); + expect(() => { + mirror._updateOwnData(updateId, [ + { + __typename: "Issue", + id: "issue:#1", + url: "url://issue/1", + author: {__typename: "User", id: "alice"}, + repository: {__typename: "Issue", id: "issue:#2"}, + title: "hello", + }, + { + __typename: "Issue", + id: "issue:#2", + url: "url://issue/2", + author: {__typename: "User", id: "alice"}, + repository: null, + title: "wat", + }, + ]); + }).toThrow('Cannot update data for nonexistent node: "issue:#2"'); + }); + it("fails given a result set with inconsistent typenames", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + mirror.registerObject({typename: "Repository", id: "repo:foo/bar"}); + mirror.registerObject({typename: "User", id: "user:alice"}); + expect(() => { + mirror._updateOwnData(updateId, [ + { + __typename: "Repository", + id: "repo:foo/bar", + url: "url://repo/foo/bar", + }, + { + __typename: "User", + id: "user:alice", + url: "url://user/alice", + login: "alice", + }, + ]); + }).toThrow( + 'Result set has inconsistent typenames: "Repository" vs. "User"' + ); + }); + it("fails if the input is missing any primitive fields", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + mirror.registerObject({typename: "IssueComment", id: "comment:#1"}); + expect(() => { + mirror._updateOwnData(updateId, [ + { + __typename: "IssueComment", + id: "comment:#1", + author: null, + // body omitted + }, + ]); + }).toThrow( + 'Missing primitive "body" on "comment:#1" of type "IssueComment" ' + + "(got undefined)" + ); + }); + it("fails if the input is missing any link fields", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const updateId = mirror._createUpdate(new Date(123)); + mirror.registerObject({typename: "IssueComment", id: "comment:#1"}); + expect(() => { + mirror._updateOwnData(updateId, [ + { + __typename: "IssueComment", + id: "comment:#1", + body: "somebody", + // author omitted + }, + ]); + }).toThrow( + 'Missing node reference "author" on "comment:#1" of type "IssueComment" ' + + "(got undefined)" + ); + }); + it("properly stores normal data", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + mirror.registerObject({typename: "Repository", id: "repo:foo/bar"}); + mirror.registerObject({typename: "Issue", id: "issue:#1"}); + mirror.registerObject({typename: "Issue", id: "issue:#2"}); + mirror.registerObject({typename: "Issue", id: "issue:#3"}); + mirror.registerObject({typename: "User", id: "alice"}); + const updateId = mirror._createUpdate(new Date(123)); + + mirror._updateOwnData(updateId, [ + { + __typename: "Issue", + id: "issue:#1", + url: "url://issue/1", + author: {__typename: "User", id: "alice"}, + repository: {__typename: "Repository", id: "repo:foo/bar"}, + title: 13.75, + }, + { + __typename: "Issue", + id: "issue:#2", + url: null, + author: {__typename: "User", id: "bob"}, // must be added + repository: null, + title: false, + }, + ]); + expect( + db + .prepare("SELECT id FROM objects WHERE typename = 'User'") + .pluck() + .all() + .sort() + ).toEqual(["alice", "bob"].sort()); + expect( + db.prepare("SELECT * FROM primitives_Issue ORDER BY id ASC").all() + ).toEqual([ + {id: "issue:#1", url: '"url://issue/1"', title: "13.75"}, + {id: "issue:#2", url: "null", title: "false"}, + {id: "issue:#3", url: null, title: null}, + ]); + }); + it("properly handles input of a type with no primitives", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + mirror.registerObject({typename: "Repository", id: "repo:foo/bar"}); + mirror.registerObject({typename: "LockedEvent", id: "uno"}); + mirror.registerObject({typename: "LockedEvent", id: "dos"}); + const updateId = mirror._createUpdate(new Date(123)); + + mirror._updateOwnData(updateId, [ + { + __typename: "LockedEvent", + id: "uno", + actor: null, + }, + { + __typename: "LockedEvent", + id: "dos", + actor: {__typename: "User", id: "user:alice"}, + }, + ]); + expect( + db + .prepare("SELECT * FROM primitives_LockedEvent ORDER BY id ASC") + .all() + ).toEqual([{id: "dos"}, {id: "uno"}]); + expect( + db + .prepare("SELECT * FROM links ORDER BY parent_id ASC") + .all() + .filter((x) => x.parent_id === "uno" || x.parent_id === "dos") + ).toEqual([ + { + parent_id: "dos", + fieldname: "actor", + child_id: "user:alice", + rowid: expect.anything(), + }, + { + parent_id: "uno", + fieldname: "actor", + child_id: null, + rowid: expect.anything(), + }, + ]); + }); + it("does nothing on an empty input", () => { + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const getState = () => ({ + updates: db.prepare("SELECT * FROM updates ORDER BY rowid").all(), + objects: db.prepare("SELECT * FROM objects ORDER BY id").all(), + links: db + .prepare("SELECT * FROM links ORDER BY parent_id, fieldname") + .all(), + connections: db + .prepare("SELECT * FROM connections ORDER BY object_id, fieldname") + .all(), + connectionEntries: db + .prepare( + "SELECT * FROM connection_entries ORDER BY connection_id, idx" + ) + .all(), + }); + mirror.registerObject({typename: "Repository", id: "repo:foo/bar"}); + mirror.registerObject({typename: "Issue", id: "issue:#1"}); + const updateId = mirror._createUpdate(new Date(123)); + const pre = getState(); + mirror._updateOwnData(updateId, []); + const post = getState(); + expect(post).toEqual(pre); + }); + it("snapshot test for actual GitHub queries", () => { + // This test emits as a snapshot a valid query against GitHub's + // GraphQL API. You can copy-and-paste the snapshot into + // to run it. The + // resulting should contain valid data about a GitHub issue. + // Note that the "Issue" type contains all types of fields: ID, + // primitive, node reference to object, and node reference to + // union. + const db = new Database(":memory:"); + const mirror = new Mirror(db, buildGithubSchema()); + const exampleIssueId = "MDU6SXNzdWUzNDg1NDA0NjE="; + const b = Queries.build; + const query = b.query( + "TestQuery", + [], + [ + b.field("node", {id: b.literal(exampleIssueId)}, [ + b.inlineFragment("Issue", mirror._queryOwnData("Issue")), + ]), + ] + ); + const format = (body: Queries.Body): string => + Queries.stringify.body(body, Queries.multilineLayout(" ")); + expect(format([query])).toMatchSnapshot(); + }); + }); }); describe("_buildSchemaInfo", () => {