mirror: add internal method _findOutdated (#875)

Summary:
This function finds all objects whose own data has not been updated
since a given time, and all connections whose entries have not been
updated since that time.

Note that this is scoped to the entirety of the database. In #622,
I discussed using a recursive common table expression to identify only
those transitive dependencies of the root. I think that this is overkill
for the `_findOutdated` method: you’ll usually want to update everything
in the database. Don’t worry—the cool recursive query will still be used
in the `extract` function. :-)

This commit makes progress toward #622.

Test Plan:
Unit tests added, with full coverage; run `yarn unit`.

wchargin-branch: mirror-findoutdated
This commit is contained in:
William Chargin 2018-09-20 14:19:47 -07:00 committed by GitHub
parent e3f04c5079
commit 1dd8b7bcb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 207 additions and 0 deletions

View File

@ -353,6 +353,58 @@ export class Mirror {
addConnection.run({id, fieldname});
}
}
/**
* Find objects and connections that are not known to be up-to-date.
*
* An object is up-to-date if its own data has been loaded at least as
* recently as the provided date.
*
* A connection is up-to-date if it has been fetched at least as
* recently as the provided date, and at the time of fetching there
* were no more pages.
*/
_findOutdated(since: Date): QueryPlan {
const db = this._db;
return _inTransaction(db, () => {
const objects: $PropertyType<QueryPlan, "objects"> = db
.prepare(
dedent`\
SELECT typename AS typename, id AS id
FROM objects
LEFT OUTER JOIN updates ON objects.last_update = updates.rowid
WHERE objects.last_update IS NULL
OR updates.time_epoch_millis < :timeEpochMillisThreshold
`
)
.all({timeEpochMillisThreshold: +since});
const connections: $PropertyType<QueryPlan, "connections"> = db
.prepare(
dedent`\
SELECT
connections.object_id AS objectId,
connections.fieldname AS fieldname,
connections.last_update IS NULL AS neverUpdated,
connections.end_cursor AS endCursor
FROM connections
LEFT OUTER JOIN updates ON connections.last_update = updates.rowid
WHERE connections.has_next_page
OR connections.last_update IS NULL
OR updates.time_epoch_millis < :timeEpochMillisThreshold
`
)
.all({timeEpochMillisThreshold: +since})
.map((entry) => {
const result = {...entry};
if (result.neverUpdated) {
result.endCursor = undefined; // as opposed to `null`
}
delete result.neverUpdated;
return result;
});
return {objects, connections};
});
}
}
/**
@ -449,6 +501,29 @@ export function _buildSchemaInfo(schema: Schema.Schema): SchemaInfo {
type UpdateId = number;
/**
 * A set of objects and connections that should be updated.
 *
 * This is the return type of `Mirror._findOutdated`.
 */
type QueryPlan = {|
// Objects whose own data should be loaded, each identified by its
// typename and ID.
+objects: $ReadOnlyArray<{|
+typename: Schema.Typename,
+id: Schema.ObjectId,
|}>,
// Connections that should be fetched, each identified by the ID of
// the owning object and the name of the connection field.
+connections: $ReadOnlyArray<{|
+objectId: Schema.ObjectId,
+fieldname: Schema.Fieldname,
// Note that `null` is a valid `EndCursor`, distinct from `undefined`.
+endCursor: EndCursor | void, // `undefined` if never fetched
|}>,
|};
/**
 * An `endCursor` of a GraphQL `pageInfo` object, denoting where the
 * cursor should continue reading the next page. This is `null` when the
 * cursor is at the beginning of the connection (i.e., when the
 * connection is empty, or when `first: 0` is provided).
 *
 * Because `null` is a meaningful cursor value, it must not be used to
 * mean "no cursor known": `QueryPlan` uses `undefined` for connections
 * that have never been fetched.
 */
type EndCursor = string | null;
/**
* Execute a function inside a database transaction.
*

View File

@ -4,6 +4,7 @@ import Database from "better-sqlite3";
import fs from "fs";
import tmp from "tmp";
import dedent from "../util/dedent";
import * as Schema from "./schema";
import {_buildSchemaInfo, _inTransaction, Mirror} from "./mirror";
@ -348,6 +349,137 @@ describe("graphql/mirror", () => {
expect(db.prepare("SELECT * FROM connections").all()).toHaveLength(0);
});
});
describe("_findOutdated", () => {
  it("finds the right objects and connections", () => {
    const db = new Database(":memory:");
    const mirror = new Mirror(db, buildGithubSchema());

    // Register one repository and four issues; registration order is
    // the order in which they are listed here.
    const registeredObjects = [
      {typename: "Repository", id: "repo:ab/cd"},
      {typename: "Issue", id: "issue:ab/cd#1"},
      {typename: "Issue", id: "issue:ab/cd#2"},
      {typename: "Issue", id: "issue:ab/cd#3"},
      {typename: "Issue", id: "issue:ab/cd#4"},
    ];
    for (const object of registeredObjects) {
      mirror.registerObject(object);
    }

    // Three updates at increasing times; the middle one is the cutoff
    // passed to `_findOutdated` below.
    const updateAt = (time) => {
      const id = mirror._createUpdate(new Date(time));
      return {time, id};
    };
    const earlyUpdate = updateAt(123);
    const midUpdate = updateAt(456);
    const lateUpdate = updateAt(789);

    // Wrap a SQL `UPDATE` so that each invocation asserts that exactly
    // one row was changed. This guards against fixture mistakes such
    // as updating an object that was never registered with the DB.
    // The SQL and bindings are included in the assertion so that a
    // failure message identifies the offending call.
    const checkedUpdate = (updateSql) => {
      const statement = db.prepare(updateSql);
      return (...bindings) => {
        const result = statement.run(...bindings);
        expect({updateSql, bindings, result}).toEqual({
          updateSql,
          bindings,
          result: expect.objectContaining({changes: 1}),
        });
      };
    };

    const setObjectData = checkedUpdate(
      "UPDATE objects SET last_update = :update WHERE id = :id"
    );
    const objectStates = [
      {id: "repo:ab/cd", update: earlyUpdate.id},
      {id: "issue:ab/cd#1", update: lateUpdate.id},
      {id: "issue:ab/cd#2", update: null},
      {id: "issue:ab/cd#3", update: null},
      {id: "issue:ab/cd#4", update: midUpdate.id},
    ];
    for (const state of objectStates) {
      setObjectData(state);
    }

    const setConnectionData = checkedUpdate(
      dedent`\
UPDATE connections SET
last_update = :update,
total_count = :totalCount,
has_next_page = :hasNextPage,
end_cursor = :endCursor
WHERE object_id = :objectId AND fieldname = :fieldname
`
    );
    // Booleans are coerced to numbers (`+false`, `+true`) before being
    // bound to `has_next_page`.
    const connectionStates = [
      {
        objectId: "repo:ab/cd",
        fieldname: "issues",
        update: earlyUpdate.id,
        totalCount: 1,
        hasNextPage: +false,
        endCursor: "cursor:repo.issues",
      },
      {
        objectId: "issue:ab/cd#1",
        fieldname: "comments",
        update: null,
        totalCount: null,
        hasNextPage: null,
        endCursor: null,
      },
      {
        objectId: "issue:ab/cd#2",
        fieldname: "comments",
        update: lateUpdate.id,
        totalCount: 1,
        hasNextPage: +true,
        endCursor: null,
      },
      {
        objectId: "issue:ab/cd#3",
        fieldname: "comments",
        update: lateUpdate.id,
        totalCount: 0,
        hasNextPage: +false,
        endCursor: null,
      },
      {
        objectId: "issue:ab/cd#4",
        fieldname: "comments",
        update: midUpdate.id,
        totalCount: 3,
        hasNextPage: +false,
        endCursor: "cursor:issue4.comments",
      },
    ];
    for (const state of connectionStates) {
      setConnectionData(state);
    }

    const cutoff = new Date(midUpdate.time);
    const actual = mirror._findOutdated(cutoff);

    const expectedObjects = [
      // loaded before cutoff
      {typename: "Repository", id: "repo:ab/cd"},
      // (issue:ab/cd#1 is omitted: loaded after the cutoff)
      // never loaded
      {typename: "Issue", id: "issue:ab/cd#2"},
      // never loaded
      {typename: "Issue", id: "issue:ab/cd#3"},
      // (issue:ab/cd#4 is omitted: loaded exactly at the cutoff)
    ];
    const expectedConnections = [
      // loaded before cutoff
      {
        objectId: "repo:ab/cd",
        fieldname: "issues",
        endCursor: "cursor:repo.issues",
      },
      // never loaded
      {
        objectId: "issue:ab/cd#1",
        fieldname: "comments",
        endCursor: undefined,
      },
      // loaded, but has more data available
      {
        objectId: "issue:ab/cd#2",
        fieldname: "comments",
        endCursor: null,
      },
      // (issue:ab/cd#3.comments is omitted: loaded after the cutoff)
      // (issue:ab/cd#4.comments is omitted: loaded exactly at the cutoff)
    ];
    expect(actual).toEqual({
      objects: expectedObjects,
      connections: expectedConnections,
    });
  });
});
});
describe("_buildSchemaInfo", () => {