mirror: remove legacy tables

Summary:
This data is now stored in EAV `primitives` table; see issue #1313 and
adjacent commits for details.

We simultaneously lift the restriction that GraphQL type and field names
be SQL-safe identifiers, as it’s no longer necessary.

Test Plan:
Some test cases queried the legacy primitives tables to check properties
about the database state. These queries have of course been removed;
note that each such removed query was already accompanied by an
equivalent query against the EAV `primitives` table.

Note that `yarn test --full` still passes, and that when manually
loading `sourcecred/example-github` the cache no longer has any of the
legacy tables.

wchargin-branch: mirror-eav-prune-tables
This commit is contained in:
William Chargin 2019-09-01 15:29:04 -07:00
parent c0325f14ff
commit 2f9c9aa219
2 changed files with 5 additions and 328 deletions

View File

@ -143,11 +143,6 @@ export class Mirror {
* this more efficiently in both space and time (see discussion on
* #883 for some options).
*
* NOTE: A previous version of the schema used a separate
* primitives table for each GraphQL object type. These
* `primitives_*` tables are still written, but are no longer
* read. They will be removed entirely in a future change.
*
* We refer to node and primitive data together as "own data", because
* this is the data that can be queried uniformly for all elements of
* a type; querying connection data, by contrast, requires the
@ -195,7 +190,7 @@ export class Mirror {
// it requires bumping the version, bump it: requiring some extra
// one-time cache resets is okay; doing the wrong thing is not.
const blob = stringify({
version: "MIRROR_v6",
version: "MIRROR_v7",
schema: this._schema,
options: {
blacklistedIds: this._blacklistedIds,
@ -230,8 +225,7 @@ export class Mirror {
}
db.prepare("INSERT INTO meta (zero, config) VALUES (0, ?)").run(blob);
// First, create those tables that are independent of the schema.
const structuralTables = [
const tables = [
// Time is stored in milliseconds since 1970-01-01T00:00Z, with
// ECMAScript semantics (leap seconds ignored, exactly 86.4M ms
// per day, etc.).
@ -327,57 +321,9 @@ export class Mirror {
ON connection_entries (connection_id)
`,
];
for (const sql of structuralTables) {
for (const sql of tables) {
db.prepare(sql).run();
}
// Then, create primitive-data tables, which depend on the schema.
// We only create tables for object types, as union types have no
// physical representation; they exist only at the type level.
for (const typename of Object.keys(this._schemaInfo.objectTypes)) {
const type = this._schemaInfo.objectTypes[typename];
if (!isSqlSafe(typename)) {
throw new Error(
"invalid object type name: " + JSON.stringify(typename)
);
}
for (const fieldname of type.primitiveFieldNames) {
if (!isSqlSafe(fieldname)) {
throw new Error("invalid field name: " + JSON.stringify(fieldname));
}
}
for (const fieldname of type.nestedFieldNames) {
if (!isSqlSafe(fieldname)) {
throw new Error("invalid field name: " + JSON.stringify(fieldname));
}
const children = type.nestedFields[fieldname].primitives;
for (const childFieldname of Object.keys(children)) {
if (!isSqlSafe(childFieldname)) {
throw new Error(
"invalid field name: " +
JSON.stringify(childFieldname) +
" under " +
JSON.stringify(fieldname)
);
}
}
}
const tableName = _primitivesTableName(typename);
const tableSpec = []
.concat(
["id TEXT NOT NULL PRIMARY KEY"],
type.primitiveFieldNames.map((fieldname) => `"${fieldname}"`),
type.nestedFieldNames.map((fieldname) => `"${fieldname}"`),
...type.nestedFieldNames.map((f1) =>
Object.keys(type.nestedFields[f1].primitives).map(
(f2) => `"${f1}.${f2}"`
)
),
["FOREIGN KEY(id) REFERENCES objects(id)"]
)
.join(", ");
db.prepare(`CREATE TABLE ${tableName} (${tableSpec})`).run();
}
});
}
@ -470,14 +416,6 @@ export class Mirror {
`
)
.run({id, typename});
this._db
.prepare(
dedent`\
INSERT INTO ${_primitivesTableName(typename)} (id)
VALUES (?)
`
)
.run(id);
const addPrimitive = this._db.prepare(
dedent`
INSERT INTO primitives (object_id, fieldname, value)
@ -1355,37 +1293,6 @@ export class Mirror {
].join("_"): string): any);
},
};
const updateTypeSpecificPrimitives: ({|
+id: Schema.ObjectId,
// These keys can be top-level primitive fields or the primitive
// children of a nested field. The values are the JSON encodings
// of the values received from the GraphQL response. For a
// nested field, the value is `0` or `1` as the nested field is
// `null` or not. (See docs on `_initialize` for more details.)
+[parameterName: ParameterName]: string | 0 | 1,
|}) => void = (() => {
const updates: $ReadOnlyArray<string> = [].concat(
objectType.primitiveFieldNames.map(
(f) => `"${f}" = :${parameterNameFor.topLevelField(f)}`
),
objectType.nestedFieldNames.map(
(f) => `"${f}" = :${parameterNameFor.topLevelField(f)}`
),
...objectType.nestedFieldNames.map((f1) =>
Object.keys(objectType.nestedFields[f1].primitives).map(
(f2) => `"${f1}.${f2}" = :${parameterNameFor.nestedField(f1, f2)}`
)
)
);
if (updates.length === 0) {
return () => {};
}
const tableName = _primitivesTableName(typename);
const stmt = db.prepare(
`UPDATE ${tableName} SET ${updates.join(", ")} WHERE id = :id`
);
return _makeSingleUpdateFunction(stmt);
})();
const updateEavPrimitive: ({|
+id: Schema.ObjectId,
+fieldname: string,
@ -1470,8 +1377,6 @@ export class Mirror {
});
}
}
updateTypeSpecificPrimitives(primitives);
}
}
@ -2194,25 +2099,6 @@ function isSqlSafe(token: string) {
return !token.match(/[^A-Za-z0-9_]/);
}
/**
* Get the name of the table used to store primitive data for objects of
* the given type, which should be SQL-safe lest an error be thrown.
*
* Note that the resulting string is double-quoted.
*/
function _primitivesTableName(typename: Schema.Typename) {
// istanbul ignore if
if (!isSqlSafe(typename)) {
// This shouldn't be reachable---we should have caught it earlier.
// But checking it anyway is cheap.
throw new Error(
"Invariant violation: invalid object type name " +
JSON.stringify(typename)
);
}
return `"primitives_${typename}"`;
}
/**
* Convert a prepared statement into a JS function that executes that
* statement and asserts that it makes exactly one change to the

View File

@ -142,7 +142,6 @@ describe("graphql/mirror", () => {
expect(tables.sort()).toEqual(
Array.from(
new Set([
// Structural tables
"meta",
"updates",
"objects",
@ -150,15 +149,6 @@ describe("graphql/mirror", () => {
"links",
"connections",
"connection_entries",
// Primitive data tables per OBJECT type (no UNIONs)
"primitives_Reaction",
"primitives_Repository",
"primitives_Issue",
"primitives_IssueComment",
"primitives_User",
"primitives_Bot",
"primitives_Organization",
...issueTimelineItemClauses().map((x) => `primitives_${x}`),
])
).sort()
);
@ -217,63 +207,6 @@ describe("graphql/mirror", () => {
new Mirror(db, schema, {blacklistedIds: ["ominous"]});
}).toThrow("incompatible schema, options, or version");
});
it("rejects a schema with SQL-unsafe type name", () => {
const s = Schema;
const schema0 = s.schema({
"Non-Word-Characters": s.object({id: s.id()}),
});
const db = new Database(":memory:");
expect(() => new Mirror(db, schema0)).toThrow(
'invalid object type name: "Non-Word-Characters"'
);
});
it("rejects a schema with SQL-unsafe primitive field name", () => {
const s = Schema;
const schema0 = s.schema({
A: s.object({id: s.id(), "Non-Word-Characters": s.primitive()}),
});
const db = new Database(":memory:");
expect(() => new Mirror(db, schema0)).toThrow(
'invalid field name: "Non-Word-Characters"'
);
});
it("rejects a schema with SQL-unsafe nested field name", () => {
const s = Schema;
const schema0 = s.schema({
A: s.object({id: s.id(), "Non-Word-Characters": s.nested({})}),
});
const db = new Database(":memory:");
expect(() => new Mirror(db, schema0)).toThrow(
'invalid field name: "Non-Word-Characters"'
);
});
it("rejects a schema with SQL-unsafe nested primitive field name", () => {
const s = Schema;
const schema0 = s.schema({
A: s.object({
id: s.id(),
problem: s.nested({"Non-Word-Characters": s.primitive()}),
}),
});
const db = new Database(":memory:");
expect(() => new Mirror(db, schema0)).toThrow(
'invalid field name: "Non-Word-Characters" under "problem"'
);
});
it("allows specifying a good schema after rejecting one", () => {
const s = Schema;
const schema0 = s.schema({
A: s.object({id: s.id(), "Non-Word-Characters": s.primitive()}),
});
const db = new Database(":memory:");
expect(() => new Mirror(db, schema0)).toThrow("invalid field name");
expect(() => new Mirror(db, buildGithubSchema())).not.toThrow();
});
});
describe("_createUpdate", () => {
@ -351,15 +284,6 @@ describe("graphql/mirror", () => {
.pluck()
.all(issueId)
).toEqual(["author", "repository"].sort());
expect(
db.prepare("SELECT * FROM primitives_Issue WHERE id = ?").all(issueId)
).toEqual([
{
id: issueId,
url: null,
title: null,
},
]);
expect(
db
.prepare(
@ -390,15 +314,6 @@ describe("graphql/mirror", () => {
.pluck()
.get()
).toBe(0);
expect(
db
.prepare(
"SELECT COUNT(1) FROM primitives_Issue WHERE " +
"url IS NOT NULL OR title IS NOT NULL"
)
.pluck()
.get()
).toBe(0);
});
it("adds an object with nested primitives and links", () => {
const db = new Database(":memory:");
@ -421,18 +336,6 @@ describe("graphql/mirror", () => {
.pluck()
.all(commitId)
).toEqual(["author.user"]);
expect(
db
.prepare("SELECT * FROM primitives_Commit WHERE id = ?")
.all(commitId)
).toEqual([
{
id: commitId,
oid: null,
author: null,
"author.date": null,
},
]);
expect(
db
.prepare(
@ -449,17 +352,6 @@ describe("graphql/mirror", () => {
.pluck()
.get()
).toBe(0);
expect(
db
.prepare(
"SELECT COUNT(1) FROM primitives_Commit WHERE " +
"oid IS NOT NULL " +
"OR author IS NOT NULL " +
'OR "author.date" IS NOT NULL'
)
.pluck()
.get()
).toBe(0);
});
it("doesn't touch an existing object with the same typename", () => {
const db = new Database(":memory:");
@ -955,35 +847,6 @@ describe("graphql/mirror", () => {
// nothing for `ClosedEvent` (it has no primitive fields)
]);
// Check primitives (legacy format).
expect(
db
.prepare("SELECT * FROM primitives_Repository ORDER BY id ASC")
.all()
).toEqual([{id: "repo:foo/bar", url: JSON.stringify("url://foo/bar")}]);
expect(
db.prepare("SELECT * FROM primitives_Issue ORDER BY id ASC").all()
).toEqual([
{
id: "issue:#1",
title: JSON.stringify("something wicked"),
url: JSON.stringify("url://foo/bar/issue/1"),
},
{
id: "issue:#2",
title: JSON.stringify("this way comes"),
url: JSON.stringify("url://foo/bar/issue/2"),
},
]);
expect(
db.prepare("SELECT * FROM primitives_User ORDER BY id ASC").all()
).toEqual([{id: "user:alice", login: null, url: null}]);
expect(
db
.prepare("SELECT * FROM primitives_ClosedEvent ORDER BY id ASC")
.all()
).toEqual([{id: "issue:#1!closed#0"}]);
// Check that some links are correct.
expect(
db
@ -2163,13 +2026,6 @@ describe("graphql/mirror", () => {
.all()
.sort()
).toEqual(["alice", "bob"].sort());
expect(
db.prepare("SELECT * FROM primitives_Issue ORDER BY id ASC").all()
).toEqual([
{id: "issue:#1", title: "13.75", url: '"url://issue/1"'},
{id: "issue:#2", title: "false", url: "null"},
{id: "issue:#3", title: null, url: null},
]);
expect(
db
.prepare(
@ -2214,22 +2070,6 @@ describe("graphql/mirror", () => {
},
},
]);
expect(
db.prepare("SELECT * FROM primitives_Commit ORDER BY id ASC").all()
).toEqual([
{
id: "commit:oid",
author: +true,
"author.date": '"today"',
oid: '"yes"',
},
{
id: "commit:zzz",
oid: '"zzz"',
author: +true,
"author.date": "null",
},
]);
expect(
db
.prepare(
@ -2295,16 +2135,6 @@ describe("graphql/mirror", () => {
author: null,
},
]);
expect(
db.prepare("SELECT * FROM primitives_Commit ORDER BY id ASC").all()
).toEqual([
{
id: "commit:oid",
oid: '"mmm"',
author: +false,
"author.date": "null",
},
]);
expect(
db
.prepare(
@ -2356,11 +2186,6 @@ describe("graphql/mirror", () => {
actor: {__typename: "User", id: "user:alice"},
},
]);
expect(
db
.prepare("SELECT * FROM primitives_LockedEvent ORDER BY id ASC")
.all()
).toEqual([{id: "dos"}, {id: "uno"}]);
expect(
db
.prepare(
@ -3416,42 +3241,8 @@ describe("graphql/mirror", () => {
}
describe("extract", () => {
// Test in EAV mode, hiding the tables corresponding to the legacy
// mode to catch any accidental reads. (We hide and unhide rather
// than deleting because some test cases call `extract` multiple
// times.)
function hiddenName(name) {
return `${name}_DO_NOT_READ`;
}
function hideTable(db, name) {
db.prepare(`ALTER TABLE ${name} RENAME TO ${hiddenName(name)}`).run();
}
function unhideTable(db, name) {
db.prepare(`ALTER TABLE ${hiddenName(name)} RENAME TO ${name}`).run();
}
testExtract((mirror, id) => {
const legacyTables = mirror._db
.prepare(
"SELECT name FROM sqlite_master " +
"WHERE type = 'table' AND name LIKE 'primitives_%'"
)
.pluck()
.all();
if (legacyTables.length === 0) {
throw new Error("Found no type-specific primitives tables?");
}
for (const table of legacyTables) {
hideTable(mirror._db, table);
}
try {
return mirror.extract(id);
} finally {
for (const table of legacyTables) {
unhideTable(mirror._db, table);
}
}
});
// TODO(@wchargin): Inline this function.
testExtract((mirror, id) => mirror.extract(id));
});
describe("end-to-end typename guessing", () => {