discourse: factor SQL parsing out of loops (#1301)

Summary:
Calling `db.prepare(sql)` parses the text in `sql` and compiles it to a
prepared statement. This takes time, both for the parsing and allocation
itself and for the context switch from JavaScript to C (SQLite).
Prepared statements are designed to be invoked multiple times with
different bound values. This commit factors prepared statement creation
out of loops so that each call to `update` prepares only a constant
number of statements.

In doing so, we naturally factor out some light JS abstractions over the
raw SQL: `addTopic((topic: Topic))`, rather than `addTopicStmt.run(…)`.

In principle, these could be factored out of `update` entirely to
properties set on the class at initialization time, but, as described in
a comment on the GraphQL mirror, we defer this optimization for now as
it introduces additional complexity.

Test Plan:
Running `yarn test --full` passes.

wchargin-branch: discourse-sql-cse
This commit is contained in:
William Chargin 2019-08-18 11:58:44 -07:00 committed by GitHub
parent bf5c3a953b
commit 5e26424d82
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 86 additions and 81 deletions

View File

@ -1,6 +1,6 @@
// @flow // @flow
import type Database from "better-sqlite3"; import type {Database} from "better-sqlite3";
import stringify from "json-stable-stringify"; import stringify from "json-stable-stringify";
import dedent from "../../util/dedent"; import dedent from "../../util/dedent";
import { import {
@ -271,46 +271,75 @@ export class Mirror implements DiscourseData {
const encounteredPostIds = new Set(); const encounteredPostIds = new Set();
function addPost(post: Post) { const addPost: (Post) => void = (() => {
const { const query = db.prepare(
id,
timestampMs,
replyToPostIndex,
indexWithinTopic,
topicId,
authorUsername,
} = post;
db.prepare("INSERT OR IGNORE INTO users (username) VALUES (?)").run(
authorUsername
);
db.prepare(
dedent`\ dedent`\
REPLACE INTO posts ( REPLACE INTO posts (
id, id,
timestamp_ms, timestamp_ms,
author_username, author_username,
topic_id, topic_id,
index_within_topic, index_within_topic,
reply_to_post_index reply_to_post_index
) VALUES ( ) VALUES (
:id, :id,
:timestamp_ms, :timestamp_ms,
:author_username, :author_username,
:topic_id, :topic_id,
:index_within_topic, :index_within_topic,
:reply_to_post_index :reply_to_post_index
) )
` `
).run({ );
id, return function addPost(post: Post) {
timestamp_ms: timestampMs, addUser(post.authorUsername);
reply_to_post_index: replyToPostIndex, query.run({
index_within_topic: indexWithinTopic, id: post.id,
topic_id: topicId, timestamp_ms: post.timestampMs,
author_username: authorUsername, reply_to_post_index: post.replyToPostIndex,
}); index_within_topic: post.indexWithinTopic,
encounteredPostIds.add(id); topic_id: post.topicId,
} author_username: post.authorUsername,
});
encounteredPostIds.add(post.id);
};
})();
const addUser: (username: string) => void = (() => {
const query = db.prepare(
"INSERT OR IGNORE INTO users (username) VALUES (?)"
);
return function addUser(username: string) {
query.run(username);
};
})();
const addTopic: (Topic) => void = (() => {
const query = this._db.prepare(
dedent`\
REPLACE INTO topics (
id,
title,
timestamp_ms,
author_username
) VALUES (
:id,
:title,
:timestamp_ms,
:author_username
)
`
);
return function addTopic(topic: Topic) {
addUser(topic.authorUsername);
query.run({
id: topic.id,
title: topic.title,
timestamp_ms: topic.timestampMs,
author_username: topic.authorUsername,
});
};
})();
for ( for (
let topicId = lastLocalTopicId + 1; let topicId = lastLocalTopicId + 1;
@ -320,31 +349,7 @@ export class Mirror implements DiscourseData {
const topicWithPosts = await this._fetcher.topicWithPosts(topicId); const topicWithPosts = await this._fetcher.topicWithPosts(topicId);
if (topicWithPosts != null) { if (topicWithPosts != null) {
const {topic, posts} = topicWithPosts; const {topic, posts} = topicWithPosts;
const {id, title, timestampMs, authorUsername} = topic; addTopic(topic);
db.prepare("INSERT OR IGNORE INTO users (username) VALUES (?)").run(
authorUsername
);
this._db
.prepare(
dedent`\
REPLACE INTO topics (
id,
title,
timestamp_ms,
author_username
) VALUES (
:id,
:title,
:timestamp_ms,
:author_username
)`
)
.run({
id,
title,
timestamp_ms: timestampMs,
author_username: authorUsername,
});
for (const post of posts) { for (const post of posts) {
addPost(post); addPost(post);
} }
@ -386,6 +391,23 @@ export class Mirror implements DiscourseData {
// who we either haven't scanned in the last week, or who have been active // who we either haven't scanned in the last week, or who have been active
// since our last scan. This would likely improve the performance of this // since our last scan. This would likely improve the performance of this
// section of the update significantly. // section of the update significantly.
const insertLike = db.prepare(
dedent`\
INSERT INTO likes (
post_id, timestamp_ms, username
) VALUES (
:post_id, :timestamp_ms, :username
)
`
);
const alreadySeenLike = db
.prepare(
dedent`\
SELECT * FROM likes
WHERE post_id = :post_id AND username = :username
`
)
.pluck();
for (const user of this.users()) { for (const user of this.users()) {
let offset = 0; let offset = 0;
let upToDate = false; let upToDate = false;
@ -393,31 +415,14 @@ export class Mirror implements DiscourseData {
const likeActions = await this._fetcher.likesByUser(user, offset); const likeActions = await this._fetcher.likesByUser(user, offset);
possiblePageSize = Math.max(likeActions.length, possiblePageSize); possiblePageSize = Math.max(likeActions.length, possiblePageSize);
for (const {timestampMs, postId, username} of likeActions) { for (const {timestampMs, postId, username} of likeActions) {
const alreadySeenThisLike = db if (alreadySeenLike.get({post_id: postId, username: username})) {
.prepare(
dedent`\
SELECT * FROM likes
WHERE post_id = :post_id AND username = :username
`
)
.pluck()
.get({post_id: postId, username});
if (alreadySeenThisLike) {
upToDate = true; upToDate = true;
break; break;
} }
db.prepare( insertLike.run({
dedent`\
INSERT INTO likes (
post_id, timestamp_ms, username
) VALUES (
:post_id, :timestamp_ms, :username
)
`
).run({
post_id: postId, post_id: postId,
timestamp_ms: timestampMs, timestamp_ms: timestampMs,
username, username: username,
}); });
} }
if (likeActions.length === 0 || likeActions.length < possiblePageSize) { if (likeActions.length === 0 || likeActions.length < possiblePageSize) {