discourse: factor SQL parsing out of loops (#1301)

Summary:
Calling `db.prepare(sql)` parses the text in `sql` and compiles it to a
prepared statement. This takes time, both for the parsing and allocation
itself and for the context switch from JavaScript to C (SQLite).
Prepared statements are designed to be invoked multiple times with
different bound values. This commit factors prepared statement creation
out of loops so that each call to `update` prepares only a constant
number of statements.
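
For illustration, a minimal sketch of the pattern (the `usernames`
array here is hypothetical; the SQL is the users insert used in this
commit):

    // Before: the SQL text is re-parsed and re-compiled on every
    // iteration, with a JS-to-C round trip for each prepare().
    for (const username of usernames) {
      db.prepare("INSERT OR IGNORE INTO users (username) VALUES (?)").run(
        username
      );
    }

    // After: one prepared statement, reused with different bound values.
    const addUserStmt = db.prepare(
      "INSERT OR IGNORE INTO users (username) VALUES (?)"
    );
    for (const username of usernames) {
      addUserStmt.run(username);
    }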

In doing so, we naturally factor out some light JS abstractions over the
raw SQL: `addTopic(topic: Topic)`, rather than `addTopicStmt.run(…)`.
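
These helpers take the shape of an immediately invoked function that
closes over its prepared statement; simplified from the diff, using the
`addUser` helper as an example:

    const addUser: (username: string) => void = (() => {
      const query = db.prepare(
        "INSERT OR IGNORE INTO users (username) VALUES (?)"
      );
      return function addUser(username: string) {
        query.run(username);
      };
    })();

    // Call sites then read `addUser(username)` rather than
    // `addUserStmt.run(username)`, and prepare() runs exactly once.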

In principle, these could be factored out of `update` entirely to
properties set on the class at initialization time, but, as described in
a comment on the GraphQL mirror, we defer this optimization for now as
it introduces additional complexity.
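
A rough sketch of what that deferred refactor might look like (a
hypothetical simplification of the `Mirror` constructor, not part of
this commit):

    class Mirror {
      _db: Database;
      _addUser: (username: string) => void;
      constructor(db: Database) {
        this._db = db;
        const addUserStmt = db.prepare(
          "INSERT OR IGNORE INTO users (username) VALUES (?)"
        );
        // Prepared once per Mirror instance, shared by every update().
        this._addUser = (username) => addUserStmt.run(username);
      }
    }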

Test Plan:
Running `yarn test --full` passes.

wchargin-branch: discourse-sql-cse
William Chargin 2019-08-18 11:58:44 -07:00 committed by GitHub
parent bf5c3a953b
commit 5e26424d82
1 changed file with 86 additions and 81 deletions

@@ -1,6 +1,6 @@
// @flow
import type Database from "better-sqlite3";
import type {Database} from "better-sqlite3";
import stringify from "json-stable-stringify";
import dedent from "../../util/dedent";
import {
@@ -271,19 +271,8 @@ export class Mirror implements DiscourseData {
const encounteredPostIds = new Set();
function addPost(post: Post) {
const {
id,
timestampMs,
replyToPostIndex,
indexWithinTopic,
topicId,
authorUsername,
} = post;
db.prepare("INSERT OR IGNORE INTO users (username) VALUES (?)").run(
authorUsername
);
db.prepare(
const addPost: (Post) => void = (() => {
const query = db.prepare(
dedent`\
REPLACE INTO posts (
id,
@@ -301,31 +290,32 @@ export class Mirror implements DiscourseData {
:reply_to_post_index
)
`
).run({
id,
timestamp_ms: timestampMs,
reply_to_post_index: replyToPostIndex,
index_within_topic: indexWithinTopic,
topic_id: topicId,
author_username: authorUsername,
});
encounteredPostIds.add(id);
}
for (
let topicId = lastLocalTopicId + 1;
topicId <= latestTopicId;
topicId++
) {
const topicWithPosts = await this._fetcher.topicWithPosts(topicId);
if (topicWithPosts != null) {
const {topic, posts} = topicWithPosts;
const {id, title, timestampMs, authorUsername} = topic;
db.prepare("INSERT OR IGNORE INTO users (username) VALUES (?)").run(
authorUsername
);
this._db
.prepare(
return function addPost(post: Post) {
addUser(post.authorUsername);
query.run({
id: post.id,
timestamp_ms: post.timestampMs,
reply_to_post_index: post.replyToPostIndex,
index_within_topic: post.indexWithinTopic,
topic_id: post.topicId,
author_username: post.authorUsername,
});
encounteredPostIds.add(post.id);
};
})();
const addUser: (username: string) => void = (() => {
const query = db.prepare(
"INSERT OR IGNORE INTO users (username) VALUES (?)"
);
return function addUser(username: string) {
query.run(username);
};
})();
const addTopic: (Topic) => void = (() => {
const query = this._db.prepare(
dedent`\
REPLACE INTO topics (
id,
@@ -337,14 +327,29 @@ export class Mirror implements DiscourseData {
:title,
:timestamp_ms,
:author_username
)`
)
.run({
id,
title,
timestamp_ms: timestampMs,
author_username: authorUsername,
`
);
return function addTopic(topic: Topic) {
addUser(topic.authorUsername);
query.run({
id: topic.id,
title: topic.title,
timestamp_ms: topic.timestampMs,
author_username: topic.authorUsername,
});
};
})();
for (
let topicId = lastLocalTopicId + 1;
topicId <= latestTopicId;
topicId++
) {
const topicWithPosts = await this._fetcher.topicWithPosts(topicId);
if (topicWithPosts != null) {
const {topic, posts} = topicWithPosts;
addTopic(topic);
for (const post of posts) {
addPost(post);
}
@@ -386,27 +391,7 @@ export class Mirror implements DiscourseData {
// who we either haven't scanned in the last week, or who have been active
// since our last scan. This would likely improve the performance of this
// section of the update significantly.
for (const user of this.users()) {
let offset = 0;
let upToDate = false;
while (!upToDate) {
const likeActions = await this._fetcher.likesByUser(user, offset);
possiblePageSize = Math.max(likeActions.length, possiblePageSize);
for (const {timestampMs, postId, username} of likeActions) {
const alreadySeenThisLike = db
.prepare(
dedent`\
SELECT * FROM likes
WHERE post_id = :post_id AND username = :username
`
)
.pluck()
.get({post_id: postId, username});
if (alreadySeenThisLike) {
upToDate = true;
break;
}
db.prepare(
const insertLike = db.prepare(
dedent`\
INSERT INTO likes (
post_id, timestamp_ms, username
@@ -414,10 +399,30 @@ export class Mirror implements DiscourseData {
:post_id, :timestamp_ms, :username
)
`
).run({
);
const alreadySeenLike = db
.prepare(
dedent`\
SELECT * FROM likes
WHERE post_id = :post_id AND username = :username
`
)
.pluck();
for (const user of this.users()) {
let offset = 0;
let upToDate = false;
while (!upToDate) {
const likeActions = await this._fetcher.likesByUser(user, offset);
possiblePageSize = Math.max(likeActions.length, possiblePageSize);
for (const {timestampMs, postId, username} of likeActions) {
if (alreadySeenLike.get({post_id: postId, username: username})) {
upToDate = true;
break;
}
insertLike.run({
post_id: postId,
timestamp_ms: timestampMs,
username,
username: username,
});
}
if (likeActions.length === 0 || likeActions.length < possiblePageSize) {