From b86dcf742ea2f8d5ee1b113d55a1f564ef64f3ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Fri, 20 Sep 2019 11:21:53 +0200 Subject: [PATCH] Make the Discourse plugin robust to errors (#1387) Currently attempting to load the SourceCred discourse instance fails with foreign key constraint errors. Basically, we have a few weird situations: - A post (which corresponds to the 'psuedo-topic' generated by creating a new category) is picked up, but its topic is not detected, because Discourse does not list these 'psuedo-topics' in the latest topic endpoint. Attempting to add the post breaks the foreign key constraint. - We have several likes which correspond to posts that don't exist. Possibly they were deleted? I'm not sure. Right now, the load process fails entirely when it hits these exceptions, which is bad. It should print a warning instead, and continue without the offending interactions. This commit effects that change in behavior. Test plan: Before this commit, loading the SourceCred discourse with a clean cache fails. After building with this commit, loading the SourceCred discourse with a clean cache workes and prints the following warnings: ``` $ node bin/sourcecred.js discourse https://discourse.sourcecred.io credbot GO load-discourse.sourcecred.io GO discourse GO discourse/topics DONE discourse/topics: 3m 53s GO discourse/posts Warning: Encountered error 'FOREIGN KEY constraint failed' while adding post https://discourse.so urcecred.io/t/214/1. DONE discourse/posts: 2m 38s GO discourse/likes DONE discourse/likes: 50s DONE discourse: 7m 21s GO compute-cred DONE compute-cred: 547ms DONE load-discourse.sourcecred.io: 7m 22s ``` Also, unit tests have been added that verify the specific behavior changes. --- src/plugins/discourse/mirror.js | 65 +++++++++++++++--------- src/plugins/discourse/mirror.test.js | 75 ++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 23 deletions(-) diff --git a/src/plugins/discourse/mirror.js b/src/plugins/discourse/mirror.js index 26b19a6..8f32c22 100644 --- a/src/plugins/discourse/mirror.js +++ b/src/plugins/discourse/mirror.js @@ -81,6 +81,7 @@ export interface DiscourseData { export class Mirror implements DiscourseData { +_db: Database; +_fetcher: Discourse; + +_serverUrl: string; /** * Construct a new Mirror instance. @@ -95,12 +96,13 @@ export class Mirror implements DiscourseData { if (db == null) throw new Error("db: " + String(db)); this._db = db; this._fetcher = fetcher; + this._serverUrl = serverUrl; if (db.inTransaction) { throw new Error("already in transaction"); } try { db.prepare("BEGIN").run(); - this._initialize(serverUrl); + this._initialize(); if (db.inTransaction) { db.prepare("COMMIT").run(); } @@ -111,7 +113,7 @@ export class Mirror implements DiscourseData { } } - _initialize(serverUrl: string) { + _initialize() { const db = this._db; // We store the config in a singleton table `meta`, whose unique row // has primary key `0`. Only the first ever insert will succeed; we @@ -127,7 +129,7 @@ export class Mirror implements DiscourseData { const config = stringify({ version: VERSION, - serverUrl: serverUrl, + serverUrl: this._serverUrl, }); const existingConfig: string | void = db @@ -292,17 +294,25 @@ export class Mirror implements DiscourseData { ) ` ); + const serverUrl = this._serverUrl; return function addPost(post: Post) { - addUser(post.authorUsername); - query.run({ - id: post.id, - timestamp_ms: post.timestampMs, - reply_to_post_index: post.replyToPostIndex, - index_within_topic: post.indexWithinTopic, - topic_id: post.topicId, - author_username: post.authorUsername, - }); - encounteredPostIds.add(post.id); + try { + addUser(post.authorUsername); + encounteredPostIds.add(post.id); + query.run({ + id: post.id, + timestamp_ms: post.timestampMs, + reply_to_post_index: post.replyToPostIndex, + index_within_topic: post.indexWithinTopic, + topic_id: post.topicId, + author_username: post.authorUsername, + }); + } catch (e) { + const url = `${serverUrl}/t/${post.topicId}/${post.indexWithinTopic}`; + console.warn( + `Warning: Encountered error '${e.message}' while adding post ${url}.` + ); + } }; })(); @@ -403,10 +413,10 @@ export class Mirror implements DiscourseData { * assumed to already exist in the database; if this is not known to * be the case, run `addUser(like.username)` first. * - * Returns a status indicating whether the database changed as a - * result of this call. + * Returns a status indicating whether we are done processing this user + * (e.g. we have already seen all of their likes). */ - const addLike: (like: LikeAction) => {|+changed: boolean|} = (() => { + const addLike: (like: LikeAction) => {|+doneWithUser: boolean|} = (() => { const query = db.prepare( dedent`\ INSERT OR IGNORE INTO likes ( @@ -421,12 +431,21 @@ export class Mirror implements DiscourseData { ` ); return function addLike(like: LikeAction) { - const runResult = query.run({ - post_id: like.postId, - timestamp_ms: like.timestampMs, - username: like.username, - }); - return {changed: runResult.changes > 0}; + try { + const runResult = query.run({ + post_id: like.postId, + timestamp_ms: like.timestampMs, + username: like.username, + }); + return {doneWithUser: runResult.changes === 0}; + } catch (e) { + console.warn( + `Warning: Encountered error '${e.message}' ` + + `on a like by ${like.username} ` + + `on post id ${like.postId}.` + ); + return {doneWithUser: false}; + } }; })(); @@ -438,7 +457,7 @@ export class Mirror implements DiscourseData { const likeActions = await this._fetcher.likesByUser(user, offset); possiblePageSize = Math.max(likeActions.length, possiblePageSize); for (const like of likeActions) { - if (!addLike(like).changed) { + if (addLike(like).doneWithUser) { upToDate = true; break; } diff --git a/src/plugins/discourse/mirror.test.js b/src/plugins/discourse/mirror.test.js index 11c41e6..20417f7 100644 --- a/src/plugins/discourse/mirror.test.js +++ b/src/plugins/discourse/mirror.test.js @@ -148,6 +148,19 @@ class MockFetcher implements Discourse { } describe("plugins/discourse/mirror", () => { + beforeAll(() => { + jest.spyOn(console, "warn").mockImplementation(() => {}); + }); + function spyWarn(): JestMockFn<[string], void> { + return ((console.warn: any): JestMockFn); + } + beforeEach(() => { + spyWarn().mockReset(); + }); + afterAll(() => { + expect(console.warn).not.toHaveBeenCalled(); + spyWarn().mockRestore(); + }); const example = () => { const fetcher = new MockFetcher(); const db = new Database(":memory:"); @@ -380,6 +393,68 @@ describe("plugins/discourse/mirror", () => { expect(fetchLikes).toHaveBeenCalledTimes(1); expect(fetchLikes).toHaveBeenCalledWith("credbot", 0); }); + + it("warns if one of the latest posts has no topic", async () => { + const {mirror, fetcher, reporter} = example(); + const pid1 = fetcher.addPost(1, null, "credbot"); + const pid2 = fetcher.addPost(2, null, "credbot"); + // Verify that the problem post is one of the latest posts + const latestPostIds = (await fetcher.latestPosts()).map((x) => x.id); + expect(latestPostIds).toContain(pid2); + // Force the fetcher not to return topic 2 + fetcher._latestTopicId--; + await mirror.update(reporter); + const topics = [fetcher._topic(1)]; + expect(mirror.topics()).toEqual(topics); + const posts = [fetcher._post(pid1)]; + expect(mirror.posts()).toEqual(posts); + expect(console.warn).toHaveBeenCalledWith( + "Warning: Encountered error 'FOREIGN KEY constraint failed' " + + "while adding post http://example.com/t/2/1." + ); + expect(console.warn).toHaveBeenCalledTimes(1); + jest.spyOn(console, "warn").mockImplementation(() => {}); + }); + + it("warns if it finds a (non-latest) post with no topic", async () => { + const {mirror, fetcher, reporter} = example(); + const pid1 = fetcher.addPost(1, null, "credbot"); + const pid2 = fetcher.addPost(2, null, "credbot"); + const pid3 = fetcher.addPost(1, null, "credbot"); + // Verify that the problem post is not one of the latest posts + const latestPostIds = (await fetcher.latestPosts()).map((x) => x.id); + expect(latestPostIds).not.toContain(pid2); + // Force the fetcher not to return topic 2 + fetcher._latestTopicId--; + await mirror.update(reporter); + const topics = [fetcher._topic(1)]; + expect(mirror.topics()).toEqual(topics); + const posts = [pid1, pid3].map((x) => fetcher._post(x)); + expect(mirror.posts()).toEqual(posts); + expect(console.warn).toHaveBeenCalledWith( + "Warning: Encountered error 'FOREIGN KEY constraint failed' " + + "while adding post http://example.com/t/2/1." + ); + expect(console.warn).toHaveBeenCalledTimes(1); + jest.spyOn(console, "warn").mockImplementation(() => {}); + }); + + it("warns if it gets a like that doesn't correspond to any post", async () => { + const {mirror, fetcher, reporter} = example(); + const pid = fetcher.addPost(1, null, "credbot"); + const badLike = {username: "credbot", postId: 37, timestampMs: 0}; + fetcher._likes.push(badLike); + await mirror.update(reporter); + expect(mirror.topics()).toEqual([fetcher._topic(1)]); + expect(mirror.posts()).toEqual([fetcher._post(pid)]); + expect(mirror.likes()).toEqual([]); + expect(console.warn).toHaveBeenCalledWith( + "Warning: Encountered error 'FOREIGN KEY constraint failed' " + + "on a like by credbot on post id 37." + ); + expect(console.warn).toHaveBeenCalledTimes(1); + jest.spyOn(console, "warn").mockImplementation(() => {}); + }); }); it("sends the right tasks to the TaskReporter", async () => {