Make the Discourse plugin robust to errors (#1387)

Currently attempting to load the SourceCred discourse instance fails
with foreign key constraint errors.

Basically, we have a few weird situations:
- A post (which corresponds to the 'psuedo-topic' generated by creating
a new category) is picked up, but its topic is not detected, because
Discourse does not list these 'psuedo-topics' in the latest topic
endpoint. Attempting to add the post breaks the foreign key constraint.

- We have several likes which correspond to posts that don't exist.
Possibly they were deleted? I'm not sure.

Right now, the load process fails entirely when it hits these
exceptions, which is bad. It should print a warning instead, and
continue without the offending interactions. This commit effects that
change in behavior.

Test plan:

Before this commit, loading the SourceCred discourse with a clean cache
fails. After building with this commit, loading the SourceCred discourse
with a clean cache workes and prints the following warnings:

```
$ node bin/sourcecred.js discourse https://discourse.sourcecred.io credbot
  GO   load-discourse.sourcecred.io
  GO   discourse
  GO   discourse/topics
 DONE  discourse/topics: 3m 53s
  GO   discourse/posts
Warning: Encountered error 'FOREIGN KEY constraint failed' while adding
post https://discourse.so urcecred.io/t/214/1.
 DONE  discourse/posts: 2m 38s
  GO   discourse/likes
 DONE  discourse/likes: 50s
 DONE  discourse: 7m 21s
  GO   compute-cred
 DONE  compute-cred: 547ms
 DONE  load-discourse.sourcecred.io: 7m 22s
```

Also, unit tests have been added that verify the specific behavior
changes.
This commit is contained in:
Dandelion Mané 2019-09-20 11:21:53 +02:00 committed by GitHub
parent d5d00aae5a
commit b86dcf742e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 117 additions and 23 deletions

View File

@ -81,6 +81,7 @@ export interface DiscourseData {
export class Mirror implements DiscourseData {
+_db: Database;
+_fetcher: Discourse;
+_serverUrl: string;
/**
* Construct a new Mirror instance.
@ -95,12 +96,13 @@ export class Mirror implements DiscourseData {
if (db == null) throw new Error("db: " + String(db));
this._db = db;
this._fetcher = fetcher;
this._serverUrl = serverUrl;
if (db.inTransaction) {
throw new Error("already in transaction");
}
try {
db.prepare("BEGIN").run();
this._initialize(serverUrl);
this._initialize();
if (db.inTransaction) {
db.prepare("COMMIT").run();
}
@ -111,7 +113,7 @@ export class Mirror implements DiscourseData {
}
}
_initialize(serverUrl: string) {
_initialize() {
const db = this._db;
// We store the config in a singleton table `meta`, whose unique row
// has primary key `0`. Only the first ever insert will succeed; we
@ -127,7 +129,7 @@ export class Mirror implements DiscourseData {
const config = stringify({
version: VERSION,
serverUrl: serverUrl,
serverUrl: this._serverUrl,
});
const existingConfig: string | void = db
@ -292,8 +294,11 @@ export class Mirror implements DiscourseData {
)
`
);
const serverUrl = this._serverUrl;
return function addPost(post: Post) {
try {
addUser(post.authorUsername);
encounteredPostIds.add(post.id);
query.run({
id: post.id,
timestamp_ms: post.timestampMs,
@ -302,7 +307,12 @@ export class Mirror implements DiscourseData {
topic_id: post.topicId,
author_username: post.authorUsername,
});
encounteredPostIds.add(post.id);
} catch (e) {
const url = `${serverUrl}/t/${post.topicId}/${post.indexWithinTopic}`;
console.warn(
`Warning: Encountered error '${e.message}' while adding post ${url}.`
);
}
};
})();
@ -403,10 +413,10 @@ export class Mirror implements DiscourseData {
* assumed to already exist in the database; if this is not known to
* be the case, run `addUser(like.username)` first.
*
* Returns a status indicating whether the database changed as a
* result of this call.
* Returns a status indicating whether we are done processing this user
* (e.g. we have already seen all of their likes).
*/
const addLike: (like: LikeAction) => {|+changed: boolean|} = (() => {
const addLike: (like: LikeAction) => {|+doneWithUser: boolean|} = (() => {
const query = db.prepare(
dedent`\
INSERT OR IGNORE INTO likes (
@ -421,12 +431,21 @@ export class Mirror implements DiscourseData {
`
);
return function addLike(like: LikeAction) {
try {
const runResult = query.run({
post_id: like.postId,
timestamp_ms: like.timestampMs,
username: like.username,
});
return {changed: runResult.changes > 0};
return {doneWithUser: runResult.changes === 0};
} catch (e) {
console.warn(
`Warning: Encountered error '${e.message}' ` +
`on a like by ${like.username} ` +
`on post id ${like.postId}.`
);
return {doneWithUser: false};
}
};
})();
@ -438,7 +457,7 @@ export class Mirror implements DiscourseData {
const likeActions = await this._fetcher.likesByUser(user, offset);
possiblePageSize = Math.max(likeActions.length, possiblePageSize);
for (const like of likeActions) {
if (!addLike(like).changed) {
if (addLike(like).doneWithUser) {
upToDate = true;
break;
}

View File

@ -148,6 +148,19 @@ class MockFetcher implements Discourse {
}
describe("plugins/discourse/mirror", () => {
beforeAll(() => {
jest.spyOn(console, "warn").mockImplementation(() => {});
});
function spyWarn(): JestMockFn<[string], void> {
return ((console.warn: any): JestMockFn<any, void>);
}
beforeEach(() => {
spyWarn().mockReset();
});
afterAll(() => {
expect(console.warn).not.toHaveBeenCalled();
spyWarn().mockRestore();
});
const example = () => {
const fetcher = new MockFetcher();
const db = new Database(":memory:");
@ -380,6 +393,68 @@ describe("plugins/discourse/mirror", () => {
expect(fetchLikes).toHaveBeenCalledTimes(1);
expect(fetchLikes).toHaveBeenCalledWith("credbot", 0);
});
it("warns if one of the latest posts has no topic", async () => {
const {mirror, fetcher, reporter} = example();
const pid1 = fetcher.addPost(1, null, "credbot");
const pid2 = fetcher.addPost(2, null, "credbot");
// Verify that the problem post is one of the latest posts
const latestPostIds = (await fetcher.latestPosts()).map((x) => x.id);
expect(latestPostIds).toContain(pid2);
// Force the fetcher not to return topic 2
fetcher._latestTopicId--;
await mirror.update(reporter);
const topics = [fetcher._topic(1)];
expect(mirror.topics()).toEqual(topics);
const posts = [fetcher._post(pid1)];
expect(mirror.posts()).toEqual(posts);
expect(console.warn).toHaveBeenCalledWith(
"Warning: Encountered error 'FOREIGN KEY constraint failed' " +
"while adding post http://example.com/t/2/1."
);
expect(console.warn).toHaveBeenCalledTimes(1);
jest.spyOn(console, "warn").mockImplementation(() => {});
});
it("warns if it finds a (non-latest) post with no topic", async () => {
const {mirror, fetcher, reporter} = example();
const pid1 = fetcher.addPost(1, null, "credbot");
const pid2 = fetcher.addPost(2, null, "credbot");
const pid3 = fetcher.addPost(1, null, "credbot");
// Verify that the problem post is not one of the latest posts
const latestPostIds = (await fetcher.latestPosts()).map((x) => x.id);
expect(latestPostIds).not.toContain(pid2);
// Force the fetcher not to return topic 2
fetcher._latestTopicId--;
await mirror.update(reporter);
const topics = [fetcher._topic(1)];
expect(mirror.topics()).toEqual(topics);
const posts = [pid1, pid3].map((x) => fetcher._post(x));
expect(mirror.posts()).toEqual(posts);
expect(console.warn).toHaveBeenCalledWith(
"Warning: Encountered error 'FOREIGN KEY constraint failed' " +
"while adding post http://example.com/t/2/1."
);
expect(console.warn).toHaveBeenCalledTimes(1);
jest.spyOn(console, "warn").mockImplementation(() => {});
});
it("warns if it gets a like that doesn't correspond to any post", async () => {
const {mirror, fetcher, reporter} = example();
const pid = fetcher.addPost(1, null, "credbot");
const badLike = {username: "credbot", postId: 37, timestampMs: 0};
fetcher._likes.push(badLike);
await mirror.update(reporter);
expect(mirror.topics()).toEqual([fetcher._topic(1)]);
expect(mirror.posts()).toEqual([fetcher._post(pid)]);
expect(mirror.likes()).toEqual([]);
expect(console.warn).toHaveBeenCalledWith(
"Warning: Encountered error 'FOREIGN KEY constraint failed' " +
"on a like by credbot on post id 37."
);
expect(console.warn).toHaveBeenCalledTimes(1);
jest.spyOn(console, "warn").mockImplementation(() => {});
});
});
it("sends the right tasks to the TaskReporter", async () => {