github: use blacklists to unblock twbs/bootstrap (#973)

Summary:
This adds object IDs to the GitHub GraphQL blacklist such that the
`twbs/bootstrap` repository can be loaded.

Ingesting the Mirror-extracted data into the RelationalView yields the
warnings

```
IssueComment[MDEyOklzc3VlQ29tbWVudDEwNTI4Mzk4Ng==].reactions: unexpected null value
IssueComment[MDEyOklzc3VlQ29tbWVudDI0NTQ3OTM3OA==].reactions: unexpected null value
IssueComment[MDEyOklzc3VlQ29tbWVudDMwNDE4NzIzMg==].reactions: unexpected null value
```

because we have nulled out these `Reaction`s in their enclosing
connections. This is expected.

Test Plan:
Run `yarn backend` and `node ./bin/sourcecred.js load twbs/bootstrap`.
Run `yarn start` and note that the cred attribution renders properly.

(Loading the GitHub data may take an hour or two. The resulting SQLite3
database is 172MB. Ingesting it into the `RelationalView` still takes
just a few seconds, and the cred attribution is rendered quickly.)

wchargin-branch: github-use-blacklists
This commit is contained in:
William Chargin 2018-11-01 11:08:17 -07:00 committed by GitHub
parent fe50ca83f6
commit d19227c268
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 2 deletions

View File

@ -0,0 +1,16 @@
// @flow
import type {ObjectId} from "../../graphql/schema";
export const BLACKLISTED_IDS: $ReadOnlyArray<ObjectId> = Object.freeze([
// These are `Organization` nodes that are sometimes referenced in a
// `User` context: in particular, as the author of a reaction.
// See: https://gist.github.com/wchargin/a2b8561b81bcc932c84e493d2485ea8a
"MDEyOk9yZ2FuaXphdGlvbjE3OTUyOTI1",
"MDEyOk9yZ2FuaXphdGlvbjI5MTkzOTQ=",
"MDEyOk9yZ2FuaXphdGlvbjEyNDE3MDI0",
// These are the offending reactions.
"MDg6UmVhY3Rpb24yMTY3ODkyNQ==",
"MDg6UmVhY3Rpb240NDMwMzQ1",
"MDg6UmVhY3Rpb24xMDI4MzQxOA==",
]);

View File

@ -14,8 +14,9 @@ import {Mirror} from "../../graphql/mirror";
import * as Queries from "../../graphql/queries";
import {stringify, inlineLayout, type Body} from "../../graphql/queries";
import * as Schema from "../../graphql/schema";
import schema from "./schema";
import {BLACKLISTED_IDS} from "./blacklistedObjectIds";
import type {Repository} from "./graphqlTypes";
import schema from "./schema";
/**
* Scrape data from a GitHub repo using the GitHub API.
@ -53,7 +54,7 @@ export default async function fetchGithubRepo(
// equals signs in file names.
const dbFilename = `mirror_${Buffer.from(resolvedId).toString("hex")}.db`;
const db = new Database(path.join(cacheDirectory, dbFilename));
const mirror = new Mirror(db, schema());
const mirror = new Mirror(db, schema(), {blacklistedIds: BLACKLISTED_IDS});
mirror.registerObject({typename: "Repository", id: resolvedId});
// These are arbitrary tuning parameters.