From b50ba67797299665bec3023f76c0cd25f36a9be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Sat, 17 Aug 2019 04:20:27 +0200 Subject: [PATCH] add discourse declaration and createGraph (#1292) This commit adds the logic needed for creating a contribution graph based on the Discourse data. We first have a declaration with specifications for the node and edge types in the plugin. We also have a `createGraph` module which creates a conformant graph from the Mirror data. The graph creation is thoroughly tested. Test plan: Inspect unit tests, run `yarn test`. I also have (yet unpublished) code which loads the graph into the UI, and it appears fine. --- src/plugins/discourse/createGraph.js | 163 ++++++++++++ src/plugins/discourse/createGraph.test.js | 307 ++++++++++++++++++++++ src/plugins/discourse/declaration.js | 83 ++++++ 3 files changed, 553 insertions(+) create mode 100644 src/plugins/discourse/createGraph.js create mode 100644 src/plugins/discourse/createGraph.test.js create mode 100644 src/plugins/discourse/declaration.js diff --git a/src/plugins/discourse/createGraph.js b/src/plugins/discourse/createGraph.js new file mode 100644 index 0000000..86796be --- /dev/null +++ b/src/plugins/discourse/createGraph.js @@ -0,0 +1,163 @@ +// @flow + +import { + Graph, + NodeAddress, + EdgeAddress, + type Node, + type Edge, + type NodeAddressT, +} from "../../core/graph"; +import {type PostId, type TopicId, type Post, type Topic} from "./fetch"; +import {type DiscourseData} from "./mirror"; +import { + topicNodeType, + postNodeType, + userNodeType, + authorsPostEdgeType, + authorsTopicEdgeType, + postRepliesEdgeType, + topicContainsPostEdgeType, +} from "./declaration"; + +export function topicAddress(serverUrl: string, id: TopicId): NodeAddressT { + return NodeAddress.append(topicNodeType.prefix, serverUrl, String(id)); +} +export function postAddress(serverUrl: string, id: PostId): NodeAddressT { + return NodeAddress.append(postNodeType.prefix, serverUrl, String(id)); +} +export function userAddress(serverUrl: string, username: string): NodeAddressT { + return NodeAddress.append(userNodeType.prefix, serverUrl, username); +} + +export function userNode(serverUrl: string, username: string): Node { + const url = `${serverUrl}/u/${username}/`; + const description = `[@${username}](${url})`; + return { + address: userAddress(serverUrl, username), + description, + timestampMs: null, + }; +} + +export function topicNode(serverUrl: string, topic: Topic): Node { + const url = `${serverUrl}/t/${String(topic.id)}`; + const description = `[${topic.title}](${url})`; + const address = topicAddress(serverUrl, topic.id); + return {address, description, timestampMs: topic.timestampMs}; +} + +export function postNode( + serverUrl: string, + post: Post, + topicTitle: string +): Node { + const url = `${serverUrl}/t/${String(post.topicId)}/${String(post.id)}`; + const descr = `[post #${post.indexWithinTopic} on ${topicTitle}](${url})`; + const address = postAddress(serverUrl, post.id); + return {timestampMs: post.timestampMs, address, description: descr}; +} + +export function authorsTopicEdge(serverUrl: string, topic: Topic): Edge { + const address = EdgeAddress.append( + authorsTopicEdgeType.prefix, + serverUrl, + topic.authorUsername, + String(topic.id) + ); + return { + address, + timestampMs: topic.timestampMs, + src: userAddress(serverUrl, topic.authorUsername), + dst: topicAddress(serverUrl, topic.id), + }; +} + +export function authorsPostEdge(serverUrl: string, post: Post): Edge { + const address = EdgeAddress.append( + authorsPostEdgeType.prefix, + serverUrl, + post.authorUsername, + String(post.id) + ); + return { + address, + timestampMs: post.timestampMs, + src: userAddress(serverUrl, post.authorUsername), + dst: postAddress(serverUrl, post.id), + }; +} + +export function topicContainsPostEdge(serverUrl: string, post: Post): Edge { + const address = EdgeAddress.append( + topicContainsPostEdgeType.prefix, + serverUrl, + String(post.topicId), + String(post.id) + ); + return { + address, + timestampMs: post.timestampMs, + src: topicAddress(serverUrl, post.topicId), + dst: postAddress(serverUrl, post.id), + }; +} + +export function postRepliesEdge( + serverUrl: string, + post: Post, + basePostId: PostId +): Edge { + const address = EdgeAddress.append( + postRepliesEdgeType.prefix, + serverUrl, + String(post.id), + String(basePostId) + ); + + return { + address, + timestampMs: post.timestampMs, + src: postAddress(serverUrl, post.id), + dst: postAddress(serverUrl, basePostId), + }; +} + +export function createGraph(serverUrl: string, data: DiscourseData): Graph { + if (serverUrl.endsWith("/")) { + throw new Error(`by convention, serverUrl should not end with /`); + } + const g = new Graph(); + const topicIdToTitle: Map = new Map(); + + for (const topic of data.topics()) { + topicIdToTitle.set(topic.id, topic.title); + g.addNode(topicNode(serverUrl, topic)); + g.addNode(userNode(serverUrl, topic.authorUsername)); + g.addEdge(authorsTopicEdge(serverUrl, topic)); + } + + for (const post of data.posts()) { + const topicTitle = topicIdToTitle.get(post.topicId) || "[unknown topic]"; + g.addNode(postNode(serverUrl, post, topicTitle)); + g.addNode(userNode(serverUrl, post.authorUsername)); + g.addEdge(authorsPostEdge(serverUrl, post)); + g.addEdge(topicContainsPostEdge(serverUrl, post)); + let replyToPostIndex = post.replyToPostIndex; + if (replyToPostIndex == null && post.indexWithinTopic > 1) { + // For posts that are a reply to the first posts (or, depending on how you look at it, + // replies to the topic), the replyToPostIndex gets set to null. For purposes of cred calculation, + // I think replies to the first post should have a reply edge, as any other reply would. + // So I correct for the API weirdness here. + replyToPostIndex = 1; + } + if (replyToPostIndex != null) { + const basePostId = data.findPostInTopic(post.topicId, replyToPostIndex); + if (basePostId != null) { + g.addEdge(postRepliesEdge(serverUrl, post, basePostId)); + } + } + } + + return g; +} diff --git a/src/plugins/discourse/createGraph.test.js b/src/plugins/discourse/createGraph.test.js new file mode 100644 index 0000000..3a07450 --- /dev/null +++ b/src/plugins/discourse/createGraph.test.js @@ -0,0 +1,307 @@ +// @flow + +import sortBy from "lodash.sortby"; +import type {DiscourseData} from "./mirror"; +import type {Topic, Post, PostId, TopicId} from "./fetch"; +import {NodeAddress, EdgeAddress, type Node, type Edge} from "../../core/graph"; +import { + createGraph, + userNode, + topicNode, + postNode, + authorsTopicEdge, + authorsPostEdge, + topicContainsPostEdge, + postRepliesEdge, +} from "./createGraph"; +import { + userNodeType, + topicNodeType, + postNodeType, + authorsTopicEdgeType, + authorsPostEdgeType, + topicContainsPostEdgeType, + postRepliesEdgeType, +} from "./declaration"; +import type {EdgeType, NodeType} from "../../analysis/types"; + +describe("plugins/discourse/createGraph", () => { + class MockData implements DiscourseData { + _topics: $ReadOnlyArray; + _posts: $ReadOnlyArray; + + constructor(topics, posts) { + this._topics = topics; + this._posts = posts; + } + topics(): $ReadOnlyArray { + return this._topics; + } + posts(): $ReadOnlyArray { + return this._posts; + } + findPostInTopic(topicId: TopicId, indexWithinTopic: number): ?PostId { + const post = this._posts.filter( + (p) => p.topicId === topicId && p.indexWithinTopic === indexWithinTopic + )[0]; + return post ? post.id : null; + } + } + + function example() { + const url = "https://url.com"; + const topic = { + id: 1, + title: "first topic", + timestampMs: 0, + authorUsername: "decentralion", + }; + const post1 = { + id: 1, + topicId: 1, + indexWithinTopic: 1, + replyToPostIndex: null, + timestampMs: 0, + authorUsername: "decentralion", + }; + const post2 = { + id: 2, + topicId: 1, + indexWithinTopic: 2, + // N.B. weird but realistic: replies to the first post get a + // replyToPostIndex of null, not 1 + replyToPostIndex: null, + timestampMs: 1, + authorUsername: "wchargin", + }; + const post3 = { + id: 3, + topicId: 1, + indexWithinTopic: 3, + replyToPostIndex: 2, + timestampMs: 1, + authorUsername: "mzargham", + }; + const posts = [post1, post2, post3]; + const data = new MockData([topic], [post1, post2, post3]); + const graph = createGraph(url, data); + return {graph, topic, url, posts}; + } + + describe("nodes are constructed correctly", () => { + it("for users", () => { + const {url} = example(); + const node = userNode(url, "decentralion"); + expect(node.description).toMatchInlineSnapshot( + `"[@decentralion](https://url.com/u/decentralion/)"` + ); + expect(node.timestampMs).toEqual(null); + expect(NodeAddress.toParts(node.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "user", + "https://url.com", + "decentralion", + ] + `); + }); + it("for topics", () => { + const {url, topic} = example(); + const node = topicNode(url, topic); + expect(node.description).toMatchInlineSnapshot( + `"[first topic](https://url.com/t/1)"` + ); + expect(node.timestampMs).toEqual(topic.timestampMs); + expect(NodeAddress.toParts(node.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "topic", + "https://url.com", + "1", + ] + `); + }); + it("for posts", () => { + const {url, topic, posts} = example(); + const node = postNode(url, posts[1], topic.title); + expect(node.description).toMatchInlineSnapshot( + `"[post #2 on first topic](https://url.com/t/1/2)"` + ); + expect(node.timestampMs).toEqual(posts[1].timestampMs); + expect(NodeAddress.toParts(node.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "post", + "https://url.com", + "2", + ] + `); + }); + it("gives an [unknown topic] description for posts without a matching topic", () => { + const post = { + id: 1, + topicId: 1, + indexWithinTopic: 1, + replyToPostIndex: null, + timestampMs: 0, + authorUsername: "decentralion", + }; + const data = new MockData([], [post]); + const url = "https://foo"; + const graph = createGraph(url, data); + const actual = Array.from(graph.nodes({prefix: postNodeType.prefix}))[0]; + const expected = postNode(url, post, "[unknown topic]"); + expect(actual).toEqual(expected); + }); + }); + + describe("edges are constructed correctly", () => { + it("for authorsTopic", () => { + const {url, topic} = example(); + const expectedSrc = userNode(url, topic.authorUsername).address; + const expectedDst = topicNode(url, topic).address; + const edge = authorsTopicEdge(url, topic); + expect(edge.src).toEqual(expectedSrc); + expect(edge.dst).toEqual(expectedDst); + expect(edge.timestampMs).toEqual(topic.timestampMs); + expect(EdgeAddress.toParts(edge.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "authors", + "topic", + "https://url.com", + "decentralion", + "1", + ] + `); + }); + it("for authorsPost", () => { + const {url, posts, topic} = example(); + const post = posts[1]; + const expectedSrc = userNode(url, post.authorUsername).address; + const expectedDst = postNode(url, post, topic.title).address; + const edge = authorsPostEdge(url, post); + expect(edge.src).toEqual(expectedSrc); + expect(edge.dst).toEqual(expectedDst); + expect(edge.timestampMs).toEqual(post.timestampMs); + expect(EdgeAddress.toParts(edge.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "authors", + "post", + "https://url.com", + "wchargin", + "2", + ] + `); + }); + it("for topicContainsPost", () => { + const {url, posts, topic} = example(); + const post = posts[1]; + const expectedSrc = topicNode(url, topic).address; + const expectedDst = postNode(url, post, topic.title).address; + const edge = topicContainsPostEdge(url, post); + expect(edge.src).toEqual(expectedSrc); + expect(edge.dst).toEqual(expectedDst); + expect(edge.timestampMs).toEqual(post.timestampMs); + expect(EdgeAddress.toParts(edge.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "topicContainsPost", + "https://url.com", + "1", + "2", + ] + `); + }); + it("for postReplies", () => { + const {url, posts, topic} = example(); + const post = posts[2]; + const basePost = posts[1]; + const expectedSrc = postNode(url, post, topic.title).address; + const expectedDst = postNode(url, basePost, topic.title).address; + const edge = postRepliesEdge(url, post, basePost.id); + expect(edge.src).toEqual(expectedSrc); + expect(edge.dst).toEqual(expectedDst); + expect(edge.timestampMs).toEqual(post.timestampMs); + expect(EdgeAddress.toParts(edge.address)).toMatchInlineSnapshot(` + Array [ + "sourcecred", + "discourse", + "replyTo", + "https://url.com", + "3", + "2", + ] + `); + }); + }); + + describe("has the right nodes", () => { + const addressSort = (xs) => sortBy(xs, (x) => x.address); + function nodesOfType(t: NodeType) { + return Array.from(example().graph.nodes({prefix: t.prefix})); + } + function expectNodesOfType(expected: Node[], type: NodeType) { + expect(addressSort(expected)).toEqual(addressSort(nodesOfType(type))); + } + it("for users", () => { + const {url} = example(); + const usernames = ["decentralion", "wchargin", "mzargham"]; + const expected = usernames.map((x) => userNode(url, x)); + expectNodesOfType(expected, userNodeType); + }); + it("for topics", () => { + const {url, topic} = example(); + const expected = [topicNode(url, topic)]; + expectNodesOfType(expected, topicNodeType); + }); + it("for posts", () => { + const {url, posts, topic} = example(); + const expected = posts.map((x) => postNode(url, x, topic.title)); + expectNodesOfType(expected, postNodeType); + }); + }); + + describe("has the right edges", () => { + const addressSort = (xs) => sortBy(xs, (x) => x.address); + function edgesOfType(t: EdgeType) { + return Array.from( + example().graph.edges({addressPrefix: t.prefix, showDangling: false}) + ); + } + function expectEdgesOfType(expected: Edge[], type: EdgeType) { + expect(addressSort(expected)).toEqual(addressSort(edgesOfType(type))); + } + it("authorsTopic edges", () => { + const {url, topic} = example(); + const topicEdge = authorsTopicEdge(url, topic); + expectEdgesOfType([topicEdge], authorsTopicEdgeType); + }); + it("authorsPost edges", () => { + const {url, posts} = example(); + const postEdges = posts.map((p) => authorsPostEdge(url, p)); + expectEdgesOfType(postEdges, authorsPostEdgeType); + }); + it("topicContainsPost edges", () => { + const {url, posts} = example(); + const edges = posts.map((p) => topicContainsPostEdge(url, p)); + expectEdgesOfType(edges, topicContainsPostEdgeType); + }); + it("postReplies edges", () => { + const {url, posts} = example(); + const [post1, post2, post3] = posts; + const edges = [ + postRepliesEdge(url, post2, post1.id), + postRepliesEdge(url, post3, post2.id), + ]; + expectEdgesOfType(edges, postRepliesEdgeType); + }); + }); +}); diff --git a/src/plugins/discourse/declaration.js b/src/plugins/discourse/declaration.js new file mode 100644 index 0000000..3abf60a --- /dev/null +++ b/src/plugins/discourse/declaration.js @@ -0,0 +1,83 @@ +// @flow + +import deepFreeze from "deep-freeze"; +import type {PluginDeclaration} from "../../analysis/pluginDeclaration"; +import type {NodeType, EdgeType} from "../../analysis/types"; +import {NodeAddress, EdgeAddress} from "../../core/graph"; + +export const topicNodeType: NodeType = deepFreeze({ + name: "Topic", + pluralName: "Topics", + prefix: NodeAddress.fromParts(["sourcecred", "discourse", "topic"]), + defaultWeight: 2, + description: + "A topic (or post-container) in a Discourse instance. Every topic has at least one post.", +}); + +export const postNodeType: NodeType = deepFreeze({ + name: "Post", + pluralName: "Posts", + prefix: NodeAddress.fromParts(["sourcecred", "discourse", "post"]), + defaultWeight: 1, + description: "A post in some topic in a Discourse instance.", +}); + +export const userNodeType: NodeType = deepFreeze({ + name: "User", + pluralName: "Users", + prefix: NodeAddress.fromParts(["sourcecred", "discourse", "user"]), + defaultWeight: 1, + description: "A user account on a particular Discourse instance.", +}); + +export const topicContainsPostEdgeType: EdgeType = deepFreeze({ + forwardName: "contains post", + backwardName: "is contained by topic", + prefix: EdgeAddress.fromParts([ + "sourcecred", + "discourse", + "topicContainsPost", + ]), + defaultWeight: {forwards: 0, backwards: 1}, + description: "Connects a topic to the posts that it contains.", +}); + +export const postRepliesEdgeType: EdgeType = deepFreeze({ + forwardName: "post is reply to", + backwardName: "post replied to by", + prefix: EdgeAddress.fromParts(["sourcecred", "discourse", "replyTo"]), + defaultWeight: {forwards: 1, backwards: 0}, + description: "Connects a post to the post that it is a reply to.", +}); + +export const authorsTopicEdgeType: EdgeType = deepFreeze({ + forwardName: "authors", + backwardName: "is authored by", + prefix: EdgeAddress.fromParts([ + "sourcecred", + "discourse", + "authors", + "topic", + ]), + defaultWeight: {forwards: 0.5, backwards: 1}, + description: "Connects an author to a topic they created.", +}); + +export const authorsPostEdgeType: EdgeType = deepFreeze({ + forwardName: "authors", + backwardName: "is authored by", + prefix: EdgeAddress.fromParts(["sourcecred", "discourse", "authors", "post"]), + defaultWeight: {forwards: 0.5, backwards: 1}, + description: "Connects an author to a post they've created.", +}); + +export const declaration: PluginDeclaration = deepFreeze({ + name: "discourse", + nodeTypes: [userNodeType, topicNodeType, postNodeType], + edgeTypes: [ + postRepliesEdgeType, + authorsTopicEdgeType, + authorsPostEdgeType, + topicContainsPostEdgeType, + ], +});