Add `MentionsAuthor` edges to the graph (#808)
This commit builds on the work in #806, adding the `MentionsAuthorReference`s to the graph. It thus resolves #804.

Empirically, the addition of these edges does not change the users' cred distribution much. Consider the results with the following 3 forward weights for the edge (results for ipfs/go-ipfs):

| User          | w=1/32 |  w=1/2 |    w=2 |
|---------------|-------:|-------:|-------:|
| whyrusleeping | 228.04 | 225.69 | 223.86 |
| jbenet        | 102.04 | 100.26 |  99.53 |
| kubuxu        |  66.60 |  67.80 |  69.36 |
| ...           |      — |      — |      — |
| btc           |  22.69 |  22.29 |  21.38 |

The small effect on users' cred is not that surprising: the MentionsAuthor references always "shadow" a direct comment->user reference. In principle, the overall cred going to the user should be similar; the difference is that now some more cred flows in between the various comments authored by that user, on the way to the user. (And if those other comments had references, then it flows out from them, etc.)

Empirically, the variance on comments' scores seems to increase as a result of having this heuristic, which is great—the fact that all comments had about the same score was a bug, not a feature. Sadly, we don't have good tooling for proper statistical analysis of the effect this is having. We'll want to study the effect of this heuristic more later, as we build tooling and canonical datasets that make that analysis feasible.

We choose to add this heuristic, despite the ambiguous effect on users' cred, because we think it is principled, and adds meaningful structure to the graph.

Test plan: The commit is a pretty straightforward generalization of our existing GitHub edge logic. All of the interesting logic was thoroughly tested in the preceding pull, so this commit just tests the integration. Observe that standard (de)serialization of the edge works, that the snapshot is updated with a MentionsAuthor reference edge, and that the graph invariant checker, after update, does not throw errors.
Also, I manually tested this change on the ipfs/go-ipfs repo. (It does not require regenerating data.)
This commit is contained in:
parent
91f76393e8
commit
737ed4d8b3
|
@ -974,6 +974,31 @@ Array [
|
|||
"dstIndex": 28,
|
||||
"srcIndex": 16,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"MENTIONS_AUTHOR",
|
||||
"4",
|
||||
"PULL",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"5",
|
||||
"6",
|
||||
"COMMENT",
|
||||
"PULL",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"5",
|
||||
"396430464",
|
||||
"3",
|
||||
"USERLIKE",
|
||||
"USER",
|
||||
"wchargin",
|
||||
],
|
||||
"dstIndex": 15,
|
||||
"srcIndex": 25,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
|
|
|
@ -72,6 +72,46 @@ Object {
|
|||
}
|
||||
`;
|
||||
|
||||
exports[`plugins/github/edges createEdge works for "mentionsAuthor" 1`] = `
|
||||
Object {
|
||||
"addressParts": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"MENTIONS_AUTHOR",
|
||||
"4",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"2",
|
||||
"4",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"2",
|
||||
"3",
|
||||
"USERLIKE",
|
||||
"USER",
|
||||
"decentralion",
|
||||
],
|
||||
"dstParts": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"2",
|
||||
],
|
||||
"srcParts": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"2",
|
||||
],
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`plugins/github/edges createEdge works for "mergedAs" 1`] = `
|
||||
Object {
|
||||
"addressParts": Array [
|
||||
|
|
|
@ -5,6 +5,7 @@ import * as GitNode from "../git/nodes";
|
|||
import * as N from "./nodes";
|
||||
import * as R from "./relationalView";
|
||||
import {createEdge} from "./edges";
|
||||
import {findMentionsAuthorReferences} from "./heuristics/mentionsAuthorReference";
|
||||
|
||||
export function createGraph(view: R.RelationalView): Graph {
|
||||
const creator = new GraphCreator();
|
||||
|
@ -47,6 +48,10 @@ class GraphCreator {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (const mentionsAuthorReference of findMentionsAuthorReferences(view)) {
|
||||
this.graph.addEdge(createEdge.mentionsAuthor(mentionsAuthorReference));
|
||||
}
|
||||
}
|
||||
|
||||
addNode(addr: N.StructuredAddress) {
|
||||
|
|
|
@ -8,6 +8,7 @@ import {
|
|||
} from "../../core/graph";
|
||||
import * as GithubNode from "./nodes";
|
||||
import * as GitNode from "../git/nodes";
|
||||
import type {MentionsAuthorReference} from "./heuristics/mentionsAuthorReference";
|
||||
|
||||
export opaque type RawAddress: EdgeAddressT = EdgeAddressT;
|
||||
|
||||
|
@ -15,6 +16,7 @@ export const AUTHORS_TYPE = "AUTHORS";
|
|||
export const MERGED_AS_TYPE = "MERGED_AS";
|
||||
export const HAS_PARENT_TYPE = "HAS_PARENT";
|
||||
export const REFERENCES_TYPE = "REFERENCES";
|
||||
export const MENTIONS_AUTHOR_TYPE = "MENTIONS_AUTHOR";
|
||||
|
||||
const GITHUB_PREFIX = EdgeAddress.fromParts(["sourcecred", "github"]);
|
||||
function githubEdgeAddress(...parts: string[]): RawAddress {
|
||||
|
@ -27,6 +29,7 @@ export const _Prefix = Object.freeze({
|
|||
mergedAs: githubEdgeAddress(MERGED_AS_TYPE),
|
||||
references: githubEdgeAddress(REFERENCES_TYPE),
|
||||
hasParent: githubEdgeAddress(HAS_PARENT_TYPE),
|
||||
mentionsAuthor: githubEdgeAddress(MENTIONS_AUTHOR_TYPE),
|
||||
});
|
||||
|
||||
export type AuthorsAddress = {|
|
||||
|
@ -47,12 +50,17 @@ export type ReferencesAddress = {|
|
|||
+referrer: GithubNode.TextContentAddress,
|
||||
+referent: GithubNode.ReferentAddress,
|
||||
|};
|
||||
export type MentionsAuthorAddress = {|
|
||||
+type: typeof MENTIONS_AUTHOR_TYPE,
|
||||
+reference: MentionsAuthorReference,
|
||||
|};
|
||||
|
||||
export type StructuredAddress =
|
||||
| AuthorsAddress
|
||||
| MergedAsAddress
|
||||
| HasParentAddress
|
||||
| ReferencesAddress;
|
||||
| ReferencesAddress
|
||||
| MentionsAuthorAddress;
|
||||
|
||||
export const createEdge = Object.freeze({
|
||||
authors: (
|
||||
|
@ -87,6 +95,11 @@ export const createEdge = Object.freeze({
|
|||
src: GithubNode.toRaw(referrer),
|
||||
dst: GithubNode.toRaw(referent),
|
||||
}),
|
||||
mentionsAuthor: (reference: MentionsAuthorReference): Edge => ({
|
||||
address: toRaw({type: MENTIONS_AUTHOR_TYPE, reference}),
|
||||
src: GithubNode.toRaw(reference.src),
|
||||
dst: GithubNode.toRaw(reference.dst),
|
||||
}),
|
||||
});
|
||||
|
||||
const NODE_PREFIX_LENGTH = NodeAddress.toParts(GithubNode._githubAddress())
|
||||
|
@ -185,6 +198,24 @@ export function fromRaw(x: RawAddress): StructuredAddress {
|
|||
): any);
|
||||
return ({type: REFERENCES_TYPE, referrer, referent}: ReferencesAddress);
|
||||
}
|
||||
case MENTIONS_AUTHOR_TYPE: {
|
||||
const parts = multiLengthDecode(rest, fail);
|
||||
if (parts.length !== 3) {
|
||||
throw fail();
|
||||
}
|
||||
const [srcParts, dstParts, whoParts] = parts;
|
||||
const src: GithubNode.TextContentAddress = (GithubNode.fromRaw(
|
||||
GithubNode._githubAddress(...srcParts)
|
||||
): any);
|
||||
const dst: GithubNode.TextContentAddress = (GithubNode.fromRaw(
|
||||
GithubNode._githubAddress(...dstParts)
|
||||
): any);
|
||||
const who: GithubNode.UserlikeAddress = (GithubNode.fromRaw(
|
||||
GithubNode._githubAddress(...whoParts)
|
||||
): any);
|
||||
const reference = {src, dst, who};
|
||||
return {type: MENTIONS_AUTHOR_TYPE, reference};
|
||||
}
|
||||
default:
|
||||
throw fail();
|
||||
}
|
||||
|
@ -214,6 +245,13 @@ export function toRaw(x: StructuredAddress): RawAddress {
|
|||
...lengthEncode(GithubNode.toRaw(x.referrer)),
|
||||
...lengthEncode(GithubNode.toRaw(x.referent))
|
||||
);
|
||||
case MENTIONS_AUTHOR_TYPE:
|
||||
return EdgeAddress.append(
|
||||
_Prefix.mentionsAuthor,
|
||||
...lengthEncode(GithubNode.toRaw(x.reference.src)),
|
||||
...lengthEncode(GithubNode.toRaw(x.reference.dst)),
|
||||
...lengthEncode(GithubNode.toRaw(x.reference.who))
|
||||
);
|
||||
default:
|
||||
throw new Error((x.type: empty));
|
||||
}
|
||||
|
|
|
@ -57,6 +57,12 @@ describe("plugins/github/edges", () => {
|
|||
createEdge.hasParent(nodeExamples.reviewComment(), nodeExamples.review()),
|
||||
references: () =>
|
||||
createEdge.references(nodeExamples.issue(), nodeExamples.pull()),
|
||||
mentionsAuthor: () =>
|
||||
createEdge.mentionsAuthor({
|
||||
src: nodeExamples.issue(),
|
||||
dst: nodeExamples.issue(),
|
||||
who: nodeExamples.user(),
|
||||
}),
|
||||
};
|
||||
|
||||
describe("createEdge", () => {
|
||||
|
|
|
@ -233,6 +233,14 @@ export class GraphView {
|
|||
srcAccessor: (x) => GN.toRaw((x: any).author),
|
||||
dstAccessor: (x) => GN.toRaw((x: any).content),
|
||||
},
|
||||
[GE.MENTIONS_AUTHOR_TYPE]: {
|
||||
homs: homProduct(
|
||||
[GN._Prefix.issue, GN._Prefix.pull, GN._Prefix.comment],
|
||||
[GN._Prefix.issue, GN._Prefix.pull, GN._Prefix.comment]
|
||||
),
|
||||
srcAccessor: (x) => GN.toRaw((x: any).reference.src),
|
||||
dstAccessor: (x) => GN.toRaw((x: any).reference.dst),
|
||||
},
|
||||
};
|
||||
|
||||
for (const edge of this._graph.edges({
|
||||
|
|
|
@ -100,6 +100,14 @@ export class StaticPluginAdapter implements IStaticPluginAdapter {
|
|||
defaultBackwardWeight: 1 / 16,
|
||||
prefix: E._Prefix.references,
|
||||
},
|
||||
{
|
||||
forwardName: "mentions author of",
|
||||
backwardName: "has author mentioned by",
|
||||
defaultForwardWeight: 1,
|
||||
// TODO(#811): Probably change this to 0
|
||||
defaultBackwardWeight: 1 / 32,
|
||||
prefix: E._Prefix.mentionsAuthor,
|
||||
},
|
||||
];
|
||||
}
|
||||
async load(assets: Assets, repo: Repo): Promise<IDynamicPluginAdapater> {
|
||||
|
|
Loading…
Reference in New Issue