add `Graph.contractNodes` (#1380)

This commit adds Graph.contractNodes, which allows collapsing certain
nodes in the graph into each other. This will enable the creation of a
SourceCred "identity" plugin, allowing identity resolution between users
different accounts on different services.

Test plan: Thorough unit tests have been added. `yarn test` passes.

Thanks to @wchargin for [review feedback][1] which significantly
improved this API.

[1]: https://github.com/sourcecred/sourcecred/pull/1380#discussion_r324958055
This commit is contained in:
Dandelion Mané 2019-09-18 13:59:49 +02:00 committed by GitHub
parent ddf07c6714
commit ac8ac7051f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 224 additions and 0 deletions

View File

@ -209,6 +209,15 @@ type CachedOrder = {|
+modificationCount: number,
|};
/**
* Specifies how to contract a graph, collapsing several old nodes
* into a single new node, and re-writing edges for consistency.
*/
export type NodeContraction = {|
+old: $ReadOnlyArray<NodeAddressT>,
+replacement: Node,
|};
export class Graph {
_nodes: Map<NodeAddressT, Node>;
_edges: Map<EdgeAddressT, Edge>;
@ -889,6 +898,63 @@ export class Graph {
return result;
}
/**
* Create a new graph, in which some nodes have been contracted together.
*
* contractNodes takes a list of NodeContractions, each of which specifies a
* replacement node, and a list of old node addresses to map onto the new
* node. A new graph will be returned where the new node is added, none of
* the old nodes are present, and every edge incident to one of the old nodes
* has been re-written so that it is incident to the new node instead.
*
* If the same node addresses is "old" for several contractions, all incident
* edges will be re-written to connect to whichever contraction came last.
*
* If the replacement node is present in the graph, no error will be thrown,
* provided that the replacement node is consistent with the one in the graph.
*
* If there is a "chain" of remaps (i.e. a->b and b->c), then an error will
* be thrown, as support for chaining has not yet been implemented.
*
* The original Graph is not mutated.
*
* contractNodes runs in O(n+e+k), where `n` is the number of nodes, `e` is the
* number of edges, and `k` is the number of contractions. If needed, we can
* improve the peformance by mutating the original graph instead of creating
* a new one.
*/
contractNodes(contractions: $ReadOnlyArray<NodeContraction>): Graph {
const remap = new Map();
const replacements = new Set();
const contracted = new Graph();
for (const {old, replacement} of contractions) {
for (const addr of old) {
if (replacements.has(addr)) {
throw new Error(
`Chained contractions are not supported: ${NodeAddress.toString(
addr
)}`
);
}
remap.set(addr, replacement.address);
}
replacements.add(replacement.address);
contracted.addNode(replacement);
}
for (const node of this.nodes()) {
if (!remap.has(node.address)) {
contracted.addNode(node);
}
}
for (const edge of this.edges({showDangling: true})) {
const src = NullUtil.orElse(remap.get(edge.src), edge.src);
const dst = NullUtil.orElse(remap.get(edge.dst), edge.dst);
const newEdge = {...edge, src, dst};
contracted.addEdge(newEdge);
}
return contracted;
}
checkInvariants() {
if (this._invariantsLastChecked.when !== this._modificationCount) {
let failure: ?string = null;

View File

@ -1358,6 +1358,164 @@ describe("core/graph", () => {
});
});
describe("contractNodes", () => {
const a = node("a");
const b = node("b");
const c = node("c");
it("has no effect with no contractions", () => {
const g = simpleGraph();
const g_ = simpleGraph().contractNodes([]);
expect(g.equals(g_)).toBe(true);
});
it("adds the new node to the graph", () => {
const g = simpleGraph().contractNodes([{old: [], replacement: c}]);
const g_ = simpleGraph().addNode(c);
expect(g.equals(g_)).toBe(true);
});
it("filters old nodes from the graph", () => {
const g = new Graph()
.addNode(a)
.addNode(b)
.contractNodes([{old: [a.address, b.address], replacement: c}]);
const expected = new Graph().addNode(c);
expect(g.equals(expected)).toBe(true);
});
it("re-writes regular edges", () => {
const g = new Graph()
.addNode(a)
.addNode(b)
.addEdge(edge("forward", a, b))
.addEdge(edge("backward", b, a))
.contractNodes([{old: [a.address], replacement: c}]);
const expected = new Graph()
.addNode(c)
.addNode(b)
.addEdge(edge("forward", c, b))
.addEdge(edge("backward", b, c));
expect(g.equals(expected)).toBe(true);
});
it("re-writes edges, including dangling or loop edges", () => {
const g = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.addEdge(edge("dangle1", a, b))
.addEdge(edge("dangle2", b, a))
.contractNodes([{old: [a.address], replacement: c}]);
const expected = new Graph()
.addNode(c)
.addEdge(edge("loop", c, c))
.addEdge(edge("dangle1", c, b))
.addEdge(edge("dangle2", b, c));
expect(g.equals(expected)).toBe(true);
});
it("if multiple transforms target the same node, last one wins", () => {
const g = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([
{old: [a.address], replacement: b},
{old: [a.address], replacement: c},
]);
const expected = new Graph()
.addNode(b)
.addNode(c)
.addEdge(edge("loop", c, c));
expect(g.equals(expected)).toBe(true);
});
it("doesn't mutate the original graph", () => {
const g1 = new Graph().addNode(a);
const g2 = g1.contractNodes([{old: [a.address], replacement: b}]);
expect(g1.equals(g2)).toBe(false);
});
it("allows replacements that are already in the graph", () => {
const g = new Graph()
.addNode(a)
.addNode(b)
.addEdge(edge("future-loop", a, b))
.contractNodes([{old: [a.address], replacement: b}]);
const expected = new Graph()
.addNode(b)
.addEdge(edge("future-loop", b, b));
expect(g.equals(expected)).toBe(true);
});
it("a node can replace itself", () => {
const g = new Graph().addNode(a).addEdge(edge("loop", a, a));
const g_ = g.contractNodes([{old: [a.address], replacement: a}]);
expect(g.equals(g_)).toBe(true);
});
it("a node can replace itself with a distinct node", () => {
// I don't think this is useful, but it's interesting to document.
const a_ = {...a, timestampMs: 1337};
const g = new Graph()
.addNode(a)
.contractNodes([{old: [a.address], replacement: a_}]);
const expected = new Graph().addNode(a_);
expect(g.equals(expected)).toBe(true);
});
it("adding a conflicting node via replacement throws an error", () => {
const b_ = {...b, timestampMs: 1337};
const fail = () =>
new Graph()
.addNode(a)
.addNode(b)
.contractNodes([{old: [a.address], replacement: b_}]);
expect(fail).toThrowError("conflict between new node");
});
it("does not allow chained contractions", () => {
const fail = () =>
new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([
{old: [a.address], replacement: b},
{old: [b.address], replacement: c},
]);
expect(fail).toThrow("Chained contractions are not supported");
});
/**
* If we decide to support chained contractions,
* these would be the semantics to shoot for.
it("can chain contractions", () => {
const g = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([
{old: [a.address], replacement: b},
{old: [b.address], replacement: c},
]);
const chained = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([{old: [a.address], replacement: c}]);
// equivalent, due to chaining
expect(g.equals(chained)).toBe(true);
// documenting the output of chaining
const expected = new Graph().addNode(c).addEdge(edge("loop", c, c));
expect(g.equals(expected)).toBe(true);
});
*/
it("only chains contractions in forward order", () => {
const g = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([
{old: [b.address], replacement: c},
{old: [a.address], replacement: b},
]);
const chained = new Graph()
.addNode(a)
.addEdge(edge("loop", a, a))
.contractNodes([{old: [a.address], replacement: c}]);
// Not equal, because the order was wrong
expect(g.equals(chained)).toBe(false);
const expected = new Graph()
.addNode(b)
.addNode(c)
.addEdge(edge("loop", b, b));
expect(g.equals(expected)).toBe(true);
});
});
describe("toJSON / fromJSON", () => {
describe("snapshot testing", () => {
it("a trivial graph", () => {