From 17345fcca9f66732f29fda5654bd018535ee5ab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Sat, 16 Feb 2019 15:47:38 -0700 Subject: [PATCH] PagerankGraph: Add toJSON/fromJSON (#1088) * PagerankGraph: Add toJSON/fromJSON This commit adds serialization logic to `PagerankGraph`. As with many things in PagerankGraph, it's based on the corresponding logic in `Graph`. Much like graph, it stores data associated with nodes and edges (in this case, the scores and edge weights) in an ordered array rather than a map, so as to avoid repetitiously serializing the node and edge addresses. Test plan: Unit tests added, and they should be sufficient. Also take a look at the included snapshot. --- .../__snapshots__/pagerankGraph.test.js.snap | 76 +++++++++++++ src/core/pagerankGraph.js | 100 +++++++++++++++++- src/core/pagerankGraph.test.js | 29 +++++ 3 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 src/core/__snapshots__/pagerankGraph.test.js.snap diff --git a/src/core/__snapshots__/pagerankGraph.test.js.snap b/src/core/__snapshots__/pagerankGraph.test.js.snap new file mode 100644 index 0000000..66d4ce3 --- /dev/null +++ b/src/core/__snapshots__/pagerankGraph.test.js.snap @@ -0,0 +1,76 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`core/pagerankGraph to/from JSON matches expected snapshot 1`] = ` +Array [ + Object { + "type": "sourcecred/pagerankGraph", + "version": "0.1.0", + }, + Object { + "froWeights": Array [ + 0, + 0, + 0, + ], + "graphJSON": Array [ + Object { + "type": "sourcecred/graph", + "version": "0.4.0", + }, + Object { + "edges": Array [ + Object { + "address": Array [ + "hom", + "1", + ], + "dstIndex": 0, + "srcIndex": 3, + }, + Object { + "address": Array [ + "hom", + "2", + ], + "dstIndex": 0, + "srcIndex": 3, + }, + Object { + "address": Array [ + "loop", + ], + "dstIndex": 2, + "srcIndex": 2, + }, + ], + "nodes": Array [ + Array [ + "dst", + ], + Array [ + "isolated", + ], + Array [ + "loop", + ], + Array 
[ + "src", + ], + ], + }, + ], + "scores": Array [ + 0.25, + 0.25, + 0.25, + 0.25, + ], + "syntheticLoopWeight": 0.001, + "toWeights": Array [ + 1, + 1, + 1, + ], + }, +] +`; diff --git a/src/core/pagerankGraph.js b/src/core/pagerankGraph.js index 596fc39..cdea6ee 100644 --- a/src/core/pagerankGraph.js +++ b/src/core/pagerankGraph.js @@ -2,7 +2,16 @@ import deepEqual from "lodash.isequal"; -import {Graph, type Edge, type NodeAddressT, type EdgeAddressT} from "./graph"; +import {toCompat, fromCompat, type Compatible} from "../util/compat"; +import { + Graph, + type Edge, + type NodeAddressT, + type EdgeAddressT, + type GraphJSON, + sortedEdgeAddressesFromJSON, + sortedNodeAddressesFromJSON, +} from "./graph"; import { distributionToNodeDistribution, createConnections, @@ -25,6 +34,20 @@ export type WeightedEdge = {| +weight: EdgeWeight, |}; +export opaque type PagerankGraphJSON = Compatible<{| + +graphJSON: GraphJSON, + // Score for every node, ordered by the sorted node address. + +scores: $ReadOnlyArray, + // Weights for every edge, ordered by sorted edge address. + // We could save the EdgeWeights directly rather than having separate + // arrays for toWeights and froWeights, but this would lead to an inflated + // JSON representation because we would be needlessly duplicating the keys + // "toWeight" and "froWeight" themselves. + +toWeights: $ReadOnlyArray, + +froWeights: $ReadOnlyArray, + +syntheticLoopWeight: number, +|}>; + /** * Options to control how PageRank runs and when it stops */ @@ -46,6 +69,8 @@ export type PagerankConvergenceReport = {| export const DEFAULT_SYNTHETIC_LOOP_WEIGHT = 1e-3; +const COMPAT_INFO = {type: "sourcecred/pagerankGraph", version: "0.1.0"}; + /** * PagerankGraph is a wrapper over the Graph class, which adds * the ability to run PageRank to compute scores on the Graph. @@ -296,6 +321,79 @@ export class PagerankGraph { ); } + /** + * Serialize this graph into a PagerankGraphJSON object. + * + * Returns a plain JavaScript object. 
+ * + * For space efficiency, we store the node scores as an array of numbers in + * node-address-sorted order, and we store the edge weights as two arrays of + * numbers in edge-address-sorted order. + */ + toJSON(): PagerankGraphJSON { + this._verifyGraphNotModified(); + + const graphJSON = this.graph().toJSON(); + const nodes = sortedNodeAddressesFromJSON(graphJSON); + const scores: number[] = nodes.map((x) => + NullUtil.get(this._scores.get(x)) + ); + + const edgeAddresses = sortedEdgeAddressesFromJSON(graphJSON); + const edgeWeights: EdgeWeight[] = edgeAddresses.map((x) => + NullUtil.get(this._edgeWeights.get(x)) + ); + const toWeights: number[] = edgeWeights.map((x) => x.toWeight); + const froWeights: number[] = edgeWeights.map((x) => x.froWeight); + + const rawJSON = { + graphJSON, + scores, + toWeights, + froWeights, + syntheticLoopWeight: this.syntheticLoopWeight(), + }; + + return toCompat(COMPAT_INFO, rawJSON); + } + + static fromJSON(json: PagerankGraphJSON): PagerankGraph { + const { + toWeights, + froWeights, + scores, + graphJSON, + syntheticLoopWeight, + } = fromCompat(COMPAT_INFO, json); + const graph = Graph.fromJSON(graphJSON); + + const nodes = sortedNodeAddressesFromJSON(graphJSON); + const scoreMap: Map = new Map(); + for (let i = 0; i < nodes.length; i++) { + scoreMap.set(nodes[i], scores[i]); + } + + const edges = sortedEdgeAddressesFromJSON(graphJSON); + const edgeWeights: Map = new Map(); + for (let i = 0; i < edges.length; i++) { + const toWeight = toWeights[i]; + const froWeight = froWeights[i]; + edgeWeights.set(edges[i], {toWeight, froWeight}); + } + + function evaluator(e: Edge): EdgeWeight { + return NullUtil.get(edgeWeights.get(e.address)); + } + + const prg = new PagerankGraph(graph, evaluator, syntheticLoopWeight); + // TODO(#1020): It's a little hacky to force the scores in like this; + // consider adding an optional constructor argument to allow manually + // setting the scores at construction time, if we ever find a use case + 
// that needs it. + prg._scores = scoreMap; + return prg; + } + _verifyGraphNotModified() { if (this._graph.modificationCount() !== this._graphModificationCount) { throw new Error( diff --git a/src/core/pagerankGraph.test.js b/src/core/pagerankGraph.test.js index 288a47a..2ab5c62 100644 --- a/src/core/pagerankGraph.test.js +++ b/src/core/pagerankGraph.test.js @@ -229,4 +229,33 @@ describe("core/pagerankGraph", () => { expect(() => pg.equals(pg)).toThrowError("has been modified"); }); }); + + describe("to/from JSON", () => { + it("to->fro is identity", async () => { + const pg = examplePagerankGraph(); + await pg.runPagerank({maxIterations: 1, convergenceThreshold: 0.01}); + const pgJSON = pg.toJSON(); + const pg_ = PagerankGraph.fromJSON(pgJSON); + expect(pg.equals(pg_)).toBe(true); + }); + it("fro->to is identity", async () => { + const pg = examplePagerankGraph(); + await pg.runPagerank({maxIterations: 1, convergenceThreshold: 0.01}); + const pgJSON = pg.toJSON(); + const pg_ = PagerankGraph.fromJSON(pgJSON); + const pgJSON_ = pg_.toJSON(); + expect(pgJSON).toEqual(pgJSON_); + }); + it("is canonical with respect to the graph's history", async () => { + const pg1 = new PagerankGraph(advancedGraph().graph1(), defaultEvaluator); + const pg2 = new PagerankGraph(advancedGraph().graph2(), defaultEvaluator); + const pg1JSON = pg1.toJSON(); + const pg2JSON = pg2.toJSON(); + expect(pg1JSON).toEqual(pg2JSON); + }); + it("matches expected snapshot", () => { + const pgJSON = examplePagerankGraph().toJSON(); + expect(pgJSON).toMatchSnapshot(); + }); + }); });