From c5c9f950d43a5ef2f4119b6e34670802c37ad2ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?=
Date: Mon, 1 Jun 2020 17:14:26 -0700
Subject: [PATCH] Compute CredData from TimelineCredScores (#1831)

This commit adds the `analysis/credData` module, which processes raw
TimelineCredScores into a format which is better for serialization and
data analysis. In particular, this format explicitly stores the summary
(summed-across-time) data for nodes separately from the raw temporal
data, which will allow us to throw away raw data for uninteresting or
low-cred nodes, while keeping the summary.

Test plan: I've added some basic unit tests; run `yarn test`.
---
Review notes. Everything between the "---" line above and the first
"diff --git" line is commentary that `git am` ignores, so these notes do
not become part of the commit or change any hunk.

What the patch does:

 * computeCredData(scores) converts per-interval scores into per-node and
   per-edge records. For every node and edge it produces a summary (the
   sum of each field across all intervals) and an "over time" record
   (the raw per-interval values, indexed by interval). intervalEnds holds
   each interval's endTimeMs.
 * An empty `scores` array returns a CredData whose five arrays are all
   empty.
 * The node count is read from scores[0].cred.length and the edge count
   from scores[0].forwardFlow.length. Later intervals are indexed with
   the same bounds and are not checked, so callers must supply arrays of
   equal length in every interval.
 * As written here, computeCredData never emits a null "over time" entry
   or a null field. The nullable types only describe filtering that the
   doc comments say may happen; no filtering is implemented in this patch.
 * distributionToCred.js: the two score types change from opaque types to
   plain aliases of Float64Array, presumably so that code outside that
   module (such as the new test) can build TimelineCredScores directly
   from Float64Array values. TODO confirm.

About this copy of the patch:

 * Line breaks were restored; this copy had been collapsed onto a few
   very long lines. The restored file bodies have 124 and 55 lines,
   matching the hunk headers below.
 * Every `$ReadOnlyArray` appeared without type arguments. They were
   restored from the imported/declared type names and the doc comments.
   The `| null` element types on nodeOverTime and edgeOverTime are
   inferred from the "may filter out ... entirely" comments; confirm
   against blob 4f6a484.
 * The From: header carries no e-mail address in this copy. It was not
   reconstructed.

 src/analysis/credData.js                 | 124 +++++++++++++++++++++++
 src/analysis/credData.test.js            |  55 ++++++++++
 src/core/algorithm/distributionToCred.js |   4 +-
 3 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 src/analysis/credData.js
 create mode 100644 src/analysis/credData.test.js

diff --git a/src/analysis/credData.js b/src/analysis/credData.js
new file mode 100644
index 0000000..4f6a484
--- /dev/null
+++ b/src/analysis/credData.js
@@ -0,0 +1,124 @@
+// @flow
+
+import type {TimestampMs} from "../util/timestamp";
+import type {TimelineCredScores} from "../core/algorithm/distributionToCred";
+
+/**
+ * Comprehensive data on a cred distribution.
+ */
+export type CredData = {|
+  // Cred level information, always stored in graph address order.
+  +nodeSummaries: $ReadOnlyArray<NodeCredSummary>,
+  +nodeOverTime: $ReadOnlyArray<NodeCredOverTime | null>,
+  +edgeSummaries: $ReadOnlyArray<EdgeCredSummary>,
+  +edgeOverTime: $ReadOnlyArray<EdgeCredOverTime | null>,
+  +intervalEnds: $ReadOnlyArray<TimestampMs>,
+|};
+
+/** Summary of a node's cred across all time.
+ *
+ * CredData includes this information for every node in the graph, regardless of its score.
+ */
+export type NodeCredSummary = {|
+  +cred: number,
+  +seedFlow: number,
+  +syntheticLoopFlow: number,
+|};
+
+/**
+ * A node's cred data at interval time resolution.
+ *
+ * To save space, the CredData may filter out the NodeCredOverTime entirely
+ * for nodes with low score, or may filter out the seedFlow or syntheticLoopFlow
+ * fields if either was trivial.
+ */
+export type NodeCredOverTime = {|
+  +cred: $ReadOnlyArray<number>,
+  +seedFlow: $ReadOnlyArray<number> | null,
+  +syntheticLoopFlow: $ReadOnlyArray<number> | null,
+|};
+
+/**
+ * An edge's cred flows across all time.
+ *
+ * CredData includes this for every edge in the graph, regardless of its cred flows.
+ */
+export type EdgeCredSummary = {|
+  +forwardFlow: number,
+  +backwardFlow: number,
+|};
+
+/**
+ * An edge's cred flows at interval time resolution.
+ *
+ * To save space, we may filter out this struct entirely for low-cred-flow edges, or we might
+ * skip either the forwardFlow or backwardFlow fields if it had negligible cred flows in either direction.
+ */
+export type EdgeCredOverTime = {|
+  +forwardFlow: $ReadOnlyArray<number> | null,
+  +backwardFlow: $ReadOnlyArray<number> | null,
+|};
+
+export function computeCredData(scores: TimelineCredScores): CredData {
+  const numIntervals = scores.length;
+  if (numIntervals === 0) {
+    return {
+      nodeSummaries: [],
+      nodeOverTime: [],
+      edgeSummaries: [],
+      edgeOverTime: [],
+      intervalEnds: [],
+    };
+  }
+  const intervalEnds = scores.map((x) => x.interval.endTimeMs);
+  const numNodes = scores[0].cred.length;
+  const numEdges = scores[0].forwardFlow.length;
+  const nodeSummaries = new Array(numNodes).fill(null).map(() => ({
+    cred: 0,
+    seedFlow: 0,
+    syntheticLoopFlow: 0,
+  }));
+  const nodeOverTime = new Array(numNodes).fill(null).map(() => ({
+    cred: new Array(numIntervals),
+    seedFlow: new Array(numIntervals),
+    syntheticLoopFlow: new Array(numIntervals),
+  }));
+  const edgeSummaries = new Array(numEdges).fill(null).map(() => ({
+    forwardFlow: 0,
+    backwardFlow: 0,
+  }));
+  const edgeOverTime = new Array(numEdges).fill(null).map(() => ({
+    forwardFlow: new Array(numIntervals),
+    backwardFlow: new Array(numIntervals),
+  }));
+  for (let i = 0; i < numIntervals; i++) {
+    const {
+      cred,
+      forwardFlow,
+      backwardFlow,
+      seedFlow,
+      syntheticLoopFlow,
+    } = scores[i];
+    for (let n = 0; n < numNodes; n++) {
+      nodeSummaries[n].cred += cred[n];
+      nodeOverTime[n].cred[i] = cred[n];
+      nodeSummaries[n].seedFlow += seedFlow[n];
+      nodeOverTime[n].seedFlow[i] = seedFlow[n];
+      nodeSummaries[n].syntheticLoopFlow += syntheticLoopFlow[n];
+      nodeOverTime[n].syntheticLoopFlow[i] = syntheticLoopFlow[n];
+    }
+    for (let e = 0; e < numEdges; e++) {
+      edgeSummaries[e].forwardFlow += forwardFlow[e];
+      edgeOverTime[e].forwardFlow[i] = forwardFlow[e];
+      edgeSummaries[e].backwardFlow += backwardFlow[e];
+      edgeOverTime[e].backwardFlow[i] = backwardFlow[e];
+    }
+  }
+  return {
+    nodeSummaries,
+    nodeOverTime,
+    edgeSummaries,
+    edgeOverTime,
+    intervalEnds,
+  };
+}
diff --git a/src/analysis/credData.test.js b/src/analysis/credData.test.js
new file mode 100644
index 0000000..4efad05
--- /dev/null
+++ b/src/analysis/credData.test.js
@@ -0,0 +1,55 @@
+// @flow
+
+import {computeCredData} from "./credData";
+import type {TimelineCredScores} from "../core/algorithm/distributionToCred";
+
+describe("src/analysis/credData", () => {
+  it("handles empty scores correctly", () => {
+    expect(computeCredData([])).toEqual({
+      nodeSummaries: [],
+      nodeOverTime: [],
+      edgeSummaries: [],
+      edgeOverTime: [],
+      intervalEnds: [],
+    });
+  });
+  it("handles non-empty scores correctly", () => {
+    const scores: TimelineCredScores = [
+      {
+        interval: {startTimeMs: 0, endTimeMs: 100},
+        cred: new Float64Array([4, 5]),
+        forwardFlow: new Float64Array([1]),
+        backwardFlow: new Float64Array([2]),
+        seedFlow: new Float64Array([0, 1]),
+        syntheticLoopFlow: new Float64Array([0.1, 0]),
+      },
+      {
+        interval: {startTimeMs: 100, endTimeMs: 200},
+        cred: new Float64Array([10, 1]),
+        forwardFlow: new Float64Array([1]),
+        backwardFlow: new Float64Array([0]),
+        seedFlow: new Float64Array([0, 1]),
+        syntheticLoopFlow: new Float64Array([0.1, 0]),
+      },
+    ];
+    const expected = {
+      intervalEnds: [100, 200],
+      nodeSummaries: [
+        {cred: 14, seedFlow: 0, syntheticLoopFlow: 0.2},
+        {cred: 6, seedFlow: 2, syntheticLoopFlow: 0},
+      ],
+      nodeOverTime: [
+        {cred: [4, 10], seedFlow: [0, 0], syntheticLoopFlow: [0.1, 0.1]},
+        {cred: [5, 1], seedFlow: [1, 1], syntheticLoopFlow: [0, 0]},
+      ],
+      edgeSummaries: [
+        {
+          forwardFlow: 2,
+          backwardFlow: 2,
+        },
+      ],
+      edgeOverTime: [{forwardFlow: [1, 1], backwardFlow: [2, 0]}],
+    };
+    expect(computeCredData(scores)).toEqual(expected);
+  });
+});
diff --git a/src/core/algorithm/distributionToCred.js b/src/core/algorithm/distributionToCred.js
index cb90bce..1fe9884 100644
--- a/src/core/algorithm/distributionToCred.js
+++ b/src/core/algorithm/distributionToCred.js
@@ -10,8 +10,8 @@ import {type Interval} from "../interval";
 import {type TimelineDistributions} from "./timelinePagerank";
 import {NodeAddress, type NodeAddressT} from "../../core/graph";
 
-export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
-export opaque type EdgeOrderedCredScores: Float64Array = Float64Array;
+export type NodeOrderedCredScores = Float64Array;
+export type EdgeOrderedCredScores = Float64Array;
 
 /**
  * Represents cred scores over time.