Compute CredData from TimelineCredScores (#1831)

This commit adds the `analysis/credData` module, which processes raw
TimelineCredScores into a format which is better for serialization and
data analysis. In particular, this format explicitly stores the summary
(summed-across-time) data for nodes separately from the raw temporal
data, which will allow us to throw away raw data for uninteresting or
low-cred nodes, while keeping the summary.

Test plan: I've added some basic unit tests; run `yarn test`.
This commit is contained in:
Dandelion Mané 2020-06-01 17:14:26 -07:00 committed by GitHub
parent ae181c2fda
commit c5c9f950d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 181 additions and 2 deletions

124
src/analysis/credData.js Normal file
View File

@ -0,0 +1,124 @@
// @flow
import type {TimestampMs} from "../util/timestamp";
import type {TimelineCredScores} from "../core/algorithm/distributionToCred";
/**
 * Comprehensive data on a cred distribution.
 *
 * Produced by `computeCredData`. The node-indexed arrays share one index
 * space (one entry per node), as do the edge-indexed arrays (one entry per
 * edge). The `*OverTime` entries are nullable so that raw temporal data can
 * be dropped for a node or edge while its summary is kept; `computeCredData`
 * itself never emits a null entry.
 */
export type CredData = {|
// Cred level information, always stored in graph address order.
// Summed-across-time data, one entry per node.
+nodeSummaries: $ReadOnlyArray<NodeCredSummary>,
// Per-interval data for each node, or null if it was filtered out.
+nodeOverTime: $ReadOnlyArray<NodeCredOverTime | null>,
// Summed-across-time flows, one entry per edge.
+edgeSummaries: $ReadOnlyArray<EdgeCredSummary>,
// Per-interval flows for each edge, or null if it was filtered out.
+edgeOverTime: $ReadOnlyArray<EdgeCredOverTime | null>,
// The `endTimeMs` of each interval, in the same order as the input scores.
+intervalEnds: $ReadOnlyArray<TimestampMs>,
|};
/**
 * Summary of a node's cred across all time.
 *
 * CredData includes this information for every node in the graph, regardless
 * of its score. Each field is the sum, over every interval, of the
 * corresponding per-interval value for this node.
 */
export type NodeCredSummary = {|
// Sum of the node's per-interval `cred` values.
+cred: number,
// Sum of the node's per-interval `seedFlow` values.
+seedFlow: number,
// Sum of the node's per-interval `syntheticLoopFlow` values.
+syntheticLoopFlow: number,
|};
/**
 * A node's cred data at interval time resolution.
 *
 * Each array holds one value per interval; index `i` corresponds to
 * `intervalEnds[i]` in the enclosing CredData.
 *
 * To save space, the CredData may filter out the NodeCredOverTime entirely
 * for nodes with low score, or may filter out the seedFlow or syntheticLoopFlow
 * fields (setting them to null) if either was trivial. `computeCredData`
 * performs no such filtering: it always populates all three arrays.
 */
export type NodeCredOverTime = {|
// The node's cred in each interval.
+cred: $ReadOnlyArray<number>,
// The node's seed flow in each interval, or null if filtered out.
+seedFlow: $ReadOnlyArray<number> | null,
// The node's synthetic loop flow in each interval, or null if filtered out.
+syntheticLoopFlow: $ReadOnlyArray<number> | null,
|};
/**
 * An edge's cred flows across all time.
 *
 * CredData includes this for every edge in the graph, regardless of its cred
 * flows. Each field is the sum, over every interval, of the corresponding
 * per-interval flow for this edge.
 */
export type EdgeCredSummary = {|
// Sum of the edge's per-interval `forwardFlow` values.
+forwardFlow: number,
// Sum of the edge's per-interval `backwardFlow` values.
+backwardFlow: number,
|};
/**
 * An edge's cred flows at interval time resolution.
 *
 * Each non-null array holds one value per interval; index `i` corresponds to
 * `intervalEnds[i]` in the enclosing CredData.
 *
 * To save space, we may filter out this struct entirely for low-cred-flow
 * edges, or we might null out the forwardFlow or backwardFlow field when the
 * flow in that direction was negligible. `computeCredData` performs no such
 * filtering: it always populates both arrays.
 */
export type EdgeCredOverTime = {|
// The edge's forward flow in each interval, or null if filtered out.
+forwardFlow: $ReadOnlyArray<number> | null,
// The edge's backward flow in each interval, or null if filtered out.
+backwardFlow: $ReadOnlyArray<number> | null,
|};
export function computeCredData(scores: TimelineCredScores): CredData {
const numIntervals = scores.length;
if (numIntervals === 0) {
return {
nodeSummaries: [],
nodeOverTime: [],
edgeSummaries: [],
edgeOverTime: [],
intervalEnds: [],
};
}
const intervalEnds = scores.map((x) => x.interval.endTimeMs);
const numNodes = scores[0].cred.length;
const numEdges = scores[0].forwardFlow.length;
const nodeSummaries = new Array(numNodes).fill(null).map(() => ({
cred: 0,
seedFlow: 0,
syntheticLoopFlow: 0,
}));
const nodeOverTime = new Array(numNodes).fill(null).map(() => ({
cred: new Array(numIntervals),
seedFlow: new Array(numIntervals),
syntheticLoopFlow: new Array(numIntervals),
}));
const edgeSummaries = new Array(numEdges).fill(null).map(() => ({
forwardFlow: 0,
backwardFlow: 0,
}));
const edgeOverTime = new Array(numEdges).fill(null).map(() => ({
forwardFlow: new Array(numIntervals),
backwardFlow: new Array(numIntervals),
}));
for (let i = 0; i < numIntervals; i++) {
const {
cred,
forwardFlow,
backwardFlow,
seedFlow,
syntheticLoopFlow,
} = scores[i];
for (let n = 0; n < numNodes; n++) {
nodeSummaries[n].cred += cred[n];
nodeOverTime[n].cred[i] = cred[n];
nodeSummaries[n].seedFlow += seedFlow[n];
nodeOverTime[n].seedFlow[i] = seedFlow[n];
nodeSummaries[n].syntheticLoopFlow += syntheticLoopFlow[n];
nodeOverTime[n].syntheticLoopFlow[i] = syntheticLoopFlow[n];
}
for (let e = 0; e < numEdges; e++) {
edgeSummaries[e].forwardFlow += forwardFlow[e];
edgeOverTime[e].forwardFlow[i] = forwardFlow[e];
edgeSummaries[e].backwardFlow += backwardFlow[e];
edgeOverTime[e].backwardFlow[i] = backwardFlow[e];
}
}
return {
nodeSummaries,
nodeOverTime,
edgeSummaries,
edgeOverTime,
intervalEnds,
};
}

View File

@ -0,0 +1,55 @@
// @flow
import {computeCredData} from "./credData";
import type {TimelineCredScores} from "../core/algorithm/distributionToCred";
describe("src/analysis/credData", () => {
it("handles empty scores correctly", () => {
expect(computeCredData([])).toEqual({
nodeSummaries: [],
nodeOverTime: [],
edgeSummaries: [],
edgeOverTime: [],
intervalEnds: [],
});
});
it("handles non-empty scores correctly", () => {
const scores: TimelineCredScores = [
{
interval: {startTimeMs: 0, endTimeMs: 100},
cred: new Float64Array([4, 5]),
forwardFlow: new Float64Array([1]),
backwardFlow: new Float64Array([2]),
seedFlow: new Float64Array([0, 1]),
syntheticLoopFlow: new Float64Array([0.1, 0]),
},
{
interval: {startTimeMs: 100, endTimeMs: 200},
cred: new Float64Array([10, 1]),
forwardFlow: new Float64Array([1]),
backwardFlow: new Float64Array([0]),
seedFlow: new Float64Array([0, 1]),
syntheticLoopFlow: new Float64Array([0.1, 0]),
},
];
const expected = {
intervalEnds: [100, 200],
nodeSummaries: [
{cred: 14, seedFlow: 0, syntheticLoopFlow: 0.2},
{cred: 6, seedFlow: 2, syntheticLoopFlow: 0},
],
nodeOverTime: [
{cred: [4, 10], seedFlow: [0, 0], syntheticLoopFlow: [0.1, 0.1]},
{cred: [5, 1], seedFlow: [1, 1], syntheticLoopFlow: [0, 0]},
],
edgeSummaries: [
{
forwardFlow: 2,
backwardFlow: 2,
},
],
edgeOverTime: [{forwardFlow: [1, 1], backwardFlow: [2, 0]}],
};
expect(computeCredData(scores)).toEqual(expected);
});
});

View File

@ -10,8 +10,8 @@ import {type Interval} from "../interval";
import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph";
export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
export opaque type EdgeOrderedCredScores: Float64Array = Float64Array;
export type NodeOrderedCredScores = Float64Array;
export type EdgeOrderedCredScores = Float64Array;
/**
* Represents cred scores over time.