add `analysis/timeline/distributionToCred`

This module takes the timeline distributions created by
`timelinePagerank`, and re-normalizes the scores into cred. For details
on the algorithm, read comments and docstrings in the module.

Test plan: Unit tests added.
This commit is contained in:
Dandelion Mané 2019-07-10 14:18:44 +01:00
parent 87720c4868
commit 162f73c3e9
2 changed files with 142 additions and 0 deletions

View File

@ -0,0 +1,63 @@
// @flow
/**
* Takes timeline distributions and uses them to create normalized cred.
*/
import {sum} from "d3-array";
import {type Interval} from "./interval";
import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph";
/**
 * Represents the full timeline cred for a graph.
 *
 * This is a read-only array of slices. Each slice pairs a time interval
 * with the cred scores of the nodes for that interval.
 */
export type FullTimelineCred = $ReadOnlyArray<{|
// The interval for this slice.
+interval: Interval,
// The cred for each node within this slice's interval.
// (Uses the graph's canonical node ordering, so entry i belongs to the
// i-th node in that ordering.)
+cred: Float64Array,
|}>;
/**
* Convert a TimelineDistribution into TimelineCred.
*
* The difference between the distribution and cred is that cred has been
* re-normalized to present human-agreeable scores, rather than a probability
* distribution.
*
* This implementation normalizes the scores so that in each interval, the
* total score of every node matching scoringNodePrefix is equal to the
* interval's weight.
*/
export function distributionToCred(
ds: TimelineDistributions,
nodeOrder: $ReadOnlyArray<NodeAddressT>,
scoringNodePrefix: NodeAddressT
): FullTimelineCred {
if (ds.length === 0) {
return [];
}
const intervals = ds.map((x) => x.interval);
const scoringNodeIndices = [];
const cred = new Array(nodeOrder.length);
for (let i = 0; i < nodeOrder.length; i++) {
if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
scoringNodeIndices.push(i);
}
cred[i] = new Array(intervals.length);
}
if (scoringNodeIndices.length === 0) {
throw new Error("no nodes matched scoringNodePrefix");
}
return ds.map(({interval, distribution, intervalWeight}) => {
const intervalTotalScore = sum(
scoringNodeIndices.map((x) => distribution[x])
);
const intervalNormalizer = intervalWeight / intervalTotalScore;
const cred = distribution.map((x) => x * intervalNormalizer);
return {interval, cred};
});
}

View File

@ -0,0 +1,79 @@
// @flow
import {NodeAddress} from "../../core/graph";
import {distributionToCred} from "./distributionToCred";
// Unit tests for distributionToCred: normalization when all or only some nodes
// are scoring, the error raised when no node matches the prefix, and the
// empty-input case.
describe("src/analysis/timeline/distributionToCred", () => {
  // Shorthand for building a node address from its parts.
  const na = (...parts) => NodeAddress.fromParts(parts);
  describe("distributionToCred", () => {
    it("works in a case where all nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      // The empty address is passed as the prefix so that both nodes are
      // scoring (per this test's title).
      const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
      // Each distribution sums to 1, so cred is distribution * intervalWeight.
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([1, 1]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([9, 1]),
        },
      ];
      // Received value goes in expect(), expected value in the matcher, so
      // that failure output labels the two sides correctly.
      expect(actual).toEqual(expected);
    });
    it("works in a case where some nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      // Only "bar" is scoring, so its score alone must equal intervalWeight;
      // "foo" is scaled by the same factor.
      const actual = distributionToCred(ds, nodeOrder, na("bar"));
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([2, 2]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([90, 10]),
        },
      ];
      expect(actual).toEqual(expected);
    });
    it("errors when no nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      // "zod" matches neither "foo" nor "bar".
      const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
      expect(fail).toThrowError("no nodes matched scoringNodePrefix");
    });
    it("returns empty array if no intervals are present", () => {
      expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
    });
  });
});