add `analysis/timeline/distributionToCred`
This module takes the timeline distributions created by `timelinePagerank`, and re-normalizes the scores into cred. For details on the algorithm, read comments and docstrings in the module. Test plan: Unit tests added.
This commit is contained in:
parent
87720c4868
commit
162f73c3e9
|
@ -0,0 +1,63 @@
|
||||||
|
// @flow
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes timeline distributions and uses them to create normalized cred.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {sum} from "d3-array";
|
||||||
|
import {type Interval} from "./interval";
|
||||||
|
import {type TimelineDistributions} from "./timelinePagerank";
|
||||||
|
import {NodeAddress, type NodeAddressT} from "../../core/graph";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the full timeline cred for a graph.
|
||||||
|
*/
|
||||||
|
export type FullTimelineCred = $ReadOnlyArray<{|
|
||||||
|
// The interval for this slice.
|
||||||
|
+interval: Interval,
|
||||||
|
// The cred for each node.
|
||||||
|
// (Uses the graph's canonical node ordering.)
|
||||||
|
+cred: Float64Array,
|
||||||
|
|}>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a TimelineDistribution into TimelineCred.
|
||||||
|
*
|
||||||
|
* The difference between the distribution and cred is that cred has been
|
||||||
|
* re-normalized to present human-agreeable scores, rather than a probability
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* This implementation normalizes the scores so that in each interval, the
|
||||||
|
* total score of every node matching scoringNodePrefix is equal to the
|
||||||
|
* interval's weight.
|
||||||
|
*/
|
||||||
|
export function distributionToCred(
|
||||||
|
ds: TimelineDistributions,
|
||||||
|
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||||
|
scoringNodePrefix: NodeAddressT
|
||||||
|
): FullTimelineCred {
|
||||||
|
if (ds.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
const intervals = ds.map((x) => x.interval);
|
||||||
|
const scoringNodeIndices = [];
|
||||||
|
const cred = new Array(nodeOrder.length);
|
||||||
|
for (let i = 0; i < nodeOrder.length; i++) {
|
||||||
|
if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
|
||||||
|
scoringNodeIndices.push(i);
|
||||||
|
}
|
||||||
|
cred[i] = new Array(intervals.length);
|
||||||
|
}
|
||||||
|
if (scoringNodeIndices.length === 0) {
|
||||||
|
throw new Error("no nodes matched scoringNodePrefix");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ds.map(({interval, distribution, intervalWeight}) => {
|
||||||
|
const intervalTotalScore = sum(
|
||||||
|
scoringNodeIndices.map((x) => distribution[x])
|
||||||
|
);
|
||||||
|
const intervalNormalizer = intervalWeight / intervalTotalScore;
|
||||||
|
const cred = distribution.map((x) => x * intervalNormalizer);
|
||||||
|
return {interval, cred};
|
||||||
|
});
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
// @flow
|
||||||
|
|
||||||
|
import {NodeAddress} from "../../core/graph";
|
||||||
|
import {distributionToCred} from "./distributionToCred";
|
||||||
|
|
||||||
|
describe("src/analysis/timeline/distributionToCred", () => {
  const na = (...parts) => NodeAddress.fromParts(parts);

  // Two-node, two-interval fixture shared by the normalization tests.
  // Built fresh on each call so that tests never share typed-array state.
  const exampleDistributions = () => [
    {
      interval: {startTimeMs: 0, endTimeMs: 10},
      intervalWeight: 2,
      distribution: new Float64Array([0.5, 0.5]),
    },
    {
      interval: {startTimeMs: 10, endTimeMs: 20},
      intervalWeight: 10,
      distribution: new Float64Array([0.9, 0.1]),
    },
  ];

  describe("distributionToCred", () => {
    it("works in a case where all nodes are scoring", () => {
      const ds = exampleDistributions();
      const nodeOrder = [na("foo"), na("bar")];
      // The empty address is a prefix of every address, so both nodes score
      // and each interval's cred sums to the interval weight.
      const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([1, 1]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([9, 1]),
        },
      ];
      // Jest convention: received value in expect(), expected in the matcher,
      // so that failure diffs are labelled correctly.
      expect(actual).toEqual(expected);
    });

    it("works in a case where some nodes are scoring", () => {
      const ds = exampleDistributions();
      const nodeOrder = [na("foo"), na("bar")];
      // Only "bar" scores, so bar's cred equals the interval weight and
      // foo's cred is scaled by the same per-interval factor.
      const actual = distributionToCred(ds, nodeOrder, na("bar"));
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([2, 2]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([90, 10]),
        },
      ];
      expect(actual).toEqual(expected);
    });

    it("errors when no nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
      expect(fail).toThrowError("no nodes matched scoringNodePrefix");
    });

    it("returns empty array if no intervals are present", () => {
      expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
    });
  });
});
|
Loading…
Reference in New Issue