add `analysis/timeline/distributionToCred`
This module takes the timeline distributions created by `timelinePagerank` and re-normalizes their scores into cred. For details on the algorithm, see the comments and docstrings in the module.

Test plan: unit tests added.
parent 87720c4868
commit 162f73c3e9
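As a rough sketch of how the new function is intended to be called (names and types come from the diff below; the fixture values are illustrative and mirror the unit tests):

// Usage sketch only; in practice `ds` comes from timelinePagerank.
import {NodeAddress} from "../../core/graph";
import {distributionToCred} from "./distributionToCred";

const ds = [
  {
    interval: {startTimeMs: 0, endTimeMs: 10},
    intervalWeight: 2,
    distribution: new Float64Array([0.5, 0.5]),
  },
];
const nodeOrder = [NodeAddress.fromParts(["foo"]), NodeAddress.fromParts(["bar"])];
// Every node matches the empty prefix, so both nodes are scoring nodes.
const cred = distributionToCred(ds, nodeOrder, NodeAddress.empty);
// cred[0].cred is Float64Array([1, 1]): the scoring nodes' cred sums to intervalWeight (2).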
src/analysis/timeline/distributionToCred.js
@@ -0,0 +1,63 @@
// @flow

/**
 * Takes timeline distributions and uses them to create normalized cred.
 */

import {sum} from "d3-array";
import {type Interval} from "./interval";
import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph";

/**
 * Represents the full timeline cred for a graph.
 */
export type FullTimelineCred = $ReadOnlyArray<{|
  // The interval for this slice.
  +interval: Interval,
  // The cred for each node.
  // (Uses the graph's canonical node ordering.)
  +cred: Float64Array,
|}>;

/**
 * Convert a TimelineDistribution into TimelineCred.
 *
 * The difference between the distribution and cred is that cred has been
 * re-normalized to present human-agreeable scores, rather than a probability
 * distribution.
 *
 * This implementation normalizes the scores so that in each interval, the
 * total score of every node matching scoringNodePrefix is equal to the
 * interval's weight.
 */
export function distributionToCred(
  ds: TimelineDistributions,
  nodeOrder: $ReadOnlyArray<NodeAddressT>,
  scoringNodePrefix: NodeAddressT
): FullTimelineCred {
  if (ds.length === 0) {
    return [];
  }
  const intervals = ds.map((x) => x.interval);
  const scoringNodeIndices = [];
  const cred = new Array(nodeOrder.length);
  for (let i = 0; i < nodeOrder.length; i++) {
    if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
      scoringNodeIndices.push(i);
    }
    cred[i] = new Array(intervals.length);
  }
  if (scoringNodeIndices.length === 0) {
    throw new Error("no nodes matched scoringNodePrefix");
  }

  return ds.map(({interval, distribution, intervalWeight}) => {
    const intervalTotalScore = sum(
      scoringNodeIndices.map((x) => distribution[x])
    );
    const intervalNormalizer = intervalWeight / intervalTotalScore;
    const cred = distribution.map((x) => x * intervalNormalizer);
    return {interval, cred};
  });
}
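To make the normalization concrete, here is a worked example using the same fixture values as the second test case below (illustrative arithmetic only, not part of the diff):

// With scoringNodePrefix = na("bar"), only "bar" (index 1) is a scoring node.
// Interval 1: scoring mass = 0.5, normalizer = 2 / 0.5 = 4, cred = [0.5 * 4, 0.5 * 4] = [2, 2].
// Interval 2: scoring mass = 0.1, normalizer = 10 / 0.1 = 100, cred = [0.9 * 100, 0.1 * 100] = [90, 10].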
src/analysis/timeline/distributionToCred.test.js
@@ -0,0 +1,79 @@
// @flow

import {NodeAddress} from "../../core/graph";
import {distributionToCred} from "./distributionToCred";

describe("src/analysis/timeline/distributionToCred", () => {
  const na = (...parts) => NodeAddress.fromParts(parts);
  describe("distributionToCred", () => {
    it("works in a case where all nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([1, 1]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([9, 1]),
        },
      ];
      expect(expected).toEqual(actual);
    });
    it("works in a case where some nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const actual = distributionToCred(ds, nodeOrder, na("bar"));
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([2, 2]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([90, 10]),
        },
      ];
      expect(expected).toEqual(actual);
    });
    it("errors when no nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
      expect(fail).toThrowError("no nodes matched scoringNodePrefix");
    });
    it("returns empty array if no intervals are present", () => {
      expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
    });
  });
});