diff --git a/src/analysis/timeline/distributionToCred.js b/src/analysis/timeline/distributionToCred.js
new file mode 100644
index 0000000..eb74f96
--- /dev/null
+++ b/src/analysis/timeline/distributionToCred.js
@@ -0,0 +1,71 @@
+// @flow
+
+/**
+ * Takes timeline distributions and uses them to create normalized cred.
+ */
+
+import {sum} from "d3-array";
+import {type Interval} from "./interval";
+import {type TimelineDistributions} from "./timelinePagerank";
+import {NodeAddress, type NodeAddressT} from "../../core/graph";
+
+/**
+ * Represents the full timeline cred for a graph.
+ */
+export type FullTimelineCred = $ReadOnlyArray<{|
+  // The interval for this slice.
+  +interval: Interval,
+  // The cred for each node.
+  // (Uses the graph's canonical node ordering.)
+  +cred: Float64Array,
+|}>;
+
+/**
+ * Convert TimelineDistributions into FullTimelineCred.
+ *
+ * The difference between the distribution and cred is that cred has been
+ * re-normalized to present human-agreeable scores, rather than a probability
+ * distribution.
+ *
+ * This implementation normalizes the scores so that in each interval, the
+ * total score of every node matching scoringNodePrefix is equal to the
+ * interval's weight.
+ *
+ * `nodeOrder` must list the node addresses in the same order as the entries
+ * of each distribution, since indices found in one are used on the other.
+ *
+ * Returns an empty array if there are no intervals. Throws an error if there
+ * are intervals but no node in `nodeOrder` matches `scoringNodePrefix`. If
+ * the scoring nodes have a total score of zero in an interval, every node
+ * gets zero cred for that interval, instead of NaN or Infinity.
+ */
+export function distributionToCred(
+  ds: TimelineDistributions,
+  nodeOrder: $ReadOnlyArray<NodeAddressT>,
+  scoringNodePrefix: NodeAddressT
+): FullTimelineCred {
+  if (ds.length === 0) {
+    return [];
+  }
+  const scoringNodeIndices = [];
+  for (let i = 0; i < nodeOrder.length; i++) {
+    if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
+      scoringNodeIndices.push(i);
+    }
+  }
+  if (scoringNodeIndices.length === 0) {
+    throw new Error("no nodes matched scoringNodePrefix");
+  }
+
+  return ds.map(({interval, distribution, intervalWeight}) => {
+    const intervalTotalScore = sum(
+      scoringNodeIndices.map((x) => distribution[x])
+    );
+    // With no score on the scoring nodes there is nothing to rescale, so
+    // use a zero normalizer rather than dividing by zero.
+    const intervalNormalizer =
+      intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
+    const cred = distribution.map((x) => x * intervalNormalizer);
+    return {interval, cred};
+  });
+}
diff --git a/src/analysis/timeline/distributionToCred.test.js b/src/analysis/timeline/distributionToCred.test.js
new file mode 100644
index 0000000..9d75a3e
--- /dev/null
+++ b/src/analysis/timeline/distributionToCred.test.js
@@ -0,0 +1,97 @@
+// @flow
+
+import {NodeAddress} from "../../core/graph";
+import {distributionToCred} from "./distributionToCred";
+
+describe("src/analysis/timeline/distributionToCred", () => {
+  const na = (...parts) => NodeAddress.fromParts(parts);
+  describe("distributionToCred", () => {
+    it("works in a case where all nodes are scoring", () => {
+      const ds = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          intervalWeight: 2,
+          distribution: new Float64Array([0.5, 0.5]),
+        },
+        {
+          interval: {startTimeMs: 10, endTimeMs: 20},
+          intervalWeight: 10,
+          distribution: new Float64Array([0.9, 0.1]),
+        },
+      ];
+      const nodeOrder = [na("foo"), na("bar")];
+      const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
+      const expected = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          cred: new Float64Array([1, 1]),
+        },
+        {
+          interval: {startTimeMs: 10, endTimeMs: 20},
+          cred: new Float64Array([9, 1]),
+        },
+      ];
+      expect(actual).toEqual(expected);
+    });
+    it("works in a case where some nodes are scoring", () => {
+      const ds = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          intervalWeight: 2,
+          distribution: new Float64Array([0.5, 0.5]),
+        },
+        {
+          interval: {startTimeMs: 10, endTimeMs: 20},
+          intervalWeight: 10,
+          distribution: new Float64Array([0.9, 0.1]),
+        },
+      ];
+      const nodeOrder = [na("foo"), na("bar")];
+      const actual = distributionToCred(ds, nodeOrder, na("bar"));
+      const expected = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          cred: new Float64Array([2, 2]),
+        },
+        {
+          interval: {startTimeMs: 10, endTimeMs: 20},
+          cred: new Float64Array([90, 10]),
+        },
+      ];
+      expect(actual).toEqual(expected);
+    });
+    it("errors when no nodes are scoring", () => {
+      const ds = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          intervalWeight: 2,
+          distribution: new Float64Array([0.5, 0.5]),
+        },
+      ];
+      const nodeOrder = [na("foo"), na("bar")];
+      const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
+      expect(fail).toThrowError("no nodes matched scoringNodePrefix");
+    });
+    it("gives zero cred when the scoring nodes have no score", () => {
+      const ds = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          intervalWeight: 2,
+          distribution: new Float64Array([1, 0]),
+        },
+      ];
+      const nodeOrder = [na("foo"), na("bar")];
+      const actual = distributionToCred(ds, nodeOrder, na("bar"));
+      const expected = [
+        {
+          interval: {startTimeMs: 0, endTimeMs: 10},
+          cred: new Float64Array([0, 0]),
+        },
+      ];
+      expect(actual).toEqual(expected);
+    });
+    it("returns empty array if no intervals are present", () => {
+      expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
+    });
+  });
+});