Improve distributionToCred (#1654)

This commit makes several small improvements to the distributionToCred
module:

- We rename the output `FullTimelineCred` data structure to
`TimelineCredScores`, which is more descriptive
- We re-organize that data structure so that rather than being an array
of `{interval, cred}` objects, it has an `intervals` property and a
`intervalCredScores` property, both of which are arrays. This will make
downstream usage cleaner.
- An unused variable is removed.
- We document invariants about the TimelineCredScores data type.
- We mark the TimelineCredScores data type opaque, so that clients
receiving a TimelineCredScores can trust that the invariants are
maintained.

Test plan:
- The rename is robustly tested by `yarn flow`.
- That the refactor lands without changing existing semantics is
robustly tested by `yarn test --full`, since we snapshot a full cred
load; thus we know that cred scores haven't changed. (Also, we have
existing unit tests).
- The newly documented invariants aren't robustly tested by the test
code, but it's easy to see that they hold by reading the algorithm.
This commit is contained in:
Dandelion Mané 2020-02-09 11:58:19 -08:00 committed by GitHub
parent db94bb50fb
commit 8dac968a69
3 changed files with 83 additions and 68 deletions

View File

@ -199,17 +199,20 @@ export class TimelineCred {
fullParams.intervalDecay, fullParams.intervalDecay,
fullParams.alpha fullParams.alpha
); );
const cred = distributionToCred(distribution, nodeOrder, scorePrefixes); const credScores = distributionToCred(
distribution,
nodeOrder,
scorePrefixes
);
const addressToCred = new Map(); const addressToCred = new Map();
for (let i = 0; i < nodeOrder.length; i++) { for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i]; const addr = nodeOrder[i];
const addrCred = cred.map(({cred}) => cred[i]); const addrCred = credScores.intervalCredScores.map((cred) => cred[i]);
addressToCred.set(addr, addrCred); addressToCred.set(addr, addrCred);
} }
const intervals = cred.map((x) => x.interval);
return new TimelineCred( return new TimelineCred(
weightedGraph, weightedGraph,
intervals, credScores.intervals,
addressToCred, addressToCred,
fullParams, fullParams,
plugins plugins

View File

@ -9,26 +9,41 @@ import {type Interval} from "../interval";
import {type TimelineDistributions} from "./timelinePagerank"; import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph"; import {NodeAddress, type NodeAddressT} from "../../core/graph";
/** export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
* Represents the full timeline cred for a graph.
*/
export type FullTimelineCred = $ReadOnlyArray<{|
// The interval for this slice.
+interval: Interval,
// The cred for each node.
// (Uses the graph's canonical node ordering.)
+cred: Float64Array,
|}>;
/** /**
* Convert a TimelineDistribution into TimelineCred. * Represents cred scores over time.
*
* It contains an array of intervals, which give timing information, and an
* array of CredTimeSlices, which are Float64Arrays. Each CredTimeSlice
* contains cred scores for an interval. The cred scores are included in
* node-address-sorted order, and as such the CredScores can only be
* interpreted in the context of an associated Graph.
*
* As invariants, it is guaranteed that:
* - intervals and intervalCredScores will always have the same length
* - all of the intervalCredScores will have a consistent implicit node ordering
*
* The type is marked opaque so that no-one else can construct instances that
* don't conform to these invariants.
*/
export opaque type TimelineCredScores: {|
+intervals: $ReadOnlyArray<Interval>,
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|} = {|
+intervals: $ReadOnlyArray<Interval>,
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|};
/**
* Convert a TimelineDistribution into CredScores.
* *
* The difference between the distribution and cred is that cred has been * The difference between the distribution and cred is that cred has been
* re-normalized to present human-agreeable scores, rather than a probability * re-normalized to present human-agreeable scores, rather than a probability
* distribution. * distribution.
* *
* This implementation normalizes the scores so that in each interval, the * This implementation normalizes the scores so that in each interval, the
* total score of every node matching scoringNodePrefix is equal to the * total score of every node matching a scoringNodePrefix is equal to the
* interval's weight. * interval's weight.
* *
* Edge cases: * Edge cases:
@ -42,22 +57,19 @@ export function distributionToCred(
ds: TimelineDistributions, ds: TimelineDistributions,
nodeOrder: $ReadOnlyArray<NodeAddressT>, nodeOrder: $ReadOnlyArray<NodeAddressT>,
scoringNodePrefixes: $ReadOnlyArray<NodeAddressT> scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
): FullTimelineCred { ): TimelineCredScores {
if (ds.length === 0) { if (ds.length === 0) {
return []; return {intervals: [], intervalCredScores: []};
} }
const intervals = ds.map((x) => x.interval);
const scoringNodeIndices = []; const scoringNodeIndices = [];
const cred = new Array(nodeOrder.length);
for (let i = 0; i < nodeOrder.length; i++) { for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i]; const addr = nodeOrder[i];
if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) { if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
scoringNodeIndices.push(i); scoringNodeIndices.push(i);
} }
cred[i] = new Array(intervals.length);
} }
const intervals = ds.map((x) => x.interval);
return ds.map(({interval, distribution, intervalWeight}) => { const intervalCredScores = ds.map(({distribution, intervalWeight}) => {
const intervalTotalScore = sum( const intervalTotalScore = sum(
scoringNodeIndices.map((x) => distribution[x]) scoringNodeIndices.map((x) => distribution[x])
); );
@ -65,6 +77,7 @@ export function distributionToCred(
const intervalNormalizer = const intervalNormalizer =
intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore; intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
const cred = distribution.map((x) => x * intervalNormalizer); const cred = distribution.map((x) => x * intervalNormalizer);
return {interval, cred}; return cred;
}); });
return {intervalCredScores, intervals};
} }

View File

@ -21,16 +21,16 @@ describe("src/core/algorithm/distributionToCred", () => {
]; ];
const nodeOrder = [na("foo"), na("bar")]; const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]); const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
const expected = [ const expected = {
{ intervals: [
interval: {startTimeMs: 0, endTimeMs: 10}, {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([1, 1]), {startTimeMs: 10, endTimeMs: 20},
}, ],
{ intervalCredScores: [
interval: {startTimeMs: 10, endTimeMs: 20}, new Float64Array([1, 1]),
cred: new Float64Array([9, 1]), new Float64Array([9, 1]),
}, ],
]; };
expect(expected).toEqual(actual); expect(expected).toEqual(actual);
}); });
it("correctly handles multiple scoring prefixes", () => { it("correctly handles multiple scoring prefixes", () => {
@ -48,16 +48,16 @@ describe("src/core/algorithm/distributionToCred", () => {
]; ];
const nodeOrder = [na("foo"), na("bar")]; const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]); const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
const expected = [ const expected = {
{ intervals: [
interval: {startTimeMs: 0, endTimeMs: 10}, {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([1, 1]), {startTimeMs: 10, endTimeMs: 20},
}, ],
{ intervalCredScores: [
interval: {startTimeMs: 10, endTimeMs: 20}, new Float64Array([1, 1]),
cred: new Float64Array([9, 1]), new Float64Array([9, 1]),
}, ],
]; };
expect(expected).toEqual(actual); expect(expected).toEqual(actual);
}); });
it("works in a case where some nodes are scoring", () => { it("works in a case where some nodes are scoring", () => {
@ -75,16 +75,16 @@ describe("src/core/algorithm/distributionToCred", () => {
]; ];
const nodeOrder = [na("foo"), na("bar")]; const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("bar")]); const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
const expected = [ const expected = {
{ intervals: [
interval: {startTimeMs: 0, endTimeMs: 10}, {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([2, 2]), {startTimeMs: 10, endTimeMs: 20},
}, ],
{ intervalCredScores: [
interval: {startTimeMs: 10, endTimeMs: 20}, new Float64Array([2, 2]),
cred: new Float64Array([90, 10]), new Float64Array([90, 10]),
}, ],
]; };
expect(expected).toEqual(actual); expect(expected).toEqual(actual);
}); });
it("handles the case where no nodes are scoring", () => { it("handles the case where no nodes are scoring", () => {
@ -97,12 +97,10 @@ describe("src/core/algorithm/distributionToCred", () => {
]; ];
const nodeOrder = [na("foo"), na("bar")]; const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, []); const actual = distributionToCred(ds, nodeOrder, []);
const expected = [ const expected = {
{ intervals: [{startTimeMs: 0, endTimeMs: 10}],
interval: {startTimeMs: 0, endTimeMs: 10}, intervalCredScores: [new Float64Array([0, 0])],
cred: new Float64Array([0, 0]), };
},
];
expect(actual).toEqual(expected); expect(actual).toEqual(expected);
}); });
@ -116,17 +114,18 @@ describe("src/core/algorithm/distributionToCred", () => {
]; ];
const nodeOrder = [na("foo"), na("bar")]; const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("bar")]); const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
const expected = [ const expected = {
{ intervals: [{startTimeMs: 0, endTimeMs: 10}],
interval: {startTimeMs: 0, endTimeMs: 10}, intervalCredScores: [new Float64Array([0, 0])],
cred: new Float64Array([0, 0]), };
},
];
expect(actual).toEqual(expected); expect(actual).toEqual(expected);
}); });
it("returns empty array if no intervals are present", () => { it("returns empty CredScores if no intervals are present", () => {
expect(distributionToCred([], [], [])).toEqual([]); expect(distributionToCred([], [], [])).toEqual({
intervals: [],
intervalCredScores: [],
});
}); });
}); });
}); });