Improve distributionToCred (#1654)

This commit makes several small improvements to the distributionToCred
module:

- We rename the output `FullTimelineCred` data structure to
`TimelineCredScores`, which is more descriptive
- We re-organize that data structure so that rather than being an array
of `{interval, cred}` objects, it has an `intervals` property and a
`intervalCredScores` property, both of which are arrays. This will make
downstream usage cleaner.
- An unused variable is removed.
- We document invariants about the TimelineCredScores data type.
- We mark the TimelineCredScores data type opaque, so that clients
receiving a TimelineCredScores can trust that the invariants are
maintained.

Test plan:
- The rename is robustly tested by `yarn flow`.
- That the refactor lands without changing existing semantics is
robustly tested by `yarn test --full`, since we snapshot a full cred
load; thus we know that cred scores haven't changed. (Also, we have
existing unit tests).
- The newly documented invariants aren't robustly tested by the test
code, but it's easy to see that they hold by reading the algorithm.
This commit is contained in:
Dandelion Mané 2020-02-09 11:58:19 -08:00 committed by GitHub
parent db94bb50fb
commit 8dac968a69
3 changed files with 83 additions and 68 deletions

View File

@ -199,17 +199,20 @@ export class TimelineCred {
fullParams.intervalDecay,
fullParams.alpha
);
const cred = distributionToCred(distribution, nodeOrder, scorePrefixes);
const credScores = distributionToCred(
distribution,
nodeOrder,
scorePrefixes
);
const addressToCred = new Map();
for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i];
const addrCred = cred.map(({cred}) => cred[i]);
const addrCred = credScores.intervalCredScores.map((cred) => cred[i]);
addressToCred.set(addr, addrCred);
}
const intervals = cred.map((x) => x.interval);
return new TimelineCred(
weightedGraph,
intervals,
credScores.intervals,
addressToCred,
fullParams,
plugins

View File

@ -9,26 +9,41 @@ import {type Interval} from "../interval";
import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph";
/**
* Represents the full timeline cred for a graph.
*/
export type FullTimelineCred = $ReadOnlyArray<{|
// The interval for this slice.
+interval: Interval,
// The cred for each node.
// (Uses the graph's canonical node ordering.)
+cred: Float64Array,
|}>;
export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
/**
* Convert a TimelineDistribution into TimelineCred.
* Represents cred scores over time.
*
* It contains an array of intervals, which give timing information, and an
* array of NodeOrderedCredScores (`intervalCredScores`), which are
* Float64Arrays. Each NodeOrderedCredScores contains the cred scores for one
* interval. The cred scores are included in node-address-sorted order, and as
* such the scores can only be interpreted in the context of an associated Graph.
*
* As invariants, it is guaranteed that:
* - intervals and intervalCredScores will always have the same length
* - all of the intervalCredScores will have a consistent implicit node ordering
*
* The type is marked opaque so that no-one else can construct instances that
* don't conform to these invariants.
*/
export opaque type TimelineCredScores: {|
+intervals: $ReadOnlyArray<Interval>,
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|} = {|
+intervals: $ReadOnlyArray<Interval>,
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|};
/**
* Convert a TimelineDistribution into CredScores.
*
* The difference between the distribution and cred is that cred has been
* re-normalized to present human-agreeable scores, rather than a probability
* distribution.
*
* This implementation normalizes the scores so that in each interval, the
* total score of every node matching scoringNodePrefix is equal to the
* total score of every node matching a scoringNodePrefix is equal to the
* interval's weight.
*
* Edge cases:
@ -42,22 +57,19 @@ export function distributionToCred(
ds: TimelineDistributions,
nodeOrder: $ReadOnlyArray<NodeAddressT>,
scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
): FullTimelineCred {
): TimelineCredScores {
if (ds.length === 0) {
return [];
return {intervals: [], intervalCredScores: []};
}
const intervals = ds.map((x) => x.interval);
const scoringNodeIndices = [];
const cred = new Array(nodeOrder.length);
for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i];
if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
scoringNodeIndices.push(i);
}
cred[i] = new Array(intervals.length);
}
return ds.map(({interval, distribution, intervalWeight}) => {
const intervals = ds.map((x) => x.interval);
const intervalCredScores = ds.map(({distribution, intervalWeight}) => {
const intervalTotalScore = sum(
scoringNodeIndices.map((x) => distribution[x])
);
@ -65,6 +77,7 @@ export function distributionToCred(
const intervalNormalizer =
intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
const cred = distribution.map((x) => x * intervalNormalizer);
return {interval, cred};
return cred;
});
return {intervalCredScores, intervals};
}

View File

@ -21,16 +21,16 @@ describe("src/core/algorithm/distributionToCred", () => {
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([1, 1]),
},
{
interval: {startTimeMs: 10, endTimeMs: 20},
cred: new Float64Array([9, 1]),
},
];
const expected = {
intervals: [
{startTimeMs: 0, endTimeMs: 10},
{startTimeMs: 10, endTimeMs: 20},
],
intervalCredScores: [
new Float64Array([1, 1]),
new Float64Array([9, 1]),
],
};
expect(expected).toEqual(actual);
});
it("correctly handles multiple scoring prefixes", () => {
@ -48,16 +48,16 @@ describe("src/core/algorithm/distributionToCred", () => {
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([1, 1]),
},
{
interval: {startTimeMs: 10, endTimeMs: 20},
cred: new Float64Array([9, 1]),
},
];
const expected = {
intervals: [
{startTimeMs: 0, endTimeMs: 10},
{startTimeMs: 10, endTimeMs: 20},
],
intervalCredScores: [
new Float64Array([1, 1]),
new Float64Array([9, 1]),
],
};
expect(expected).toEqual(actual);
});
it("works in a case where some nodes are scoring", () => {
@ -75,16 +75,16 @@ describe("src/core/algorithm/distributionToCred", () => {
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([2, 2]),
},
{
interval: {startTimeMs: 10, endTimeMs: 20},
cred: new Float64Array([90, 10]),
},
];
const expected = {
intervals: [
{startTimeMs: 0, endTimeMs: 10},
{startTimeMs: 10, endTimeMs: 20},
],
intervalCredScores: [
new Float64Array([2, 2]),
new Float64Array([90, 10]),
],
};
expect(expected).toEqual(actual);
});
it("handles the case where no nodes are scoring", () => {
@ -97,12 +97,10 @@ describe("src/core/algorithm/distributionToCred", () => {
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, []);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([0, 0]),
},
];
const expected = {
intervals: [{startTimeMs: 0, endTimeMs: 10}],
intervalCredScores: [new Float64Array([0, 0])],
};
expect(actual).toEqual(expected);
});
@ -116,17 +114,18 @@ describe("src/core/algorithm/distributionToCred", () => {
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([0, 0]),
},
];
const expected = {
intervals: [{startTimeMs: 0, endTimeMs: 10}],
intervalCredScores: [new Float64Array([0, 0])],
};
expect(actual).toEqual(expected);
});
it("returns empty array if no intervals are present", () => {
expect(distributionToCred([], [], [])).toEqual([]);
it("returns empty CredScores if no intervals are present", () => {
expect(distributionToCred([], [], [])).toEqual({
intervals: [],
intervalCredScores: [],
});
});
});
});