Improve distributionToCred (#1654)
This commit makes several small improvements to the distributionToCred module: - We rename the output `FullTimelineCred` data structure to `TimelineCredScores`, which is more descriptive. - We re-organize that data structure so that rather than being an array of `{interval, cred}` objects, it has an `intervals` property and an `intervalCredScores` property, both of which are arrays. This will make downstream usage cleaner. - An unused variable is removed. - We document invariants about the TimelineCredScores data type. - We mark the TimelineCredScores data type opaque, so that clients receiving a TimelineCredScores can trust that the invariants are maintained. Test plan: - The rename is robustly tested by `yarn flow`. - That the refactor lands without changing existing semantics is robustly tested by `yarn test --full`, since we snapshot a full cred load; thus we know that cred scores haven't changed. (Also, we have existing unit tests). - The newly documented invariants aren't robustly tested by the test code, but it's easy to see that they hold by reading the algorithm.
This commit is contained in:
parent
db94bb50fb
commit
8dac968a69
|
@ -199,17 +199,20 @@ export class TimelineCred {
|
||||||
fullParams.intervalDecay,
|
fullParams.intervalDecay,
|
||||||
fullParams.alpha
|
fullParams.alpha
|
||||||
);
|
);
|
||||||
const cred = distributionToCred(distribution, nodeOrder, scorePrefixes);
|
const credScores = distributionToCred(
|
||||||
|
distribution,
|
||||||
|
nodeOrder,
|
||||||
|
scorePrefixes
|
||||||
|
);
|
||||||
const addressToCred = new Map();
|
const addressToCred = new Map();
|
||||||
for (let i = 0; i < nodeOrder.length; i++) {
|
for (let i = 0; i < nodeOrder.length; i++) {
|
||||||
const addr = nodeOrder[i];
|
const addr = nodeOrder[i];
|
||||||
const addrCred = cred.map(({cred}) => cred[i]);
|
const addrCred = credScores.intervalCredScores.map((cred) => cred[i]);
|
||||||
addressToCred.set(addr, addrCred);
|
addressToCred.set(addr, addrCred);
|
||||||
}
|
}
|
||||||
const intervals = cred.map((x) => x.interval);
|
|
||||||
return new TimelineCred(
|
return new TimelineCred(
|
||||||
weightedGraph,
|
weightedGraph,
|
||||||
intervals,
|
credScores.intervals,
|
||||||
addressToCred,
|
addressToCred,
|
||||||
fullParams,
|
fullParams,
|
||||||
plugins
|
plugins
|
||||||
|
|
|
@ -9,26 +9,41 @@ import {type Interval} from "../interval";
|
||||||
import {type TimelineDistributions} from "./timelinePagerank";
|
import {type TimelineDistributions} from "./timelinePagerank";
|
||||||
import {NodeAddress, type NodeAddressT} from "../../core/graph";
|
import {NodeAddress, type NodeAddressT} from "../../core/graph";
|
||||||
|
|
||||||
/**
|
export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
|
||||||
* Represents the full timeline cred for a graph.
|
|
||||||
*/
|
|
||||||
export type FullTimelineCred = $ReadOnlyArray<{|
|
|
||||||
// The interval for this slice.
|
|
||||||
+interval: Interval,
|
|
||||||
// The cred for each node.
|
|
||||||
// (Uses the graph's canonical node ordering.)
|
|
||||||
+cred: Float64Array,
|
|
||||||
|}>;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert a TimelineDistribution into TimelineCred.
|
* Represents cred scores over time.
|
||||||
|
*
|
||||||
|
* It contains an array of intervals, which give timing information, and an
|
||||||
|
* array of CredTimeSlices, which are Float64Arrays. Each CredTimeSlice
|
||||||
|
* contains cred scores for an interval. The cred scores are included in
|
||||||
|
* node-address-sorted order, and as such the CredScores can only be
|
||||||
|
* interpreted in the context of an associated Graph.
|
||||||
|
*
|
||||||
|
* As invariants, it is guaranteed that:
|
||||||
|
* - intervals and intervalCredScores will always have the same length
|
||||||
|
* - all of the intervalCredScores will have a consistent implicit node ordering
|
||||||
|
*
|
||||||
|
* The type is marked opaque so that no-one else can construct instances that
|
||||||
|
* don't conform to these invariants.
|
||||||
|
*/
|
||||||
|
export opaque type TimelineCredScores: {|
|
||||||
|
+intervals: $ReadOnlyArray<Interval>,
|
||||||
|
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|
||||||
|
|} = {|
|
||||||
|
+intervals: $ReadOnlyArray<Interval>,
|
||||||
|
+intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
|
||||||
|
|};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a TimelineDistribution into CredScores.
|
||||||
*
|
*
|
||||||
* The difference between the distribution and cred is that cred has been
|
* The difference between the distribution and cred is that cred has been
|
||||||
* re-normalized to present human-agreeable scores, rather than a probability
|
* re-normalized to present human-agreeable scores, rather than a probability
|
||||||
* distribution.
|
* distribution.
|
||||||
*
|
*
|
||||||
* This implementation normalizes the scores so that in each interval, the
|
* This implementation normalizes the scores so that in each interval, the
|
||||||
* total score of every node matching scoringNodePrefix is equal to the
|
* total score of every node matching a scoringNodePrefix is equal to the
|
||||||
* interval's weight.
|
* interval's weight.
|
||||||
*
|
*
|
||||||
* Edge cases:
|
* Edge cases:
|
||||||
|
@ -42,22 +57,19 @@ export function distributionToCred(
|
||||||
ds: TimelineDistributions,
|
ds: TimelineDistributions,
|
||||||
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||||
scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
|
scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
|
||||||
): FullTimelineCred {
|
): TimelineCredScores {
|
||||||
if (ds.length === 0) {
|
if (ds.length === 0) {
|
||||||
return [];
|
return {intervals: [], intervalCredScores: []};
|
||||||
}
|
}
|
||||||
const intervals = ds.map((x) => x.interval);
|
|
||||||
const scoringNodeIndices = [];
|
const scoringNodeIndices = [];
|
||||||
const cred = new Array(nodeOrder.length);
|
|
||||||
for (let i = 0; i < nodeOrder.length; i++) {
|
for (let i = 0; i < nodeOrder.length; i++) {
|
||||||
const addr = nodeOrder[i];
|
const addr = nodeOrder[i];
|
||||||
if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
|
if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
|
||||||
scoringNodeIndices.push(i);
|
scoringNodeIndices.push(i);
|
||||||
}
|
}
|
||||||
cred[i] = new Array(intervals.length);
|
|
||||||
}
|
}
|
||||||
|
const intervals = ds.map((x) => x.interval);
|
||||||
return ds.map(({interval, distribution, intervalWeight}) => {
|
const intervalCredScores = ds.map(({distribution, intervalWeight}) => {
|
||||||
const intervalTotalScore = sum(
|
const intervalTotalScore = sum(
|
||||||
scoringNodeIndices.map((x) => distribution[x])
|
scoringNodeIndices.map((x) => distribution[x])
|
||||||
);
|
);
|
||||||
|
@ -65,6 +77,7 @@ export function distributionToCred(
|
||||||
const intervalNormalizer =
|
const intervalNormalizer =
|
||||||
intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
|
intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
|
||||||
const cred = distribution.map((x) => x * intervalNormalizer);
|
const cred = distribution.map((x) => x * intervalNormalizer);
|
||||||
return {interval, cred};
|
return cred;
|
||||||
});
|
});
|
||||||
|
return {intervalCredScores, intervals};
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,16 +21,16 @@ describe("src/core/algorithm/distributionToCred", () => {
|
||||||
];
|
];
|
||||||
const nodeOrder = [na("foo"), na("bar")];
|
const nodeOrder = [na("foo"), na("bar")];
|
||||||
const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
|
const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
|
||||||
const expected = [
|
const expected = {
|
||||||
{
|
intervals: [
|
||||||
interval: {startTimeMs: 0, endTimeMs: 10},
|
{startTimeMs: 0, endTimeMs: 10},
|
||||||
cred: new Float64Array([1, 1]),
|
{startTimeMs: 10, endTimeMs: 20},
|
||||||
},
|
],
|
||||||
{
|
intervalCredScores: [
|
||||||
interval: {startTimeMs: 10, endTimeMs: 20},
|
new Float64Array([1, 1]),
|
||||||
cred: new Float64Array([9, 1]),
|
new Float64Array([9, 1]),
|
||||||
},
|
],
|
||||||
];
|
};
|
||||||
expect(expected).toEqual(actual);
|
expect(expected).toEqual(actual);
|
||||||
});
|
});
|
||||||
it("correctly handles multiple scoring prefixes", () => {
|
it("correctly handles multiple scoring prefixes", () => {
|
||||||
|
@ -48,16 +48,16 @@ describe("src/core/algorithm/distributionToCred", () => {
|
||||||
];
|
];
|
||||||
const nodeOrder = [na("foo"), na("bar")];
|
const nodeOrder = [na("foo"), na("bar")];
|
||||||
const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
|
const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
|
||||||
const expected = [
|
const expected = {
|
||||||
{
|
intervals: [
|
||||||
interval: {startTimeMs: 0, endTimeMs: 10},
|
{startTimeMs: 0, endTimeMs: 10},
|
||||||
cred: new Float64Array([1, 1]),
|
{startTimeMs: 10, endTimeMs: 20},
|
||||||
},
|
],
|
||||||
{
|
intervalCredScores: [
|
||||||
interval: {startTimeMs: 10, endTimeMs: 20},
|
new Float64Array([1, 1]),
|
||||||
cred: new Float64Array([9, 1]),
|
new Float64Array([9, 1]),
|
||||||
},
|
],
|
||||||
];
|
};
|
||||||
expect(expected).toEqual(actual);
|
expect(expected).toEqual(actual);
|
||||||
});
|
});
|
||||||
it("works in a case where some nodes are scoring", () => {
|
it("works in a case where some nodes are scoring", () => {
|
||||||
|
@ -75,16 +75,16 @@ describe("src/core/algorithm/distributionToCred", () => {
|
||||||
];
|
];
|
||||||
const nodeOrder = [na("foo"), na("bar")];
|
const nodeOrder = [na("foo"), na("bar")];
|
||||||
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
|
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
|
||||||
const expected = [
|
const expected = {
|
||||||
{
|
intervals: [
|
||||||
interval: {startTimeMs: 0, endTimeMs: 10},
|
{startTimeMs: 0, endTimeMs: 10},
|
||||||
cred: new Float64Array([2, 2]),
|
{startTimeMs: 10, endTimeMs: 20},
|
||||||
},
|
],
|
||||||
{
|
intervalCredScores: [
|
||||||
interval: {startTimeMs: 10, endTimeMs: 20},
|
new Float64Array([2, 2]),
|
||||||
cred: new Float64Array([90, 10]),
|
new Float64Array([90, 10]),
|
||||||
},
|
],
|
||||||
];
|
};
|
||||||
expect(expected).toEqual(actual);
|
expect(expected).toEqual(actual);
|
||||||
});
|
});
|
||||||
it("handles the case where no nodes are scoring", () => {
|
it("handles the case where no nodes are scoring", () => {
|
||||||
|
@ -97,12 +97,10 @@ describe("src/core/algorithm/distributionToCred", () => {
|
||||||
];
|
];
|
||||||
const nodeOrder = [na("foo"), na("bar")];
|
const nodeOrder = [na("foo"), na("bar")];
|
||||||
const actual = distributionToCred(ds, nodeOrder, []);
|
const actual = distributionToCred(ds, nodeOrder, []);
|
||||||
const expected = [
|
const expected = {
|
||||||
{
|
intervals: [{startTimeMs: 0, endTimeMs: 10}],
|
||||||
interval: {startTimeMs: 0, endTimeMs: 10},
|
intervalCredScores: [new Float64Array([0, 0])],
|
||||||
cred: new Float64Array([0, 0]),
|
};
|
||||||
},
|
|
||||||
];
|
|
||||||
expect(actual).toEqual(expected);
|
expect(actual).toEqual(expected);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -116,17 +114,18 @@ describe("src/core/algorithm/distributionToCred", () => {
|
||||||
];
|
];
|
||||||
const nodeOrder = [na("foo"), na("bar")];
|
const nodeOrder = [na("foo"), na("bar")];
|
||||||
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
|
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
|
||||||
const expected = [
|
const expected = {
|
||||||
{
|
intervals: [{startTimeMs: 0, endTimeMs: 10}],
|
||||||
interval: {startTimeMs: 0, endTimeMs: 10},
|
intervalCredScores: [new Float64Array([0, 0])],
|
||||||
cred: new Float64Array([0, 0]),
|
};
|
||||||
},
|
|
||||||
];
|
|
||||||
expect(actual).toEqual(expected);
|
expect(actual).toEqual(expected);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns empty array if no intervals are present", () => {
|
it("returns empty CredScores if no intervals are present", () => {
|
||||||
expect(distributionToCred([], [], [])).toEqual([]);
|
expect(distributionToCred([], [], [])).toEqual({
|
||||||
|
intervals: [],
|
||||||
|
intervalCredScores: [],
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue