Improve distributionToCred (#1654)
This commit makes several small improvements to the distributionToCred module:

- We rename the output `FullTimelineCred` data structure to `TimelineCredScores`, which is more descriptive.
- We re-organize that data structure so that rather than being an array of `{interval, cred}` objects, it has an `intervals` property and an `intervalCredScores` property, both of which are arrays. This will make downstream usage cleaner.
- An unused variable is removed.
- We document invariants about the TimelineCredScores data type.
- We mark the TimelineCredScores data type opaque, so that clients receiving a TimelineCredScores can trust that the invariants are maintained.

Test plan:

- The rename is robustly tested by `yarn flow`.
- That the refactor lands without changing existing semantics is robustly tested by `yarn test --full`, since we snapshot a full cred load; thus we know that cred scores haven't changed. (Also, we have existing unit tests.)
- The newly documented invariants aren't robustly tested by the test code, but it's easy to see that they hold by reading the algorithm.
parent db94bb50fb
commit 8dac968a69
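The shape change described above is easiest to see in a minimal sketch (illustrative only; the `scores` variable name is hypothetical and the values mirror the unit tests further down, not real output):

// Before: an array of {interval, cred} objects.
// const scores = [
//   {interval: {startTimeMs: 0, endTimeMs: 10}, cred: new Float64Array([1, 1])},
//   {interval: {startTimeMs: 10, endTimeMs: 20}, cred: new Float64Array([9, 1])},
// ];

// After: two parallel arrays, which makes downstream slicing cleaner.
const scores = {
  intervals: [
    {startTimeMs: 0, endTimeMs: 10},
    {startTimeMs: 10, endTimeMs: 20},
  ],
  intervalCredScores: [new Float64Array([1, 1]), new Float64Array([9, 1])],
};

// Extracting the cred-over-time series for the node at index i in the
// graph's canonical node ordering:
const i = 0;
const credOverTime = scores.intervalCredScores.map((cred) => cred[i]);
console.log(credOverTime); // [1, 9]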
@@ -199,17 +199,20 @@ export class TimelineCred {
       fullParams.intervalDecay,
       fullParams.alpha
     );
-    const cred = distributionToCred(distribution, nodeOrder, scorePrefixes);
+    const credScores = distributionToCred(
+      distribution,
+      nodeOrder,
+      scorePrefixes
+    );
     const addressToCred = new Map();
     for (let i = 0; i < nodeOrder.length; i++) {
       const addr = nodeOrder[i];
-      const addrCred = cred.map(({cred}) => cred[i]);
+      const addrCred = credScores.intervalCredScores.map((cred) => cred[i]);
       addressToCred.set(addr, addrCred);
     }
-    const intervals = cred.map((x) => x.interval);
     return new TimelineCred(
       weightedGraph,
-      intervals,
+      credScores.intervals,
       addressToCred,
       fullParams,
       plugins
@@ -9,26 +9,41 @@ import {type Interval} from "../interval";
 import {type TimelineDistributions} from "./timelinePagerank";
 import {NodeAddress, type NodeAddressT} from "../../core/graph";
 
-/**
- * Represents the full timeline cred for a graph.
- */
-export type FullTimelineCred = $ReadOnlyArray<{|
-  // The interval for this slice.
-  +interval: Interval,
-  // The cred for each node.
-  // (Uses the graph's canonical node ordering.)
-  +cred: Float64Array,
-|}>;
+export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
 
 /**
- * Convert a TimelineDistribution into TimelineCred.
+ * Represents cred scores over time.
  *
+ * It contains an array of intervals, which give timing information, and an
+ * array of CredTimeSlices, which are Float64Arrays. Each CredTimeSlice
+ * contains cred scores for an interval. The cred scores are included in
+ * node-address-sorted order, and as such the CredScores can only be
+ * interpreted in the context of an associated Graph.
+ *
+ * As invariants, it is guaranteed that:
+ * - intervals and intervalCredScores will always have the same length
+ * - all of the intervalCredScores will have a consistent implicit node ordering
+ *
+ * The type is marked opaque so that no-one else can construct instances that
+ * don't conform to these invariants.
+ */
+export opaque type TimelineCredScores: {|
+  +intervals: $ReadOnlyArray<Interval>,
+  +intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
+|} = {|
+  +intervals: $ReadOnlyArray<Interval>,
+  +intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
+|};
+
+/**
+ * Convert a TimelineDistribution into CredScores.
+ *
  * The difference between the distribution and cred is that cred has been
  * re-normalized to present human-agreeable scores, rather than a probability
  * distribution.
  *
  * This implementation normalizes the scores so that in each interval, the
- * total score of every node matching scoringNodePrefix is equal to the
+ * total score of every node matching a scoringNodePrefix is equal to the
  * interval's weight.
  *
  * Edge cases:
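The invariants documented in the comment above could be spot-checked with a small helper along these lines (a hypothetical sketch, not part of this commit; `checkTimelineCredScores` is an invented name, and it only verifies necessary conditions):

// Hypothetical sketch: spot-check the documented TimelineCredScores invariants.
function checkTimelineCredScores(scores) {
  const {intervals, intervalCredScores} = scores;
  // Invariant 1: intervals and intervalCredScores have the same length.
  if (intervals.length !== intervalCredScores.length) {
    throw new Error("intervals / intervalCredScores length mismatch");
  }
  // Invariant 2: every slice uses the same implicit node ordering; a
  // necessary condition is that every slice has the same number of entries.
  const sliceLengths = new Set(intervalCredScores.map((slice) => slice.length));
  if (sliceLengths.size > 1) {
    throw new Error("inconsistent slice lengths across intervals");
  }
}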
@@ -42,22 +57,19 @@ export function distributionToCred(
   ds: TimelineDistributions,
   nodeOrder: $ReadOnlyArray<NodeAddressT>,
   scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
-): FullTimelineCred {
+): TimelineCredScores {
   if (ds.length === 0) {
-    return [];
+    return {intervals: [], intervalCredScores: []};
   }
-  const intervals = ds.map((x) => x.interval);
   const scoringNodeIndices = [];
-  const cred = new Array(nodeOrder.length);
   for (let i = 0; i < nodeOrder.length; i++) {
     const addr = nodeOrder[i];
     if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
       scoringNodeIndices.push(i);
     }
-    cred[i] = new Array(intervals.length);
   }
-
-  return ds.map(({interval, distribution, intervalWeight}) => {
+  const intervals = ds.map((x) => x.interval);
+  const intervalCredScores = ds.map(({distribution, intervalWeight}) => {
     const intervalTotalScore = sum(
       scoringNodeIndices.map((x) => distribution[x])
     );
@@ -65,6 +77,7 @@ export function distributionToCred(
     const intervalNormalizer =
       intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
     const cred = distribution.map((x) => x * intervalNormalizer);
-    return {interval, cred};
+    return cred;
   });
+  return {intervalCredScores, intervals};
 }
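For a concrete sense of the per-interval normalization this function performs, here is a standalone sketch (a hypothetical `normalizeInterval` helper with illustrative inputs, not the module's code):

// Sketch: scale one interval's scores so that the scoring nodes' total
// equals the interval's weight.
function normalizeInterval(distribution, scoringNodeIndices, intervalWeight) {
  const intervalTotalScore = scoringNodeIndices
    .map((i) => distribution[i])
    .reduce((a, b) => a + b, 0);
  const normalizer =
    intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
  return distribution.map((x) => x * normalizer);
}

// With distribution [0.5, 0.5], only node index 1 scoring, and interval
// weight 2, the scoring node's 0.5 is scaled up to 2, so every score is
// multiplied by 4:
console.log(normalizeInterval([0.5, 0.5], [1], 2)); // [2, 2]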
@@ -21,16 +21,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([1, 1]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([9, 1]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([1, 1]),
+          new Float64Array([9, 1]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("correctly handles multiple scoring prefixes", () => {
@@ -48,16 +48,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([1, 1]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([9, 1]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([1, 1]),
+          new Float64Array([9, 1]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("works in a case where some nodes are scoring", () => {
@@ -75,16 +75,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([2, 2]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([90, 10]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([2, 2]),
+          new Float64Array([90, 10]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("handles the case where no nodes are scoring", () => {
@@ -97,12 +97,10 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, []);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([0, 0]),
-        },
-      ];
+      const expected = {
+        intervals: [{startTimeMs: 0, endTimeMs: 10}],
+        intervalCredScores: [new Float64Array([0, 0])],
+      };
       expect(actual).toEqual(expected);
     });
 
@@ -116,17 +114,18 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([0, 0]),
-        },
-      ];
+      const expected = {
+        intervals: [{startTimeMs: 0, endTimeMs: 10}],
+        intervalCredScores: [new Float64Array([0, 0])],
+      };
       expect(actual).toEqual(expected);
     });
 
-    it("returns empty array if no intervals are present", () => {
-      expect(distributionToCred([], [], [])).toEqual([]);
+    it("returns empty CredScores if no intervals are present", () => {
+      expect(distributionToCred([], [], [])).toEqual({
+        intervals: [],
+        intervalCredScores: [],
+      });
     });
   });
 });