Factor out _intervalResult in timelinePagerank (#1799)
As part of the work for #1773, I want to add a lot more complexity to the logic that computes individual time-slices of PageRank scores, so that we can trace how much score flowed along each individual edge. This means adding more complexity to the _computeTimelineDistribution function; however, that function is an untested wrapper, so I'm hesitant to add more complexity to it directly. Instead, I'm first factoring out an _intervalResult method and a corresponding type, which computes the scores for a given timeline interval. I've also added sanity-checking tests for this method. In a follow-on commit, I'll add more logic for tracking edge-level score flows.

Test plan: This is just a refactor, maintaining existing behavior and adding tests. `yarn test --full` passes. Since our sharness tests do a full load (including timeline cred computation) on realistic data from GitHub, this gives us confidence that cred semantics have not changed.
This commit is contained in:
parent 71c0b0d66d
commit 27aeacc906
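The heart of the refactor is an unchanged calling discipline: each interval's PageRank run is seeded with the previous interval's converged distribution (pi0), which speeds convergence when the graph changes slowly between slices. A minimal, self-contained sketch of that pattern in plain JavaScript (the stub body and its placeholder scores are invented for illustration; only the warm-start flow and the {interval, intervalWeight, distribution} result shape come from the diff below):

// Stub standing in for the real _intervalResult from the diff; it only
// mimics the shape of the returned IntervalResult.
async function intervalResultStub(interval, pi0) {
  // A real implementation would run PageRank to convergence here.
  const distribution = pi0 == null ? [0.5, 0.5] : pi0;
  return {interval, intervalWeight: 1, distribution};
}

async function computeAllIntervals(intervals) {
  const results = [];
  let pi0 = null; // the first interval has no warm start
  for (const interval of intervals) {
    const result = await intervalResultStub(interval, pi0);
    results.push(result);
    // Warm-start the next interval from this one's converged scores.
    pi0 = result.distribution;
  }
  return results;
}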
src/core/algorithm/timelinePagerank.js

@@ -24,10 +24,7 @@ import {
 } from "./graphToMarkovChain";
 import {findStationaryDistribution, type PagerankParams} from "./markovChain";
 
-/**
- * Represents raw PageRank distributions on a graph over time.
- */
-export type TimelineDistributions = $ReadOnlyArray<{|
+export type IntervalResult = {|
   // The interval for this slice
   +interval: Interval,
   // The total node weight within this interval (normalized to account for the
@@ -37,7 +34,11 @@ export type TimelineDistributions = $ReadOnlyArray<{|
   // The raw score distribution over nodes for this interval (i.e. sums to 1).
   // Uses the canonical graph node order.
   +distribution: Distribution,
-|}>;
+|};
+/**
+ * Represents raw PageRank distributions on a graph over time.
+ */
+export type TimelineDistributions = $ReadOnlyArray<IntervalResult>;
 
 export const SYNTHETIC_LOOP_WEIGHT = 1e-3;
 
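An aside for readers skimming the diff above: the leading "+" on +interval and +distribution is not a diff marker. It is Flow's variance sigil, which marks a property covariant (read-only), and the {| ... |} delimiters declare an exact object type that forbids extra keys. A tiny standalone illustration (names invented):

// Flow: exact object type with read-only properties.
type Point = {|
  +x: number, // "+" makes x read-only through this type
  +y: number,
|};
const p: Point = {x: 1, y: 2};
// p.x = 3;                              // error: x is covariant (read-only)
// const q: Point = {x: 1, y: 2, z: 3}; // error: exact type forbids z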
@@ -206,6 +207,30 @@ export async function _computeTimelineDistribution(
     const nodeToConnections = NullUtil.get(
       nodeToConnectionsIterator.next().value
     );
+    const result = await _intervalResult(
+      nodeWeights,
+      nodeToConnections,
+      nodeOrder,
+      interval,
+      pi0,
+      alpha
+    );
+    results.push(result);
+    // Use the latest convergence results as the starting point for the next run
+    // of PageRank
+    pi0 = result.distribution;
+  }
+  return results;
+}
+
+export async function _intervalResult(
+  nodeWeights: Map<NodeAddressT, number>,
+  nodeToConnections: NodeToConnections,
+  nodeOrder: $ReadOnlyArray<NodeAddressT>,
+  interval: Interval,
+  pi0: Distribution | null,
+  alpha: number
+): Promise<IntervalResult> {
   const {chain} = createOrderedSparseMarkovChain(nodeToConnections);
 
   const seed = weightedDistribution(nodeOrder, nodeWeights);
@@ -220,14 +245,9 @@ export async function _computeTimelineDistribution(
     yieldAfterMs: 30,
   });
   const intervalWeight = sum(nodeWeights.values());
-  results.push({
+  return {
     interval,
     intervalWeight,
     distribution: distributionResult.pi,
-  });
-  // Use the latest convergence results as the starting point for the next run
-  // of PageRank
-  pi0 = distributionResult.pi;
-  }
-  return results;
+  };
 }
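For context on what the new helper returns: findStationaryDistribution iterates the alpha-smoothed chain to its stationary distribution. Under the standard PageRank reading of alpha and seed (my summary; the diff itself does not spell this out), the score vector is the fixed point

    \pi = \alpha \, s + (1 - \alpha) \, \pi M

where s is the seed distribution built by weightedDistribution(nodeOrder, nodeWeights) and M is the row-stochastic transition matrix from createOrderedSparseMarkovChain. Note that pi0 is only the iteration's starting guess: it affects how fast the computation converges, not the distribution it converges to.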
src/core/algorithm/timelinePagerank.test.js

@@ -2,12 +2,13 @@
 
 import {sum} from "d3-array";
 import * as NullUtil from "../../util/null";
-import {node, edge} from "../graphTestUtil";
+import {node, edge, advancedGraph} from "../graphTestUtil";
 import {Graph, type EdgeAddressT, type Edge} from "../graph";
 import {
   _timelineNodeWeights,
   _timelineNodeToConnections,
   SYNTHETIC_LOOP_WEIGHT,
+  _intervalResult,
 } from "./timelinePagerank";
 import {
   createConnections,
@@ -107,4 +108,67 @@ describe("src/core/algorithm/timelinePagerank", () => {
       expect(chains[3]).toEqual(chain4);
     });
   });
+
+  describe("_intervalResult", () => {
+    async function example() {
+      const {graph1, nodes} = advancedGraph();
+      const g = graph1();
+      const nodeWeights = new Map()
+        .set(nodes.src.address, 1)
+        .set(nodes.isolated.address, 2);
+      const edgeFn = (_unused_edge) => ({forwards: 1, backwards: 0.5});
+      const nodeToConnections = createConnections(g, edgeFn, 1e-3);
+      const nodeOrder = Array.from(g.nodes()).map((x) => x.address);
+      const interval = {endTimeMs: 1000, startTimeMs: 0};
+      const pi0 = null;
+      const alpha = 0.05;
+      const result = await _intervalResult(
+        nodeWeights,
+        nodeToConnections,
+        nodeOrder,
+        interval,
+        pi0,
+        alpha
+      );
+      return {
+        graph: g,
+        nodes,
+        nodeOrder,
+        nodeWeights,
+        edgeFn,
+        nodeToConnections,
+        interval,
+        pi0,
+        alpha,
+        result,
+      };
+    }
+    it("passes through the interval", async () => {
+      const {result, interval} = await example();
+      expect(result.interval).toEqual(interval);
+    });
+    it("computes the summed nodeWeight", async () => {
+      const {result, nodeWeights} = await example();
+      const actualIntervalWeight = sum(nodeWeights.values());
+      expect(result.intervalWeight).toEqual(actualIntervalWeight);
+    });
+    it("produces sane score distribution on an example graph", async () => {
+      const {result, nodes, nodeOrder} = await example();
+      function getScore(a) {
+        const idx = nodeOrder.indexOf(a.address);
+        if (idx === -1) {
+          throw new Error("bad address");
+        }
+        return result.distribution[idx];
+      }
+      const isoScore = getScore(nodes.isolated);
+      const srcScore = getScore(nodes.src);
+      const dstScore = getScore(nodes.dst);
+      expect(isoScore + srcScore + dstScore).toBeCloseTo(1);
+      // It has 2/3 of the weight, and is isolated, so it's simple
+      expect(isoScore).toBeCloseTo(2 / 3);
+      // src has the weight, and dst doesn't, so it should have a higher score
+      expect(srcScore).toBeGreaterThan(dstScore);
+    });
+  });
 });
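A note on the toBeCloseTo(2 / 3) expectation above: the isolated node holds 2 of the 3 units of seed weight, and (being isolated) its only connection is its synthetic self-loop, so every unit of score it emits flows straight back to itself. Assuming the standard PageRank fixed point sketched earlier, its stationary score satisfies

    \pi_{iso} = \alpha \cdot \tfrac{2}{3} + (1 - \alpha) \cdot \pi_{iso}

which solves to \pi_{iso} = 2/3 exactly, independent of alpha. That is the "simple" case the test comment refers to.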