Refactor timelinePagerank, exposing connections (#1797)

This commit refactors internal helper methods in timelinePagerank so
that rather than piping around an OrderedSparseMarkovChain, we provide
the NodeToConnections from which that OrderedSparseMarkovChain may be derived. This
is important because the NodeToConnections has the information necessary
to derive how score flowed across individual edges, and not just the
adjacency topology of the graph. This will allow us to compute the
OutputEdge format with edge-specific cred flows as documented in #1773.

Test plan: `yarn test` passes. It's a simple refactor.
This commit is contained in:
Dandelion Mané 2020-05-30 14:02:23 -07:00 committed by GitHub
parent d1144217b0
commit 71c0b0d66d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 30 deletions

View File

@ -20,12 +20,9 @@ import {type Distribution} from "./distribution";
import {
createOrderedSparseMarkovChain,
createConnections,
type NodeToConnections,
} from "./graphToMarkovChain";
import {
findStationaryDistribution,
type PagerankParams,
type SparseMarkovChain,
} from "./markovChain";
import {findStationaryDistribution, type PagerankParams} from "./markovChain";
/**
* Represents raw PageRank distributions on a graph over time.
@ -42,6 +39,8 @@ export type TimelineDistributions = $ReadOnlyArray<{|
+distribution: Distribution,
|}>;
export const SYNTHETIC_LOOP_WEIGHT = 1e-3;
/**
* Runs timeline PageRank on a graph.
*
@ -127,7 +126,7 @@ export async function timelinePagerank(
nodeEvaluator,
intervalDecay
);
const markovChainIterator = _timelineMarkovChain(
const nodeToConnectionsIterator = _timelineNodeToConnections(
weightedGraph.graph,
edgeCreationHistory,
edgeEvaluator,
@ -137,7 +136,7 @@ export async function timelinePagerank(
nodeOrder,
intervals,
nodeWeightIterator,
markovChainIterator,
nodeToConnectionsIterator,
alpha
);
}
@ -166,12 +165,12 @@ export function* _timelineNodeWeights(
}
}
export function* _timelineMarkovChain(
export function* _timelineNodeToConnections(
graph: Graph,
edgeCreationHistory: $ReadOnlyArray<$ReadOnlyArray<Edge>>,
edgeEvaluator: EdgeWeightEvaluator,
intervalDecay: number
): Iterator<SparseMarkovChain> {
): Iterator<NodeToConnections> {
const edgeWeights = new Map();
for (const edges of edgeCreationHistory) {
for (const [address, {forwards, backwards}] of edgeWeights.entries()) {
@ -187,19 +186,7 @@ export function* _timelineMarkovChain(
const currentEdgeWeight = (e: Edge) => {
return NullUtil.orElse(edgeWeights.get(e.address), defaultEdgeWeight);
};
// Construct a new Markov chain corresponding to the current weights
// of the edges.
// TODO: Rather than constructing a markov chain from scratch, we can
// update the markov chain in-place. This should result in a significant
// performance improvement. We will need to change the markov chain
// representation to do so (we should add a `totalOutWeight` array to the
// chain, so that we can efficiently update the total weight as we add new
// connections, rather than needing to re-normalize the whole chain for
// each interval).
const chain = createOrderedSparseMarkovChain(
createConnections(graph, currentEdgeWeight, 1e-3)
).chain;
yield chain;
yield createConnections(graph, currentEdgeWeight, SYNTHETIC_LOOP_WEIGHT);
}
}
@ -209,14 +196,17 @@ export async function _computeTimelineDistribution(
nodeOrder: $ReadOnlyArray<NodeAddressT>,
intervals: $ReadOnlyArray<Interval>,
nodeWeightIterator: Iterator<Map<NodeAddressT, number>>,
markovChainIterator: Iterator<SparseMarkovChain>,
nodeToConnectionsIterator: Iterator<NodeToConnections>,
alpha: number
): Promise<TimelineDistributions> {
const results = [];
let pi0: Distribution | null = null;
for (const interval of intervals) {
const nodeWeights = NullUtil.get(nodeWeightIterator.next().value);
const chain = NullUtil.get(markovChainIterator.next().value);
const nodeToConnections = NullUtil.get(
nodeToConnectionsIterator.next().value
);
const {chain} = createOrderedSparseMarkovChain(nodeToConnections);
const seed = weightedDistribution(nodeOrder, nodeWeights);
if (pi0 == null) {

View File

@ -4,7 +4,11 @@ import {sum} from "d3-array";
import * as NullUtil from "../../util/null";
import {node, edge} from "../graphTestUtil";
import {Graph, type EdgeAddressT, type Edge} from "../graph";
import {_timelineNodeWeights, _timelineMarkovChain} from "./timelinePagerank";
import {
_timelineNodeWeights,
_timelineNodeToConnections,
SYNTHETIC_LOOP_WEIGHT,
} from "./timelinePagerank";
import {
createConnections,
createOrderedSparseMarkovChain,
@ -51,7 +55,7 @@ describe("src/core/algorithm/timelinePagerank", () => {
});
});
describe("_timelineMarkovChain", () => {
describe("_timelineNodeToConnections", () => {
it("works for a simple case", () => {
const a = node("a");
const b = node("b");
@ -64,19 +68,25 @@ describe("src/core/algorithm/timelinePagerank", () => {
): SparseMarkovChain {
const edgeWeight = (e: Edge) =>
NullUtil.orElse(w.get(e.address), {forwards: 0, backwards: 0});
const connections = createConnections(graph, edgeWeight, 1e-3);
return createOrderedSparseMarkovChain(connections).chain;
const nodeToConnections = createConnections(
graph,
edgeWeight,
SYNTHETIC_LOOP_WEIGHT
);
return createOrderedSparseMarkovChain(nodeToConnections).chain;
}
const edgeCreationHistory = [[], [e1], [], [e2]];
const edgeEvaluator = (_) => ({forwards: 1, backwards: 0});
const chainIterator = _timelineMarkovChain(
const nodeToConnectionsIterator = _timelineNodeToConnections(
graph,
edgeCreationHistory,
edgeEvaluator,
0.5
);
const chains = Array.from(chainIterator);
const chains = Array.from(nodeToConnectionsIterator).map(
(x) => createOrderedSparseMarkovChain(x).chain
);
const w1 = new Map();
const chain1 = weightsToChain(w1);