Factor out distribution modules (#1182)
This pulls distribution-related code out of `markovChain.js` into the new `distribution.js` module, and out of `graphToMarkovChain.js` into the new `nodeDistribution.js` module. Since the `computeDelta` method is now exported, I've added some unit tests. Test plan: `yarn test` passes.
This commit is contained in:
parent
e47a5bd84e
commit
4029458098
|
@ -1,7 +1,7 @@
|
|||
// @flow
|
||||
|
||||
import {NodeAddress, type NodeAddressT} from "../core/graph";
|
||||
import type {NodeDistribution} from "../core/attribution/graphToMarkovChain";
|
||||
import type {NodeDistribution} from "../core/attribution/nodeDistribution";
|
||||
|
||||
export type NodeScore = Map<NodeAddressT, number>;
|
||||
|
||||
|
|
|
@ -23,10 +23,10 @@ import {
|
|||
findStationaryDistribution,
|
||||
type PagerankParams,
|
||||
type PagerankOptions as CorePagerankOptions,
|
||||
uniformDistribution,
|
||||
} from "../core/attribution/markovChain";
|
||||
import {uniformDistribution} from "../core/attribution/distribution";
|
||||
|
||||
export type {NodeDistribution} from "../core/attribution/graphToMarkovChain";
|
||||
export type {NodeDistribution} from "../core/attribution/nodeDistribution";
|
||||
export type {PagerankNodeDecomposition} from "./pagerankNodeDecomposition";
|
||||
export type PagerankOptions = {|
|
||||
+selfLoopWeight?: number,
|
||||
|
|
|
@ -9,8 +9,8 @@ import {
|
|||
import {
|
||||
findStationaryDistribution,
|
||||
type PagerankParams,
|
||||
uniformDistribution,
|
||||
} from "../core/attribution/markovChain";
|
||||
import {uniformDistribution} from "../core/attribution/distribution";
|
||||
import {
|
||||
decompose,
|
||||
type PagerankNodeDecomposition,
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
// @flow
|
||||
|
||||
/**
|
||||
* A distribution over the integers `0` through `n - 1`, where `n` is
|
||||
* the length of the array. The value at index `i` is the probability of
|
||||
* `i` in the distribution. The values should sum to 1.
|
||||
*/
|
||||
export type Distribution = Float64Array;
|
||||
|
||||
export function uniformDistribution(n: number): Distribution {
|
||||
if (isNaN(n) || !isFinite(n) || n !== Math.floor(n) || n <= 0) {
|
||||
throw new Error("expected positive integer, but got: " + n);
|
||||
}
|
||||
return new Float64Array(n).fill(1 / n);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the maximum difference (in absolute value) between components in two
|
||||
* distributions.
|
||||
*
|
||||
* Equivalent to $\norm{pi0 - pi1}_\infty$.
|
||||
*/
|
||||
export function computeDelta(pi0: Distribution, pi1: Distribution) {
|
||||
if (pi0.length === 0 || pi0.length !== pi1.length) {
|
||||
throw new Error("invalid input");
|
||||
}
|
||||
let maxDelta = -Infinity;
|
||||
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
|
||||
// same (i.e., there has been no permutation).
|
||||
pi0.forEach((x, i) => {
|
||||
const delta = Math.abs(x - pi1[i]);
|
||||
maxDelta = Math.max(delta, maxDelta);
|
||||
});
|
||||
return maxDelta;
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// @flow
|
||||
|
||||
import {uniformDistribution, computeDelta} from "./distribution";
|
||||
|
||||
describe("core/attribution/distribution", () => {
  describe("uniformDistribution", () => {
    describe("errors for: ", () => {
      // Each entry is [invalid argument, test name]. The infinite cases
      // exercise the non-finite guard, which the other rows never reach.
      [
        [NaN, "NaN"],
        [Infinity, "positive infinity"],
        [-Infinity, "negative infinity"],
        [-1, "negatives"],
        [0, "zero"],
        [1.337, "non-integer"],
      ].forEach(([value, name]) => {
        it(name, () => {
          expect(() => uniformDistribution(value)).toThrowError(
            "expected positive integer"
          );
        });
      });
    });
    it("returns a uniform distribution of size 1", () => {
      expect(uniformDistribution(1)).toEqual(new Float64Array([1]));
    });
    it("returns a uniform distribution of size 2", () => {
      expect(uniformDistribution(2)).toEqual(new Float64Array([0.5, 0.5]));
    });
  });

  describe("computeDelta", () => {
    const u = uniformDistribution;
    it("errors on empty array", () => {
      expect(() =>
        computeDelta(new Float64Array([]), new Float64Array([]))
      ).toThrowError("invalid input");
    });
    it("works on size-1 array", () => {
      expect(computeDelta(u(1), u(1))).toEqual(0);
    });
    it("errors on mismatched sizes", () => {
      // Check both argument orders so that neither side's length is trusted.
      expect(() => computeDelta(u(1), u(2))).toThrowError("invalid input");
      expect(() => computeDelta(u(2), u(1))).toThrowError("invalid input");
    });
    it("correctly computes max delta", () => {
      const pi = new Float64Array([0.5, 0.0, 0.5]);
      expect(computeDelta(u(3), pi)).toEqual(1 / 3);
    });
    it("doesn't depend on argument order", () => {
      // implies that it uses Math.abs for delta computation
      const pi = new Float64Array([0.5, 0.0, 0.5]);
      expect(computeDelta(u(3), pi)).toEqual(computeDelta(pi, u(3)));
    });
  });
});
|
|
@ -1,15 +1,12 @@
|
|||
// @flow
|
||||
|
||||
import {type Edge, type Graph, type NodeAddressT, NodeAddress} from "../graph";
|
||||
import {
|
||||
type Distribution,
|
||||
type SparseMarkovChain,
|
||||
uniformDistribution,
|
||||
} from "./markovChain";
|
||||
import {type Edge, type Graph, type NodeAddressT} from "../graph";
|
||||
import {type Distribution} from "./distribution";
|
||||
import {type Probability, type NodeDistribution} from "./nodeDistribution";
|
||||
import {type SparseMarkovChain} from "./markovChain";
|
||||
import * as MapUtil from "../../util/map";
|
||||
import * as NullUtil from "../../util/null";
|
||||
|
||||
export type Probability = number;
|
||||
export type Adjacency =
|
||||
| {|+type: "SYNTHETIC_LOOP"|}
|
||||
| {|+type: "IN_EDGE", +edge: Edge|}
|
||||
|
@ -35,56 +32,6 @@ export function adjacencySource(target: NodeAddressT, adjacency: Adjacency) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Distribution using provided node weights.
|
||||
*
|
||||
* weightedDistribution takes in a node order (as a read only array of NodeAddressT),
|
||||
* and a map providing weights for a subset of those nodes. It returns a Distribution
|
||||
* with the invariant that every node's weight is proportional to its relative weight
|
||||
* in the weights map. For example, in a case where there were three nodes and they
|
||||
* had weights of 0, 1, and 3 respectively, the distribution would be [0, 0.25, 0.75].
|
||||
*
|
||||
* If a node address is not present in the weight map, its weight is assumed to be 0.
|
||||
* If any weight is negative or non-finite, an error will be thrown.
|
||||
* If the sum of all weights is 0, then a uniform distribution will be returned.
|
||||
* If the weight map assigned weight to nodes which are not in the node order, an error
|
||||
* will be thrown.
|
||||
*/
|
||||
export function weightedDistribution(
|
||||
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||
weights: Map<NodeAddressT, number>
|
||||
): Distribution {
|
||||
let totalWeight = 0;
|
||||
for (const [address, weight] of weights.entries()) {
|
||||
if (weight < 0 || !isFinite(weight)) {
|
||||
throw new Error(
|
||||
`Invalid weight ${weight} associated with address ${NodeAddress.toString(
|
||||
address
|
||||
)}`
|
||||
);
|
||||
}
|
||||
totalWeight += weight;
|
||||
}
|
||||
if (totalWeight === 0) {
|
||||
return uniformDistribution(nodeOrder.length);
|
||||
}
|
||||
let numEncounteredWeights = 0;
|
||||
const distribution = new Float64Array(nodeOrder.length);
|
||||
for (let i = 0; i < distribution.length; i++) {
|
||||
const weight = weights.get(nodeOrder[i]);
|
||||
if (weight != null) {
|
||||
numEncounteredWeights++;
|
||||
distribution[i] = weight / totalWeight;
|
||||
}
|
||||
}
|
||||
if (numEncounteredWeights !== weights.size) {
|
||||
throw new Error("weights included nodes not present in the nodeOrder");
|
||||
}
|
||||
return distribution;
|
||||
}
|
||||
|
||||
export type NodeDistribution = Map<NodeAddressT, Probability>;
|
||||
|
||||
export type NodeToConnections = Map<NodeAddressT, $ReadOnlyArray<Connection>>;
|
||||
|
||||
type NodeAddressMarkovChain = Map<
|
||||
|
|
|
@ -4,14 +4,16 @@ import sortBy from "lodash.sortby";
|
|||
|
||||
import {Graph, NodeAddress} from "../graph";
|
||||
import {
|
||||
distributionToNodeDistribution,
|
||||
createConnections,
|
||||
createOrderedSparseMarkovChain,
|
||||
normalize,
|
||||
normalizeNeighbors,
|
||||
permute,
|
||||
weightedDistribution,
|
||||
} from "./graphToMarkovChain";
|
||||
import {
|
||||
distributionToNodeDistribution,
|
||||
weightedDistribution,
|
||||
} from "./nodeDistribution";
|
||||
import * as MapUtil from "../../util/map";
|
||||
|
||||
import {node, advancedGraph, edge} from "../graphTestUtil";
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
// @flow
|
||||
|
||||
/**
|
||||
* A distribution over the integers `0` through `n - 1`, where `n` is
|
||||
* the length of the array. The value at index `i` is the probability of
|
||||
* `i` in the distribution. The values should sum to 1.
|
||||
*/
|
||||
export type Distribution = Float64Array;
|
||||
|
||||
import {computeDelta, type Distribution} from "./distribution";
|
||||
/**
|
||||
* The data inputs to running PageRank.
|
||||
*
|
||||
|
@ -123,13 +117,6 @@ export function sparseMarkovChainFromTransitionMatrix(
|
|||
});
|
||||
}
|
||||
|
||||
export function uniformDistribution(n: number): Distribution {
|
||||
if (isNaN(n) || !isFinite(n) || n !== Math.floor(n) || n <= 0) {
|
||||
throw new Error("expected positive integer, but got: " + n);
|
||||
}
|
||||
return new Float64Array(n).fill(1 / n);
|
||||
}
|
||||
|
||||
function sparseMarkovChainActionInto(
|
||||
chain: SparseMarkovChain,
|
||||
seed: Distribution,
|
||||
|
@ -159,23 +146,6 @@ export function sparseMarkovChainAction(
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the maximum difference (in absolute value) between components in two
|
||||
* distributions.
|
||||
*
|
||||
* Equivalent to $\norm{pi0 - pi1}_\infty$.
|
||||
*/
|
||||
export function computeDelta(pi0: Distribution, pi1: Distribution) {
|
||||
let maxDelta = -Infinity;
|
||||
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
|
||||
// same (i.e., there has been no permutation).
|
||||
pi0.forEach((x, i) => {
|
||||
const delta = Math.abs(x - pi1[i]);
|
||||
maxDelta = Math.max(delta, maxDelta);
|
||||
});
|
||||
return maxDelta;
|
||||
}
|
||||
|
||||
function* findStationaryDistributionGenerator(
|
||||
params: PagerankParams,
|
||||
options: {|
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
// @flow
|
||||
|
||||
import type {Distribution, SparseMarkovChain} from "./markovChain";
|
||||
import {
|
||||
type Distribution,
|
||||
uniformDistribution,
|
||||
computeDelta,
|
||||
} from "./distribution";
|
||||
import {
|
||||
type SparseMarkovChain,
|
||||
findStationaryDistribution,
|
||||
sparseMarkovChainAction,
|
||||
sparseMarkovChainFromTransitionMatrix,
|
||||
uniformDistribution,
|
||||
computeDelta,
|
||||
type StationaryDistributionResult,
|
||||
type PagerankParams,
|
||||
} from "./markovChain";
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
// @flow
|
||||
|
||||
import {type NodeAddressT, NodeAddress} from "../graph";
|
||||
import {type Distribution, uniformDistribution} from "./distribution";
|
||||
|
||||
export type Probability = number;
|
||||
export type NodeDistribution = Map<NodeAddressT, Probability>;
|
||||
|
||||
export function distributionToNodeDistribution(
|
||||
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||
pi: Distribution
|
||||
): NodeDistribution {
|
||||
const result = new Map();
|
||||
nodeOrder.forEach((node, i) => {
|
||||
const probability = pi[i];
|
||||
result.set(node, probability);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Distribution using provided node weights.
|
||||
*
|
||||
* weightedDistribution takes in a node order (as a read only array of NodeAddressT),
|
||||
* and a map providing weights for a subset of those nodes. It returns a Distribution
|
||||
* with the invariant that every node's weight is proportional to its relative weight
|
||||
* in the weights map. For example, in a case where there were three nodes and they
|
||||
* had weights of 0, 1, and 3 respectively, the distribution would be [0, 0.25, 0.75].
|
||||
*
|
||||
* If a node address is not present in the weight map, its weight is assumed to be 0.
|
||||
* If any weight is negative or non-finite, an error will be thrown.
|
||||
* If the sum of all weights is 0, then a uniform distribution will be returned.
|
||||
* If the weight map assigned weight to nodes which are not in the node order, an error
|
||||
* will be thrown.
|
||||
*/
|
||||
export function weightedDistribution(
|
||||
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||
weights: Map<NodeAddressT, number>
|
||||
): Distribution {
|
||||
let totalWeight = 0;
|
||||
for (const [address, weight] of weights.entries()) {
|
||||
if (weight < 0 || !isFinite(weight)) {
|
||||
throw new Error(
|
||||
`Invalid weight ${weight} associated with address ${NodeAddress.toString(
|
||||
address
|
||||
)}`
|
||||
);
|
||||
}
|
||||
totalWeight += weight;
|
||||
}
|
||||
if (totalWeight === 0) {
|
||||
return uniformDistribution(nodeOrder.length);
|
||||
}
|
||||
let numEncounteredWeights = 0;
|
||||
const distribution = new Float64Array(nodeOrder.length);
|
||||
for (let i = 0; i < distribution.length; i++) {
|
||||
const weight = weights.get(nodeOrder[i]);
|
||||
if (weight != null) {
|
||||
numEncounteredWeights++;
|
||||
distribution[i] = weight / totalWeight;
|
||||
}
|
||||
}
|
||||
if (numEncounteredWeights !== weights.size) {
|
||||
throw new Error("weights included nodes not present in the nodeOrder");
|
||||
}
|
||||
return distribution;
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
// @flow
|
||||
|
||||
import {NodeAddress} from "../../core/graph";
|
||||
import {
|
||||
weightedDistribution,
|
||||
distributionToNodeDistribution,
|
||||
} from "./nodeDistribution";
|
||||
|
||||
describe("core/attribution/nodeDistribution", () => {
  const n1 = NodeAddress.fromParts(["n1"]);
  const n2 = NodeAddress.fromParts(["n2"]);

  describe("distributionToNodeDistribution", () => {
    it("works", () => {
      const pi = Float64Array.of(0.25, 0.75);
      const expected = new Map([[n1, 0.25], [n2, 0.75]]);
      expect(distributionToNodeDistribution([n1, n2], pi)).toEqual(expected);
    });
  });

  describe("weightedDistribution", () => {
    const a = NodeAddress.fromParts(["a"]);
    const b = NodeAddress.fromParts(["b"]);
    const c = NodeAddress.fromParts(["c"]);
    const d = NodeAddress.fromParts(["d"]);

    // Returns a fresh array on every call so tests cannot share state.
    function order() {
      return [a, b, c, d];
    }
    // The distribution expected whenever all four nodes are equally likely.
    function uniform() {
      return Float64Array.of(0.25, 0.25, 0.25, 0.25);
    }

    it("gives a uniform distribution for an empty map", () => {
      const result = weightedDistribution(order(), new Map());
      expect(result).toEqual(uniform());
    });
    it("gives a uniform distribution for a map with 0 weight", () => {
      const result = weightedDistribution(order(), new Map([[a, 0]]));
      expect(result).toEqual(uniform());
    });
    it("can put all weight on one node", () => {
      const result = weightedDistribution(order(), new Map([[b, 0.1]]));
      expect(result).toEqual(Float64Array.of(0, 1, 0, 0));
    });
    it("can split weight unequally", () => {
      const result = weightedDistribution(order(), new Map([[b, 1], [c, 3]]));
      expect(result).toEqual(Float64Array.of(0, 0.25, 0.75, 0));
    });
    it("can create a uniform distribution if all weights are equal", () => {
      const weights = new Map([[a, 1], [b, 1], [c, 1], [d, 1]]);
      expect(weightedDistribution(order(), weights)).toEqual(uniform());
    });

    describe("errors if", () => {
      it("has a weighted node that is not in the order", () => {
        const z = NodeAddress.fromParts(["z"]);
        const weights = new Map([[z, 1]]);
        expect(() => weightedDistribution(order(), weights)).toThrowError(
          "weights included nodes not present in the nodeOrder"
        );
      });
      it("has a node with negative weight", () => {
        const weights = new Map([[a, -1]]);
        expect(() => weightedDistribution(order(), weights)).toThrowError(
          "Invalid weight -1"
        );
      });
      it("has a node with NaN weight", () => {
        const weights = new Map([[a, NaN]]);
        expect(() => weightedDistribution(order(), weights)).toThrowError(
          "Invalid weight NaN"
        );
      });
      it("has a node with infinite weight", () => {
        const weights = new Map([[a, Infinity]]);
        expect(() => weightedDistribution(order(), weights)).toThrowError(
          "Invalid weight Infinity"
        );
      });
    });
  });
});
|
|
@ -16,12 +16,14 @@ import {
|
|||
type NeighborsOptions,
|
||||
} from "./graph";
|
||||
import {
|
||||
distributionToNodeDistribution,
|
||||
createConnections,
|
||||
createOrderedSparseMarkovChain,
|
||||
type EdgeWeight,
|
||||
weightedDistribution,
|
||||
} from "./attribution/graphToMarkovChain";
|
||||
import {
|
||||
distributionToNodeDistribution,
|
||||
weightedDistribution,
|
||||
} from "./attribution/nodeDistribution";
|
||||
import {
|
||||
findStationaryDistribution,
|
||||
type PagerankParams,
|
||||
|
|
Loading…
Reference in New Issue