Factor out distribution modules (#1182)

This pulls distribution-related code out of `markovChain.js` into the new
`distribution.js` module, and out of `graphToMarkovChain.js` into the new
`nodeDistribution.js` module.

Since the `computeDelta` method is now exported, I've added some unit
tests.

Test plan: `yarn test` passes.
This commit is contained in:
Dandelion Mané 2019-06-13 23:24:37 +03:00 committed by GitHub
parent e47a5bd84e
commit 4029458098
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 266 additions and 99 deletions

View File

@ -1,7 +1,7 @@
// @flow // @flow
import {NodeAddress, type NodeAddressT} from "../core/graph"; import {NodeAddress, type NodeAddressT} from "../core/graph";
import type {NodeDistribution} from "../core/attribution/graphToMarkovChain"; import type {NodeDistribution} from "../core/attribution/nodeDistribution";
export type NodeScore = Map<NodeAddressT, number>; export type NodeScore = Map<NodeAddressT, number>;

View File

@ -23,10 +23,10 @@ import {
findStationaryDistribution, findStationaryDistribution,
type PagerankParams, type PagerankParams,
type PagerankOptions as CorePagerankOptions, type PagerankOptions as CorePagerankOptions,
uniformDistribution,
} from "../core/attribution/markovChain"; } from "../core/attribution/markovChain";
import {uniformDistribution} from "../core/attribution/distribution";
export type {NodeDistribution} from "../core/attribution/graphToMarkovChain"; export type {NodeDistribution} from "../core/attribution/nodeDistribution";
export type {PagerankNodeDecomposition} from "./pagerankNodeDecomposition"; export type {PagerankNodeDecomposition} from "./pagerankNodeDecomposition";
export type PagerankOptions = {| export type PagerankOptions = {|
+selfLoopWeight?: number, +selfLoopWeight?: number,

View File

@ -9,8 +9,8 @@ import {
import { import {
findStationaryDistribution, findStationaryDistribution,
type PagerankParams, type PagerankParams,
uniformDistribution,
} from "../core/attribution/markovChain"; } from "../core/attribution/markovChain";
import {uniformDistribution} from "../core/attribution/distribution";
import { import {
decompose, decompose,
type PagerankNodeDecomposition, type PagerankNodeDecomposition,

View File

@ -0,0 +1,35 @@
// @flow
/**
* A distribution over the integers `0` through `n - 1`, where `n` is
* the length of the array. The value at index `i` is the probability of
* `i` in the distribution. The values should sum to 1.
*/
export type Distribution = Float64Array;
export function uniformDistribution(n: number): Distribution {
if (isNaN(n) || !isFinite(n) || n !== Math.floor(n) || n <= 0) {
throw new Error("expected positive integer, but got: " + n);
}
return new Float64Array(n).fill(1 / n);
}
/**
* Compute the maximum difference (in absolute value) between components in two
* distributions.
*
* Equivalent to $\norm{pi0 - pi1}_\infty$.
*/
export function computeDelta(pi0: Distribution, pi1: Distribution) {
if (pi0.length === 0 || pi0.length !== pi1.length) {
throw new Error("invalid input");
}
let maxDelta = -Infinity;
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
// same (i.e., there has been no permutation).
pi0.forEach((x, i) => {
const delta = Math.abs(x - pi1[i]);
maxDelta = Math.max(delta, maxDelta);
});
return maxDelta;
}

View File

@ -0,0 +1,52 @@
// @flow
import {uniformDistribution, computeDelta} from "./distribution";
// Unit tests for the helpers exported by the `distribution` module.
describe("core/attribution/distribution", () => {
  describe("uniformDistribution", () => {
    describe("errors for: ", () => {
      // Each entry pairs a test label with an argument that must be rejected.
      const rejectedInputs = [
        {label: "NaN", input: NaN},
        {label: "negatives", input: -1},
        {label: "zero", input: 0},
        {label: "non-integer", input: 1.337},
      ];
      for (const {label, input} of rejectedInputs) {
        it(label, () => {
          const attempt = () => uniformDistribution(input);
          expect(attempt).toThrowError("expected positive integer");
        });
      }
    });
    it("returns a uniform distribution of size 1", () => {
      const expected = new Float64Array([1]);
      expect(uniformDistribution(1)).toEqual(expected);
    });
    it("returns a uniform distribution of size 2", () => {
      const expected = new Float64Array([0.5, 0.5]);
      expect(uniformDistribution(2)).toEqual(expected);
    });
  });
  describe("computeDelta", () => {
    const uniform = uniformDistribution;
    // A fresh non-uniform distribution over three outcomes.
    const skewed = () => new Float64Array([0.5, 0.0, 0.5]);
    it("errors on empty array", () => {
      const empty = () => new Float64Array([]);
      const attempt = () => computeDelta(empty(), empty());
      expect(attempt).toThrowError("invalid input");
    });
    it("works on size-1 array", () => {
      const delta = computeDelta(uniform(1), uniform(1));
      expect(delta).toEqual(0);
    });
    it("errors on mismatched sizes", () => {
      const attempt = () => computeDelta(uniform(1), uniform(2));
      expect(attempt).toThrowError("invalid input");
    });
    it("correctly computes max delta", () => {
      const delta = computeDelta(uniform(3), skewed());
      expect(delta).toEqual(1 / 3);
    });
    it("doesn't depend on argument order", () => {
      // Symmetry implies that the delta is computed with an absolute value.
      const forward = computeDelta(uniform(3), skewed());
      const backward = computeDelta(skewed(), uniform(3));
      expect(forward).toEqual(backward);
    });
  });
});

View File

@ -1,15 +1,12 @@
// @flow // @flow
import {type Edge, type Graph, type NodeAddressT, NodeAddress} from "../graph"; import {type Edge, type Graph, type NodeAddressT} from "../graph";
import { import {type Distribution} from "./distribution";
type Distribution, import {type Probability, type NodeDistribution} from "./nodeDistribution";
type SparseMarkovChain, import {type SparseMarkovChain} from "./markovChain";
uniformDistribution,
} from "./markovChain";
import * as MapUtil from "../../util/map"; import * as MapUtil from "../../util/map";
import * as NullUtil from "../../util/null"; import * as NullUtil from "../../util/null";
export type Probability = number;
export type Adjacency = export type Adjacency =
| {|+type: "SYNTHETIC_LOOP"|} | {|+type: "SYNTHETIC_LOOP"|}
| {|+type: "IN_EDGE", +edge: Edge|} | {|+type: "IN_EDGE", +edge: Edge|}
@ -35,56 +32,6 @@ export function adjacencySource(target: NodeAddressT, adjacency: Adjacency) {
} }
} }
/**
* Create a Distribution using provided node weights.
*
* weightedDistribution takes in a node order (as a read only array of NodeAddressT),
* and a map providing weights for a subset of those nodes. It returns a Distribution
* with the invariant that every node's weight is proportional to its relative weight
* in the weights map. For example, in a case where there were three nodes and they
* had weights of 0, 1, and 3 respectively, the distribution would be [0, 0.25, 0.75].
*
* If a node address is not present in the weight map, its weight is assumed to be 0.
* If any weight is negative or non-finite, an error will be thrown.
* If the sum of all weights is 0, then a uniform distribution will be returned.
* If the weight map assigned weight to nodes which are not in the node order, an error
* will be thrown.
*/
export function weightedDistribution(
nodeOrder: $ReadOnlyArray<NodeAddressT>,
weights: Map<NodeAddressT, number>
): Distribution {
let totalWeight = 0;
for (const [address, weight] of weights.entries()) {
if (weight < 0 || !isFinite(weight)) {
throw new Error(
`Invalid weight ${weight} associated with address ${NodeAddress.toString(
address
)}`
);
}
totalWeight += weight;
}
if (totalWeight === 0) {
return uniformDistribution(nodeOrder.length);
}
let numEncounteredWeights = 0;
const distribution = new Float64Array(nodeOrder.length);
for (let i = 0; i < distribution.length; i++) {
const weight = weights.get(nodeOrder[i]);
if (weight != null) {
numEncounteredWeights++;
distribution[i] = weight / totalWeight;
}
}
if (numEncounteredWeights !== weights.size) {
throw new Error("weights included nodes not present in the nodeOrder");
}
return distribution;
}
export type NodeDistribution = Map<NodeAddressT, Probability>;
export type NodeToConnections = Map<NodeAddressT, $ReadOnlyArray<Connection>>; export type NodeToConnections = Map<NodeAddressT, $ReadOnlyArray<Connection>>;
type NodeAddressMarkovChain = Map< type NodeAddressMarkovChain = Map<

View File

@ -4,14 +4,16 @@ import sortBy from "lodash.sortby";
import {Graph, NodeAddress} from "../graph"; import {Graph, NodeAddress} from "../graph";
import { import {
distributionToNodeDistribution,
createConnections, createConnections,
createOrderedSparseMarkovChain, createOrderedSparseMarkovChain,
normalize, normalize,
normalizeNeighbors, normalizeNeighbors,
permute, permute,
weightedDistribution,
} from "./graphToMarkovChain"; } from "./graphToMarkovChain";
import {
distributionToNodeDistribution,
weightedDistribution,
} from "./nodeDistribution";
import * as MapUtil from "../../util/map"; import * as MapUtil from "../../util/map";
import {node, advancedGraph, edge} from "../graphTestUtil"; import {node, advancedGraph, edge} from "../graphTestUtil";

View File

@ -1,12 +1,6 @@
// @flow // @flow
/** import {computeDelta, type Distribution} from "./distribution";
* A distribution over the integers `0` through `n - 1`, where `n` is
* the length of the array. The value at index `i` is the probability of
* `i` in the distribution. The values should sum to 1.
*/
export type Distribution = Float64Array;
/** /**
* The data inputs to running PageRank. * The data inputs to running PageRank.
* *
@ -123,13 +117,6 @@ export function sparseMarkovChainFromTransitionMatrix(
}); });
} }
export function uniformDistribution(n: number): Distribution {
if (isNaN(n) || !isFinite(n) || n !== Math.floor(n) || n <= 0) {
throw new Error("expected positive integer, but got: " + n);
}
return new Float64Array(n).fill(1 / n);
}
function sparseMarkovChainActionInto( function sparseMarkovChainActionInto(
chain: SparseMarkovChain, chain: SparseMarkovChain,
seed: Distribution, seed: Distribution,
@ -159,23 +146,6 @@ export function sparseMarkovChainAction(
return result; return result;
} }
/**
* Compute the maximum difference (in absolute value) between components in two
* distributions.
*
* Equivalent to $\norm{pi0 - pi1}_\infty$.
*/
export function computeDelta(pi0: Distribution, pi1: Distribution) {
let maxDelta = -Infinity;
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
// same (i.e., there has been no permutation).
pi0.forEach((x, i) => {
const delta = Math.abs(x - pi1[i]);
maxDelta = Math.max(delta, maxDelta);
});
return maxDelta;
}
function* findStationaryDistributionGenerator( function* findStationaryDistributionGenerator(
params: PagerankParams, params: PagerankParams,
options: {| options: {|

View File

@ -1,12 +1,15 @@
// @flow // @flow
import type {Distribution, SparseMarkovChain} from "./markovChain";
import { import {
type Distribution,
uniformDistribution,
computeDelta,
} from "./distribution";
import {
type SparseMarkovChain,
findStationaryDistribution, findStationaryDistribution,
sparseMarkovChainAction, sparseMarkovChainAction,
sparseMarkovChainFromTransitionMatrix, sparseMarkovChainFromTransitionMatrix,
uniformDistribution,
computeDelta,
type StationaryDistributionResult, type StationaryDistributionResult,
type PagerankParams, type PagerankParams,
} from "./markovChain"; } from "./markovChain";

View File

@ -0,0 +1,67 @@
// @flow
import {type NodeAddressT, NodeAddress} from "../graph";
import {type Distribution, uniformDistribution} from "./distribution";
export type Probability = number;
export type NodeDistribution = Map<NodeAddressT, Probability>;
export function distributionToNodeDistribution(
nodeOrder: $ReadOnlyArray<NodeAddressT>,
pi: Distribution
): NodeDistribution {
const result = new Map();
nodeOrder.forEach((node, i) => {
const probability = pi[i];
result.set(node, probability);
});
return result;
}
/**
* Create a Distribution using provided node weights.
*
* weightedDistribution takes in a node order (as a read only array of NodeAddressT),
* and a map providing weights for a subset of those nodes. It returns a Distribution
* with the invariant that every node's weight is proportional to its relative weight
* in the weights map. For example, in a case where there were three nodes and they
* had weights of 0, 1, and 3 respectively, the distribution would be [0, 0.25, 0.75].
*
* If a node address is not present in the weight map, its weight is assumed to be 0.
* If any weight is negative or non-finite, an error will be thrown.
* If the sum of all weights is 0, then a uniform distribution will be returned.
* If the weight map assigned weight to nodes which are not in the node order, an error
* will be thrown.
*/
export function weightedDistribution(
nodeOrder: $ReadOnlyArray<NodeAddressT>,
weights: Map<NodeAddressT, number>
): Distribution {
let totalWeight = 0;
for (const [address, weight] of weights.entries()) {
if (weight < 0 || !isFinite(weight)) {
throw new Error(
`Invalid weight ${weight} associated with address ${NodeAddress.toString(
address
)}`
);
}
totalWeight += weight;
}
if (totalWeight === 0) {
return uniformDistribution(nodeOrder.length);
}
let numEncounteredWeights = 0;
const distribution = new Float64Array(nodeOrder.length);
for (let i = 0; i < distribution.length; i++) {
const weight = weights.get(nodeOrder[i]);
if (weight != null) {
numEncounteredWeights++;
distribution[i] = weight / totalWeight;
}
}
if (numEncounteredWeights !== weights.size) {
throw new Error("weights included nodes not present in the nodeOrder");
}
return distribution;
}

View File

@ -0,0 +1,89 @@
// @flow
import {NodeAddress} from "../../core/graph";
import {
weightedDistribution,
distributionToNodeDistribution,
} from "./nodeDistribution";
// Unit tests for the helpers exported by the `nodeDistribution` module.
describe("core/attribution/nodeDistribution", () => {
  const n1 = NodeAddress.fromParts(["n1"]);
  const n2 = NodeAddress.fromParts(["n2"]);
  describe("distributionToNodeDistribution", () => {
    it("works", () => {
      const actual = distributionToNodeDistribution(
        [n1, n2],
        new Float64Array([0.25, 0.75])
      );
      const expected = new Map([[n1, 0.25], [n2, 0.75]]);
      expect(actual).toEqual(expected);
    });
  });
  describe("weightedDistribution", () => {
    const [a, b, c, d] = ["a", "b", "c", "d"].map((part) =>
      NodeAddress.fromParts([part])
    );
    const order = () => [a, b, c, d];
    // Runs weightedDistribution against the fixed four-node order.
    const distributionFor = (weights) => weightedDistribution(order(), weights);
    // The uniform distribution over the four nodes.
    const uniform = () => new Float64Array([0.25, 0.25, 0.25, 0.25]);
    it("gives a uniform distribution for an empty map", () => {
      expect(distributionFor(new Map())).toEqual(uniform());
    });
    it("gives a uniform distribution for a map with 0 weight", () => {
      const weights = new Map([[a, 0]]);
      expect(distributionFor(weights)).toEqual(uniform());
    });
    it("can put all weight on one node", () => {
      const weights = new Map([[b, 0.1]]);
      expect(distributionFor(weights)).toEqual(new Float64Array([0, 1, 0, 0]));
    });
    it("can split weight unequally", () => {
      const weights = new Map([[b, 1], [c, 3]]);
      expect(distributionFor(weights)).toEqual(
        new Float64Array([0, 0.25, 0.75, 0])
      );
    });
    it("can create a uniform distribution if all weights are equal", () => {
      const weights = new Map([[a, 1], [b, 1], [c, 1], [d, 1]]);
      expect(distributionFor(weights)).toEqual(uniform());
    });
    describe("errors if", () => {
      it("has a weighted node that is not in the order", () => {
        const z = NodeAddress.fromParts(["z"]);
        const weights = new Map([[z, 1]]);
        expect(() => distributionFor(weights)).toThrowError(
          "weights included nodes not present in the nodeOrder"
        );
      });
      it("has a node with negative weight", () => {
        const weights = new Map([[a, -1]]);
        expect(() => distributionFor(weights)).toThrowError(
          "Invalid weight -1"
        );
      });
      it("has a node with NaN weight", () => {
        const weights = new Map([[a, NaN]]);
        expect(() => distributionFor(weights)).toThrowError(
          "Invalid weight NaN"
        );
      });
      it("has a node with infinite weight", () => {
        const weights = new Map([[a, Infinity]]);
        expect(() => distributionFor(weights)).toThrowError(
          "Invalid weight Infinity"
        );
      });
    });
  });
});

View File

@ -16,12 +16,14 @@ import {
type NeighborsOptions, type NeighborsOptions,
} from "./graph"; } from "./graph";
import { import {
distributionToNodeDistribution,
createConnections, createConnections,
createOrderedSparseMarkovChain, createOrderedSparseMarkovChain,
type EdgeWeight, type EdgeWeight,
weightedDistribution,
} from "./attribution/graphToMarkovChain"; } from "./attribution/graphToMarkovChain";
import {
distributionToNodeDistribution,
weightedDistribution,
} from "./attribution/nodeDistribution";
import { import {
findStationaryDistribution, findStationaryDistribution,
type PagerankParams, type PagerankParams,