add analysis/timeline/interval

This commit adds an `interval` module which defines intervals (time
ranges), and methods for slicing up a graph into its constituent time
intervals. This is pre-requisite work for #862.

I've added a dep on d3-array.

Test plan: Unit tests added; run `yarn test`
This commit is contained in:
Dandelion Mané 2019-07-07 15:13:12 +01:00
parent 6cb1b336d5
commit 2335c5d844
4 changed files with 321 additions and 0 deletions

View File

@ -7,6 +7,8 @@
"better-sqlite3": "^5.4.0",
"chalk": "2.4.2",
"commonmark": "^0.29.0",
"d3-array": "^2.2.0",
"d3-time": "^1.0.11",
"express": "^4.16.3",
"fs-extra": "8.1.0",
"history": "^3.0.0",

View File

@ -0,0 +1,134 @@
// @flow
import {max, min} from "d3-array";
import sortBy from "lodash.sortby";
import {utcWeek} from "d3-time";
import * as NullUtil from "../../util/null";
import type {Node, Edge, Graph} from "../../core/graph";
/**
 * Represents a time interval.
 *
 * The interval is half open [startTimeMs, endTimeMs),
 * i.e. if a timestamp is exactly on an interval boundary, it will fall at the
 * start of the subsequent (newer) interval, never at the end of the older one.
 */
export type Interval = {|
  +startTimeMs: number,
  +endTimeMs: number,
|};
/**
 * Represents a slice of a time-partitioned graph.
 *
 * Includes the interval, as well as all of the nodes and edges whose
 * timestamps are within the interval.
 */
export type GraphInterval = {|
  +interval: Interval,
  +nodes: $ReadOnlyArray<Node>,
  +edges: $ReadOnlyArray<Edge>,
|};

// An ordered sequence of GraphIntervals which together cover every timeful
// node and non-dangling edge of a graph (see `partitionGraph`).
export type GraphIntervalPartition = $ReadOnlyArray<GraphInterval>;
/**
* Partition a graph based on time intervals.
*
* The intervals are always one week long, as calculated using d3.utcWeek.
* The result may contain empty intervals.
* If the graph is empty, no intervals are returned.
* Timeless nodes are not included in the partition, nor are dangling edges.
*/
export function partitionGraph(graph: Graph): GraphIntervalPartition {
const nodes = Array.from(graph.nodes());
const timefulNodes = nodes.filter((x) => x.timestampMs != null);
const sortedNodes = sortBy(timefulNodes, (x) => x.timestampMs);
const edges = Array.from(graph.edges({showDangling: false}));
const sortedEdges = sortBy(edges, (x) => x.timestampMs);
const intervals = graphIntervals(graph);
let nodeIndex = 0;
let edgeIndex = 0;
return intervals.map((interval) => {
const nodes = [];
const edges = [];
while (
nodeIndex < sortedNodes.length &&
sortedNodes[nodeIndex].timestampMs < interval.endTimeMs
) {
nodes.push(sortedNodes[nodeIndex++]);
}
while (
edgeIndex < sortedEdges.length &&
sortedEdges[edgeIndex].timestampMs < interval.endTimeMs
) {
edges.push(sortedEdges[edgeIndex++]);
}
return {interval, nodes, edges};
});
}
/**
* Produce an array of Intervals which cover all the node and edge timestamps
* for a graph.
*
* The intervals are one week long, and are aligned on clean week boundaries.
*
* This function is basically a wrapper around weekIntervals that makes sure
* the graph's nodes and edges are all accounted for properly.
*/
export function graphIntervals(graph: Graph): Interval[] {
const nodeTimestamps = Array.from(graph.nodes())
.map((x) => x.timestampMs)
.filter((x) => x != null)
// Unnecessary map is to satisfy flow that the array doesn't contain null.
.map((x) => NullUtil.get(x));
const edgeTimestamps = Array.from(graph.edges({showDangling: false})).map(
(x) => x.timestampMs
);
if (nodeTimestamps.length === 0 && edgeTimestamps.length === 0) {
return [];
}
const allTimestamps = nodeTimestamps.concat(edgeTimestamps);
const start = min(allTimestamps);
const end = max(allTimestamps);
return weekIntervals(start, end);
}
/**
* Produce an array of week-long intervals to cover the startTime and endTime.
*
* Each interval is one week long and aligned on week boundaries, as produced
* by d3.utcWeek. The weeks always use UTC boundaries to ensure consistent
* output regardless of which timezone the user is in.
*
* Assuming that the inputs are valid, there will always be at least one
* interval, so that that interval can cover the input timestamps. (E.g. if
* startMs and endMs are the same value, then the produced interval will be the
* start and end of the last week that starts on or before startMs.)
*/
export function weekIntervals(startMs: number, endMs: number): Interval[] {
if (!isFinite(startMs) || !isFinite(endMs)) {
throw new Error("invalid non-finite input");
}
if (typeof startMs !== "number" || typeof endMs !== "number") {
throw new Error("start or end are not numbers");
}
if (startMs > endMs) {
throw new Error("start time after end time");
}
// Promote the window to the nearest week boundaries, to ensure that
// utcWeek.range will not return an empty array.
// We add one to the endTime so that just in case we're exactly on a week
// boundary, we still get at least one interval.
startMs = utcWeek.floor(startMs);
endMs = utcWeek.ceil(endMs + 1);
const boundaries = utcWeek.range(startMs, endMs);
boundaries.push(endMs);
const intervals = [];
for (let i = 0; i < boundaries.length - 1; i++) {
intervals.push({
startTimeMs: +boundaries[i],
endTimeMs: +boundaries[i + 1],
});
}
return intervals;
}

View File

@ -0,0 +1,175 @@
// @flow
import {utcWeek} from "d3-time";
import {node, edge} from "../../core/graphTestUtil";
import {Graph} from "../../core/graph";
import {partitionGraph, graphIntervals, weekIntervals} from "./interval";
describe("src/analysis/timeline/interval", () => {
  // An arbitrary timestamp somewhere inside a week, plus the UTC week
  // boundaries that bracket it. All test times are derived from these.
  const WEEK_MID = 1562501362239;
  const WEEK_START = +utcWeek.floor(WEEK_MID);
  const WEEK_END = +utcWeek.ceil(WEEK_MID);
  // Returns WEEK_MID shifted by n whole weeks (n may be negative or zero).
  const week = (n) => +utcWeek.offset(WEEK_MID, n);

  // Builds a graph whose node/edge timestamps are expressed as week offsets
  // from WEEK_MID; a `null` node time produces a timeless node.
  function graphWithTiming(
    nodeTimes: (number | null)[],
    edgeTimes: number[]
  ): Graph {
    const graph = new Graph();
    const timeless = {...node("timeless"), timestampMs: null};
    // Add a timeless node so we can ensure all the edges are non-dangling
    graph.addNode(timeless);
    for (let i = 0; i < nodeTimes.length; i++) {
      const n = node(String(i));
      const nt = nodeTimes[i];
      const timestampMs = nt == null ? null : week(nt);
      graph.addNode({...n, timestampMs});
    }
    for (let i = 0; i < edgeTimes.length; i++) {
      const e = edge(String(i), timeless, timeless);
      graph.addEdge({...e, timestampMs: week(edgeTimes[i])});
    }
    return graph;
  }

  describe("partitionGraph", () => {
    // Property-style check: the partition's intervals match graphIntervals,
    // every node/edge lands in exactly one slice, each slice's members fall
    // inside its half-open interval, and nothing timeful is left out.
    function checkPartition(g: Graph) {
      const slices = partitionGraph(g);
      const expectedIntervals = graphIntervals(g);
      expect(slices.map((x) => x.interval)).toEqual(expectedIntervals);
      const seenNodeAddresses = new Set();
      const seenEdgeAddresses = new Set();
      for (const {interval, nodes, edges} of slices) {
        for (const {address, timestampMs} of nodes) {
          expect(timestampMs).not.toBe(null);
          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
          expect(timestampMs).toBeLessThan(interval.endTimeMs);
          // Each node appears in at most one slice.
          expect(seenNodeAddresses.has(address)).toBe(false);
          seenNodeAddresses.add(address);
        }
        for (const {address, timestampMs} of edges) {
          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
          expect(timestampMs).toBeLessThan(interval.endTimeMs);
          // Each edge appears in at most one slice.
          expect(seenEdgeAddresses.has(address)).toBe(false);
          seenEdgeAddresses.add(address);
        }
      }
      // Completeness: every timeful node and non-dangling edge was seen.
      const timefulNodes = Array.from(g.nodes()).filter(
        (x) => x.timestampMs != null
      );
      expect(timefulNodes).toHaveLength(seenNodeAddresses.size);
      const edges = Array.from(g.edges({showDangling: false}));
      expect(edges).toHaveLength(seenEdgeAddresses.size);
    }
    it("partitions an empty graph correctly", () => {
      checkPartition(new Graph());
    });
    it("partitions a graph with just nodes", () => {
      checkPartition(graphWithTiming([5, 3, 99, 12], []));
    });
    it("partitions a graph with just edges", () => {
      checkPartition(graphWithTiming([], [3, 4, 99]));
    });
    it("partitions a graph with nodes and edges", () => {
      checkPartition(graphWithTiming([3, 9], [4, 12]));
    });
    it("partitions a graph with dangling edges", () => {
      const graph = graphWithTiming([3, 9], [4, 12]);
      const n = node("nope");
      // This edge's endpoints are absent from the graph, so it is dangling
      // and must be excluded from the partition.
      const d = edge("dangling", n, n);
      graph.addEdge(d);
      checkPartition(graph);
    });
  });

  describe("graphIntervals", () => {
    it("an empty graph has no intervals", () => {
      const intervals = graphIntervals(new Graph());
      expect(intervals).toHaveLength(0);
    });
    it("a graph with only timeless nodes has no intervals", () => {
      const graph = graphWithTiming([null, null], []);
      const intervals = graphIntervals(graph);
      expect(intervals).toHaveLength(0);
    });
    it("a graph with only dangling edges has no intervals", () => {
      const graph = new Graph();
      const n = node("nonexistent");
      const e = {...edge("dangling", n, n), timestampMs: WEEK_MID};
      graph.addEdge(e);
      const intervals = graphIntervals(graph);
      expect(intervals).toHaveLength(0);
    });
    it("timing information comes from the nodes and the edges", () => {
      // Note that the nodes/edges have not been added in time-sorted order,
      // and that the max time comes from the edges while the min time comes from the nodes.
      const graph = graphWithTiming([3, 1, 9], [2, 14, 3]);
      const intervals = graphIntervals(graph);
      expect(intervals).toEqual(weekIntervals(week(1), week(14)));
    });
  });

  describe("weekIntervals", () => {
    it("produces a covering interval for a single timestamp", () => {
      const intervals = weekIntervals(WEEK_MID, WEEK_MID);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces a correct interval for a single timestamp aligned on week start", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_START);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces one interval if passed start and end-1", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_END - 1);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces two intervals if passed start and end of week", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_END);
      // It needs to have this behavior because the intervals are defined as half-open.
      // So if there is a node with timestamp WEEK_END, it will need to fall at the start
      // of the subsequent interval.
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
        {
          startTimeMs: WEEK_END,
          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
        },
      ]);
    });
    it("produces three intervals if the boundaries extend past a week on both sides", () => {
      const intervals = weekIntervals(WEEK_START - 1, WEEK_END + 1);
      expect(intervals).toEqual([
        {
          startTimeMs: +utcWeek.floor(WEEK_START - 1),
          endTimeMs: WEEK_START,
        },
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
        {
          startTimeMs: WEEK_END,
          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
        },
      ]);
    });
  });
});

View File

@ -2610,6 +2610,16 @@ cyclist@~0.2.2:
resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-0.2.2.tgz#1b33792e11e914a2fd6d6ed6447464444e5fa640"
integrity sha1-GzN5LhHpFKL9bW7WRHRkRE5fpkA=
d3-array@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-2.2.0.tgz#a9e966b8f8d78f0888d98db1fb840fc8da8ac5c7"
integrity sha512-eE0QmSh6xToqM3sxHiJYg/QFdNn52ZEgmFE8A8abU8GsHvsIOolqH8B70/8+VGAKm5MlwaExhqR3DLIjOJMLPA==
d3-time@^1.0.11:
version "1.0.11"
resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-1.0.11.tgz#1d831a3e25cd189eb256c17770a666368762bbce"
integrity sha512-Z3wpvhPLW4vEScGeIMUckDW7+3hWKOQfAWg/U7PlWBnQmeKQ00gCUsTtWSYulrKNA7ta8hJ+xXc6MHrMuITwEw==
d@1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"