From 2335c5d8447790e896a8fcd388f15d84094dfbad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?=
Date: Sun, 7 Jul 2019 15:13:12 +0100
Subject: [PATCH] add `analysis/timeline/interval`

This commit adds an `interval` module which defines intervals (time
ranges), and methods for slicing up a graph into its constituent time
intervals. This is pre-requisite work for #862.

I've added deps on d3-array and d3-time.

Test plan: Unit tests added; run `yarn test`
---
 package.json                           |   2 +
 src/analysis/timeline/interval.js      | 134 +++++++++++++++++++
 src/analysis/timeline/interval.test.js | 175 +++++++++++++++++++++++++
 yarn.lock                              |  10 ++
 4 files changed, 321 insertions(+)
 create mode 100644 src/analysis/timeline/interval.js
 create mode 100644 src/analysis/timeline/interval.test.js

diff --git a/package.json b/package.json
index 626ce73..51957fa 100644
--- a/package.json
+++ b/package.json
@@ -7,6 +7,8 @@
     "better-sqlite3": "^5.4.0",
     "chalk": "2.4.2",
     "commonmark": "^0.29.0",
+    "d3-array": "^2.2.0",
+    "d3-time": "^1.0.11",
     "express": "^4.16.3",
     "fs-extra": "8.1.0",
     "history": "^3.0.0",
diff --git a/src/analysis/timeline/interval.js b/src/analysis/timeline/interval.js
new file mode 100644
index 0000000..6734156
--- /dev/null
+++ b/src/analysis/timeline/interval.js
@@ -0,0 +1,134 @@
+// @flow
+
+import {max, min} from "d3-array";
+import sortBy from "lodash.sortby";
+import {utcWeek} from "d3-time";
+import * as NullUtil from "../../util/null";
+import type {Node, Edge, Graph} from "../../core/graph";
+
+/**
+ * Represents a time interval
+ * The interval is half open [startTimeMs, endTimeMs),
+ * i.e. if a timestamp is exactly on the interval boundary, it will fall at the
+ * start of the later interval.
+ */
+export type Interval = {|
+  +startTimeMs: number,
+  +endTimeMs: number,
+|};
+
+/**
+ * Represents a slice of a time-partitioned graph
+ * Includes the interval, as well as all of the nodes and edges whose timestamps
+ * are within the interval.
+ */
+export type GraphInterval = {|
+  +interval: Interval,
+  +nodes: $ReadOnlyArray<Node>,
+  +edges: $ReadOnlyArray<Edge>,
+|};
+
+export type GraphIntervalPartition = $ReadOnlyArray<GraphInterval>;
+
+/**
+ * Partition a graph based on time intervals.
+ *
+ * The intervals are always one week long, as calculated using d3.utcWeek.
+ * The result may contain empty intervals.
+ * If the graph is empty, no intervals are returned.
+ * Timeless nodes are not included in the partition, nor are dangling edges.
+ */
+export function partitionGraph(graph: Graph): GraphIntervalPartition {
+  const nodes = Array.from(graph.nodes());
+  const timefulNodes = nodes.filter((x) => x.timestampMs != null);
+  const sortedNodes = sortBy(timefulNodes, (x) => x.timestampMs);
+  const edges = Array.from(graph.edges({showDangling: false}));
+  const sortedEdges = sortBy(edges, (x) => x.timestampMs);
+  const intervals = graphIntervals(graph);
+  let nodeIndex = 0;
+  let edgeIndex = 0;
+  return intervals.map((interval) => {
+    const nodes = [];
+    const edges = [];
+    while (
+      nodeIndex < sortedNodes.length &&
+      sortedNodes[nodeIndex].timestampMs < interval.endTimeMs
+    ) {
+      nodes.push(sortedNodes[nodeIndex++]);
+    }
+    while (
+      edgeIndex < sortedEdges.length &&
+      sortedEdges[edgeIndex].timestampMs < interval.endTimeMs
+    ) {
+      edges.push(sortedEdges[edgeIndex++]);
+    }
+    return {interval, nodes, edges};
+  });
+}
+
+/**
+ * Produce an array of Intervals which cover all the node and edge timestamps
+ * for a graph.
+ *
+ * The intervals are one week long, and are aligned on clean week boundaries.
+ *
+ * This function is basically a wrapper around weekIntervals that makes sure
+ * the graph's nodes and edges are all accounted for properly.
+ */
+export function graphIntervals(graph: Graph): Interval[] {
+  const nodeTimestamps = Array.from(graph.nodes())
+    .map((x) => x.timestampMs)
+    .filter((x) => x != null)
+    // Unnecessary map is to satisfy flow that the array doesn't contain null.
+    .map((x) => NullUtil.get(x));
+  const edgeTimestamps = Array.from(graph.edges({showDangling: false})).map(
+    (x) => x.timestampMs
+  );
+  if (nodeTimestamps.length === 0 && edgeTimestamps.length === 0) {
+    return [];
+  }
+  const allTimestamps = nodeTimestamps.concat(edgeTimestamps);
+  const start = min(allTimestamps);
+  const end = max(allTimestamps);
+  return weekIntervals(start, end);
+}
+
+/**
+ * Produce an array of week-long intervals to cover the startTime and endTime.
+ *
+ * Each interval is one week long and aligned on week boundaries, as produced
+ * by d3.utcWeek. The weeks always use UTC boundaries to ensure consistent
+ * output regardless of which timezone the user is in.
+ *
+ * Assuming that the inputs are valid, there will always be at least one
+ * interval, so that that interval can cover the input timestamps. (E.g. if
+ * startMs and endMs are the same value, then the produced interval will be the
+ * start and end of the last week that starts on or before startMs.)
+ */
+export function weekIntervals(startMs: number, endMs: number): Interval[] {
+  if (!isFinite(startMs) || !isFinite(endMs)) {
+    throw new Error("invalid non-finite input");
+  }
+  if (typeof startMs !== "number" || typeof endMs !== "number") {
+    throw new Error("start or end are not numbers");
+  }
+  if (startMs > endMs) {
+    throw new Error("start time after end time");
+  }
+  // Promote the window to the nearest week boundaries, to ensure that
+  // utcWeek.range will not return an empty array.
+  // We add one to the endTime so that just in case we're exactly on a week
+  // boundary, we still get at least one interval.
+  startMs = utcWeek.floor(startMs);
+  endMs = utcWeek.ceil(endMs + 1);
+  const boundaries = utcWeek.range(startMs, endMs);
+  boundaries.push(endMs);
+  const intervals = [];
+  for (let i = 0; i < boundaries.length - 1; i++) {
+    intervals.push({
+      startTimeMs: +boundaries[i],
+      endTimeMs: +boundaries[i + 1],
+    });
+  }
+  return intervals;
+}
diff --git a/src/analysis/timeline/interval.test.js b/src/analysis/timeline/interval.test.js
new file mode 100644
index 0000000..fb81801
--- /dev/null
+++ b/src/analysis/timeline/interval.test.js
@@ -0,0 +1,175 @@
+// @flow
+
+import {utcWeek} from "d3-time";
+import {node, edge} from "../../core/graphTestUtil";
+import {Graph} from "../../core/graph";
+import {partitionGraph, graphIntervals, weekIntervals} from "./interval";
+
+describe("src/analysis/timeline/interval", () => {
+  const WEEK_MID = 1562501362239;
+  const WEEK_START = +utcWeek.floor(WEEK_MID);
+  const WEEK_END = +utcWeek.ceil(WEEK_MID);
+  const week = (n) => +utcWeek.offset(WEEK_MID, n);
+  function graphWithTiming(
+    nodeTimes: (number | null)[],
+    edgeTimes: number[]
+  ): Graph {
+    const graph = new Graph();
+    const timeless = {...node("timeless"), timestampMs: null};
+    // Add a timeless node so we can ensure all the edges are non-dangling
+    graph.addNode(timeless);
+    for (let i = 0; i < nodeTimes.length; i++) {
+      const n = node(String(i));
+      const nt = nodeTimes[i];
+      const timestampMs = nt == null ? null : week(nt);
+      graph.addNode({...n, timestampMs});
+    }
+    for (let i = 0; i < edgeTimes.length; i++) {
+      const e = edge(String(i), timeless, timeless);
+      graph.addEdge({...e, timestampMs: week(edgeTimes[i])});
+    }
+    return graph;
+  }
+
+  describe("partitionGraph", () => {
+    function checkPartition(g: Graph) {
+      const slices = partitionGraph(g);
+      const expectedIntervals = graphIntervals(g);
+      expect(slices.map((x) => x.interval)).toEqual(expectedIntervals);
+
+      const seenNodeAddresses = new Set();
+      const seenEdgeAddresses = new Set();
+      for (const {interval, nodes, edges} of slices) {
+        for (const {address, timestampMs} of nodes) {
+          expect(timestampMs).not.toBe(null);
+          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
+          expect(timestampMs).toBeLessThan(interval.endTimeMs);
+          expect(seenNodeAddresses.has(address)).toBe(false);
+          seenNodeAddresses.add(address);
+        }
+        for (const {address, timestampMs} of edges) {
+          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
+          expect(timestampMs).toBeLessThan(interval.endTimeMs);
+          expect(seenEdgeAddresses.has(address)).toBe(false);
+          seenEdgeAddresses.add(address);
+        }
+      }
+      const timefulNodes = Array.from(g.nodes()).filter(
+        (x) => x.timestampMs != null
+      );
+      expect(timefulNodes).toHaveLength(seenNodeAddresses.size);
+      const edges = Array.from(g.edges({showDangling: false}));
+      expect(edges).toHaveLength(seenEdgeAddresses.size);
+    }
+
+    it("partitions an empty graph correctly", () => {
+      checkPartition(new Graph());
+    });
+    it("partitions a graph with just nodes", () => {
+      checkPartition(graphWithTiming([5, 3, 99, 12], []));
+    });
+    it("partitions a graph with just edges", () => {
+      checkPartition(graphWithTiming([], [3, 4, 99]));
+    });
+    it("partitions a graph with nodes and edges", () => {
+      checkPartition(graphWithTiming([3, 9], [4, 12]));
+    });
+    it("partitions a graph with dangling edges", () => {
+      const graph = graphWithTiming([3, 9], [4, 12]);
+      const n = node("nope");
+      const d = edge("dangling", n, n);
+      graph.addEdge(d);
+      checkPartition(graph);
+    });
+  });
+
+  describe("graphIntervals", () => {
+    it("an empty graph has no intervals", () => {
+      const intervals = graphIntervals(new Graph());
+      expect(intervals).toHaveLength(0);
+    });
+    it("a graph with only timeless nodes has no intervals", () => {
+      const graph = graphWithTiming([null, null], []);
+      const intervals = graphIntervals(graph);
+      expect(intervals).toHaveLength(0);
+    });
+    it("a graph with only dangling edges has no intervals", () => {
+      const graph = new Graph();
+      const n = node("nonexistent");
+      const e = {...edge("dangling", n, n), timestampMs: WEEK_MID};
+      graph.addEdge(e);
+      const intervals = graphIntervals(graph);
+      expect(intervals).toHaveLength(0);
+    });
+    it("timing information comes from the nodes and the edges", () => {
+      // Note that the nodes/edges have not been added in time-sorted order,
+      // and that the max time comes from the edges while the min time comes from the nodes.
+      const graph = graphWithTiming([3, 1, 9], [2, 14, 3]);
+      const intervals = graphIntervals(graph);
+      expect(intervals).toEqual(weekIntervals(week(1), week(14)));
+    });
+  });
+
+  describe("weekIntervals", () => {
+    it("produces a covering interval for a single timestamp", () => {
+      const intervals = weekIntervals(WEEK_MID, WEEK_MID);
+      expect(intervals).toEqual([
+        {
+          startTimeMs: WEEK_START,
+          endTimeMs: WEEK_END,
+        },
+      ]);
+    });
+    it("produces a correct interval for a single timestamp aligned on week start", () => {
+      const intervals = weekIntervals(WEEK_START, WEEK_START);
+      expect(intervals).toEqual([
+        {
+          startTimeMs: WEEK_START,
+          endTimeMs: WEEK_END,
+        },
+      ]);
+    });
+    it("produces one interval if passed start and end-1", () => {
+      const intervals = weekIntervals(WEEK_START, WEEK_END - 1);
+      expect(intervals).toEqual([
+        {
+          startTimeMs: WEEK_START,
+          endTimeMs: WEEK_END,
+        },
+      ]);
+    });
+    it("produces two intervals if passed start and end of week", () => {
+      const intervals = weekIntervals(WEEK_START, WEEK_END);
+      // It needs to have this behavior because the intervals are defined as half-open.
+      // So if there is a node with timestamp WEEK_END, it will need to fall at the start
+      // of the subsequent interval.
+      expect(intervals).toEqual([
+        {
+          startTimeMs: WEEK_START,
+          endTimeMs: WEEK_END,
+        },
+        {
+          startTimeMs: WEEK_END,
+          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
+        },
+      ]);
+    });
+    it("produces three intervals if the boundaries extend past a week on both sides", () => {
+      const intervals = weekIntervals(WEEK_START - 1, WEEK_END + 1);
+      expect(intervals).toEqual([
+        {
+          startTimeMs: +utcWeek.floor(WEEK_START - 1),
+          endTimeMs: WEEK_START,
+        },
+        {
+          startTimeMs: WEEK_START,
+          endTimeMs: WEEK_END,
+        },
+        {
+          startTimeMs: WEEK_END,
+          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
+        },
+      ]);
+    });
+  });
+});
diff --git a/yarn.lock b/yarn.lock
index d910693..5a8f45f 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2610,6 +2610,16 @@ cyclist@~0.2.2:
   resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-0.2.2.tgz#1b33792e11e914a2fd6d6ed6447464444e5fa640"
   integrity sha1-GzN5LhHpFKL9bW7WRHRkRE5fpkA=
 
+d3-array@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-2.2.0.tgz#a9e966b8f8d78f0888d98db1fb840fc8da8ac5c7"
+  integrity sha512-eE0QmSh6xToqM3sxHiJYg/QFdNn52ZEgmFE8A8abU8GsHvsIOolqH8B70/8+VGAKm5MlwaExhqR3DLIjOJMLPA==
+
+d3-time@^1.0.11:
+  version "1.0.11"
+  resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-1.0.11.tgz#1d831a3e25cd189eb256c17770a666368762bbce"
+  integrity sha512-Z3wpvhPLW4vEScGeIMUckDW7+3hWKOQfAWg/U7PlWBnQmeKQ00gCUsTtWSYulrKNA7ta8hJ+xXc6MHrMuITwEw==
+
 d@1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"