Mirror of https://github.com/status-im/sourcecred.git
add analysis/timeline/interval
This commit adds an `interval` module which defines intervals (time ranges), along with methods for slicing a graph into its constituent time intervals. This is prerequisite work for #862. I've added deps on d3-array and d3-time. Test plan: unit tests added; run `yarn test`.
parent 6cb1b336d5
commit 2335c5d844
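A quick usage sketch (hypothetical, not part of this commit) showing how downstream code might consume the new module: build a small graph with timestamped nodes and an edge, then iterate over the week-long slices returned by `partitionGraph`. The import paths, the example timestamps, and the reuse of the `node`/`edge` test helpers are illustrative assumptions.

// @flow
// Hypothetical usage sketch; not part of the diff.
import {Graph} from "./src/core/graph";
import {node, edge} from "./src/core/graphTestUtil";
import {partitionGraph} from "./src/analysis/timeline/interval";

const a = {...node("a"), timestampMs: Date.UTC(2019, 6, 1)}; // Mon, Jul 1 2019
const b = {...node("b"), timestampMs: Date.UTC(2019, 6, 9)}; // Tue, Jul 9 2019
const graph = new Graph();
graph.addNode(a);
graph.addNode(b);
graph.addEdge({...edge("ab", a, b), timestampMs: Date.UTC(2019, 6, 9)});

for (const {interval, nodes, edges} of partitionGraph(graph)) {
  // Each slice covers the half-open range [startTimeMs, endTimeMs).
  console.log(
    new Date(interval.startTimeMs).toISOString(),
    nodes.length,
    edges.length
  );
}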
package.json
@@ -7,6 +7,8 @@
   "better-sqlite3": "^5.4.0",
   "chalk": "2.4.2",
   "commonmark": "^0.29.0",
+  "d3-array": "^2.2.0",
+  "d3-time": "^1.0.11",
   "express": "^4.16.3",
   "fs-extra": "8.1.0",
   "history": "^3.0.0",
src/analysis/timeline/interval.js (new file, 134 lines)
@@ -0,0 +1,134 @@
// @flow

import {max, min} from "d3-array";
import sortBy from "lodash.sortby";
import {utcWeek} from "d3-time";
import * as NullUtil from "../../util/null";
import type {Node, Edge, Graph} from "../../core/graph";

/**
 * Represents a time interval
 * The interval is half open [startTimeMs, endTimeMs),
 * i.e. if a timestamp is exactly on the interval boundary, it will fall at the
 * start of the subsequent interval.
 */
export type Interval = {|
  +startTimeMs: number,
  +endTimeMs: number,
|};

/**
 * Represents a slice of a time-partitioned graph
 * Includes the interval, as well as all of the nodes and edges whose timestamps
 * are within the interval.
 */
export type GraphInterval = {|
  +interval: Interval,
  +nodes: $ReadOnlyArray<Node>,
  +edges: $ReadOnlyArray<Edge>,
|};

export type GraphIntervalPartition = $ReadOnlyArray<GraphInterval>;

/**
 * Partition a graph based on time intervals.
 *
 * The intervals are always one week long, as calculated using d3.utcWeek.
 * The result may contain empty intervals.
 * If the graph is empty, no intervals are returned.
 * Timeless nodes are not included in the partition, nor are dangling edges.
 */
export function partitionGraph(graph: Graph): GraphIntervalPartition {
  const nodes = Array.from(graph.nodes());
  const timefulNodes = nodes.filter((x) => x.timestampMs != null);
  const sortedNodes = sortBy(timefulNodes, (x) => x.timestampMs);
  const edges = Array.from(graph.edges({showDangling: false}));
  const sortedEdges = sortBy(edges, (x) => x.timestampMs);
  const intervals = graphIntervals(graph);
  let nodeIndex = 0;
  let edgeIndex = 0;
  return intervals.map((interval) => {
    const nodes = [];
    const edges = [];
    while (
      nodeIndex < sortedNodes.length &&
      sortedNodes[nodeIndex].timestampMs < interval.endTimeMs
    ) {
      nodes.push(sortedNodes[nodeIndex++]);
    }
    while (
      edgeIndex < sortedEdges.length &&
      sortedEdges[edgeIndex].timestampMs < interval.endTimeMs
    ) {
      edges.push(sortedEdges[edgeIndex++]);
    }
    return {interval, nodes, edges};
  });
}

/**
 * Produce an array of Intervals which cover all the node and edge timestamps
 * for a graph.
 *
 * The intervals are one week long, and are aligned on clean week boundaries.
 *
 * This function is basically a wrapper around weekIntervals that makes sure
 * the graph's nodes and edges are all accounted for properly.
 */
export function graphIntervals(graph: Graph): Interval[] {
  const nodeTimestamps = Array.from(graph.nodes())
    .map((x) => x.timestampMs)
    .filter((x) => x != null)
    // Unnecessary map is to satisfy flow that the array doesn't contain null.
    .map((x) => NullUtil.get(x));
  const edgeTimestamps = Array.from(graph.edges({showDangling: false})).map(
    (x) => x.timestampMs
  );
  if (nodeTimestamps.length === 0 && edgeTimestamps.length === 0) {
    return [];
  }
  const allTimestamps = nodeTimestamps.concat(edgeTimestamps);
  const start = min(allTimestamps);
  const end = max(allTimestamps);
  return weekIntervals(start, end);
}

/**
 * Produce an array of week-long intervals to cover the startTime and endTime.
 *
 * Each interval is one week long and aligned on week boundaries, as produced
 * by d3.utcWeek. The weeks always use UTC boundaries to ensure consistent
 * output regardless of which timezone the user is in.
 *
 * Assuming that the inputs are valid, there will always be at least one
 * interval, so that that interval can cover the input timestamps. (E.g. if
 * startMs and endMs are the same value, then the produced interval will be the
 * start and end of the last week that starts on or before startMs.)
 */
export function weekIntervals(startMs: number, endMs: number): Interval[] {
  if (!isFinite(startMs) || !isFinite(endMs)) {
    throw new Error("invalid non-finite input");
  }
  if (typeof startMs !== "number" || typeof endMs !== "number") {
    throw new Error("start or end are not numbers");
  }
  if (startMs > endMs) {
    throw new Error("start time after end time");
  }
  // Promote the window to the nearest week boundaries, to ensure that
  // utcWeek.range will not return an empty array.
  // We add one to the endTime so that just in case we're exactly on a week
  // boundary, we still get at least one interval.
  startMs = utcWeek.floor(startMs);
  endMs = utcWeek.ceil(endMs + 1);
  const boundaries = utcWeek.range(startMs, endMs);
  boundaries.push(endMs);
  const intervals = [];
  for (let i = 0; i < boundaries.length - 1; i++) {
    intervals.push({
      startTimeMs: +boundaries[i],
      endTimeMs: +boundaries[i + 1],
    });
  }
  return intervals;
}
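For concreteness (a hypothetical illustration, not part of this commit): the intervals produced by `weekIntervals` are aligned to d3's `utcWeek`, whose weeks begin on Sunday at 00:00 UTC, so a single timestamp always yields exactly one covering interval. The import path below is an assumption.

// Hypothetical illustration; not part of the diff.
import {weekIntervals} from "./src/analysis/timeline/interval";

// Wednesday 2019-07-03 lies in the UTC week from Sun 2019-06-30 to Sun 2019-07-07.
const t = Date.UTC(2019, 6, 3); // months are 0-indexed, so 6 is July
const [interval] = weekIntervals(t, t);
// interval.startTimeMs === Date.UTC(2019, 5, 30) // Sun, Jun 30, 00:00 UTC
// interval.endTimeMs   === Date.UTC(2019, 6, 7)  // Sun, Jul 7, 00:00 UTC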
src/analysis/timeline/interval.test.js (new file, 175 lines)
@@ -0,0 +1,175 @@
// @flow

import {utcWeek} from "d3-time";
import {node, edge} from "../../core/graphTestUtil";
import {Graph} from "../../core/graph";
import {partitionGraph, graphIntervals, weekIntervals} from "./interval";

describe("src/analysis/timeline/interval", () => {
  const WEEK_MID = 1562501362239;
  const WEEK_START = +utcWeek.floor(WEEK_MID);
  const WEEK_END = +utcWeek.ceil(WEEK_MID);
  const week = (n) => +utcWeek.offset(WEEK_MID, n);
  function graphWithTiming(
    nodeTimes: (number | null)[],
    edgeTimes: number[]
  ): Graph {
    const graph = new Graph();
    const timeless = {...node("timeless"), timestampMs: null};
    // Add a timeless node so we can ensure all the edges are non-dangling
    graph.addNode(timeless);
    for (let i = 0; i < nodeTimes.length; i++) {
      const n = node(String(i));
      const nt = nodeTimes[i];
      const timestampMs = nt == null ? null : week(nt);
      graph.addNode({...n, timestampMs});
    }
    for (let i = 0; i < edgeTimes.length; i++) {
      const e = edge(String(i), timeless, timeless);
      graph.addEdge({...e, timestampMs: week(edgeTimes[i])});
    }
    return graph;
  }

  describe("partitionGraph", () => {
    function checkPartition(g: Graph) {
      const slices = partitionGraph(g);
      const expectedIntervals = graphIntervals(g);
      expect(slices.map((x) => x.interval)).toEqual(expectedIntervals);

      const seenNodeAddresses = new Set();
      const seenEdgeAddresses = new Set();
      for (const {interval, nodes, edges} of slices) {
        for (const {address, timestampMs} of nodes) {
          expect(timestampMs).not.toBe(null);
          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
          expect(timestampMs).toBeLessThan(interval.endTimeMs);
          expect(seenNodeAddresses.has(address)).toBe(false);
          seenNodeAddresses.add(address);
        }
        for (const {address, timestampMs} of edges) {
          expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
          expect(timestampMs).toBeLessThan(interval.endTimeMs);
          expect(seenEdgeAddresses.has(address)).toBe(false);
          seenEdgeAddresses.add(address);
        }
      }
      const timefulNodes = Array.from(g.nodes()).filter(
        (x) => x.timestampMs != null
      );
      expect(timefulNodes).toHaveLength(seenNodeAddresses.size);
      const edges = Array.from(g.edges({showDangling: false}));
      expect(edges).toHaveLength(seenEdgeAddresses.size);
    }

    it("partitions an empty graph correctly", () => {
      checkPartition(new Graph());
    });
    it("partitions a graph with just nodes", () => {
      checkPartition(graphWithTiming([5, 3, 99, 12], []));
    });
    it("partitions a graph with just edges", () => {
      checkPartition(graphWithTiming([], [3, 4, 99]));
    });
    it("partitions a graph with nodes and edges", () => {
      checkPartition(graphWithTiming([3, 9], [4, 12]));
    });
    it("partitions a graph with dangling edges", () => {
      const graph = graphWithTiming([3, 9], [4, 12]);
      const n = node("nope");
      const d = edge("dangling", n, n);
      graph.addEdge(d);
      checkPartition(graph);
    });
  });

  describe("graphIntervals", () => {
    it("an empty graph has no intervals", () => {
      const intervals = graphIntervals(new Graph());
      expect(intervals).toHaveLength(0);
    });
    it("a graph with only timeless nodes has no intervals", () => {
      const graph = graphWithTiming([null, null], []);
      const intervals = graphIntervals(graph);
      expect(intervals).toHaveLength(0);
    });
    it("a graph with only dangling edges has no intervals", () => {
      const graph = new Graph();
      const n = node("nonexistent");
      const e = {...edge("dangling", n, n), timestampMs: WEEK_MID};
      graph.addEdge(e);
      const intervals = graphIntervals(graph);
      expect(intervals).toHaveLength(0);
    });
    it("timing information comes from the nodes and the edges", () => {
      // Note that the nodes/edges have not been added in time-sorted order,
      // and that the max time comes from the edges while the min time comes from the nodes.
      const graph = graphWithTiming([3, 1, 9], [2, 14, 3]);
      const intervals = graphIntervals(graph);
      expect(intervals).toEqual(weekIntervals(week(1), week(14)));
    });
  });

  describe("weekIntervals", () => {
    it("produces a covering interval for a single timestamp", () => {
      const intervals = weekIntervals(WEEK_MID, WEEK_MID);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces a correct interval for a single timestamp aligned on week start", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_START);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces one interval if passed start and end-1", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_END - 1);
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
      ]);
    });
    it("produces two intervals if passed start and end of week", () => {
      const intervals = weekIntervals(WEEK_START, WEEK_END);
      // It needs to have this behavior because the intervals are defined as half-open.
      // So if there is a node with timestamp WEEK_END, it will need to fall at the start
      // of the subsequent interval.
      expect(intervals).toEqual([
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
        {
          startTimeMs: WEEK_END,
          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
        },
      ]);
    });
    it("produces three intervals if the boundaries extend past a week on both sides", () => {
      const intervals = weekIntervals(WEEK_START - 1, WEEK_END + 1);
      expect(intervals).toEqual([
        {
          startTimeMs: +utcWeek.floor(WEEK_START - 1),
          endTimeMs: WEEK_START,
        },
        {
          startTimeMs: WEEK_START,
          endTimeMs: WEEK_END,
        },
        {
          startTimeMs: WEEK_END,
          endTimeMs: +utcWeek.ceil(WEEK_END + 1),
        },
      ]);
    });
  });
});
yarn.lock (10 lines changed)
@@ -2610,6 +2610,16 @@ cyclist@~0.2.2:
   resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-0.2.2.tgz#1b33792e11e914a2fd6d6ed6447464444e5fa640"
   integrity sha1-GzN5LhHpFKL9bW7WRHRkRE5fpkA=

+d3-array@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-2.2.0.tgz#a9e966b8f8d78f0888d98db1fb840fc8da8ac5c7"
+  integrity sha512-eE0QmSh6xToqM3sxHiJYg/QFdNn52ZEgmFE8A8abU8GsHvsIOolqH8B70/8+VGAKm5MlwaExhqR3DLIjOJMLPA==
+
+d3-time@^1.0.11:
+  version "1.0.11"
+  resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-1.0.11.tgz#1d831a3e25cd189eb256c17770a666368762bbce"
+  integrity sha512-Z3wpvhPLW4vEScGeIMUckDW7+3hWKOQfAWg/U7PlWBnQmeKQ00gCUsTtWSYulrKNA7ta8hJ+xXc6MHrMuITwEw==
+
 d@1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"