mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-27 11:40:26 +00:00
add analysis/timeline/interval
This commit adds an `interval` module which defines intervals (time ranges), and methods for slicing up a graph into its constituent time intervals. This is prerequisite work for #862. I've added a dep on d3-array. Test plan: Unit tests added; run `yarn test`
This commit is contained in:
parent
6cb1b336d5
commit
2335c5d844
@ -7,6 +7,8 @@
|
||||
"better-sqlite3": "^5.4.0",
|
||||
"chalk": "2.4.2",
|
||||
"commonmark": "^0.29.0",
|
||||
"d3-array": "^2.2.0",
|
||||
"d3-time": "^1.0.11",
|
||||
"express": "^4.16.3",
|
||||
"fs-extra": "8.1.0",
|
||||
"history": "^3.0.0",
|
||||
|
134
src/analysis/timeline/interval.js
Normal file
134
src/analysis/timeline/interval.js
Normal file
@ -0,0 +1,134 @@
|
||||
// @flow
|
||||
|
||||
import {max, min} from "d3-array";
|
||||
import sortBy from "lodash.sortby";
|
||||
import {utcWeek} from "d3-time";
|
||||
import * as NullUtil from "../../util/null";
|
||||
import type {Node, Edge, Graph} from "../../core/graph";
|
||||
|
||||
/**
|
||||
* Represents a time interval
|
||||
* The interval is half open [startTimeMs, endTimeMs),
|
||||
* i.e. if a timestamp is exactly on the interval boundary, it will fall at the
|
||||
* start of the older interval.
|
||||
*/
|
||||
export type Interval = {|
|
||||
+startTimeMs: number,
|
||||
+endTimeMs: number,
|
||||
|};
|
||||
|
||||
/**
|
||||
* Represents a slice of a time-partitioned graph
|
||||
* Includes the interval, as well as all of the nodes and edges whose timestamps
|
||||
* are within the interval.
|
||||
*/
|
||||
export type GraphInterval = {|
|
||||
+interval: Interval,
|
||||
+nodes: $ReadOnlyArray<Node>,
|
||||
+edges: $ReadOnlyArray<Edge>,
|
||||
|};
|
||||
|
||||
export type GraphIntervalPartition = $ReadOnlyArray<GraphInterval>;
|
||||
|
||||
/**
|
||||
* Partition a graph based on time intervals.
|
||||
*
|
||||
* The intervals are always one week long, as calculated using d3.utcWeek.
|
||||
* The result may contain empty intervals.
|
||||
* If the graph is empty, no intervals are returned.
|
||||
* Timeless nodes are not included in the partition, nor are dangling edges.
|
||||
*/
|
||||
export function partitionGraph(graph: Graph): GraphIntervalPartition {
|
||||
const nodes = Array.from(graph.nodes());
|
||||
const timefulNodes = nodes.filter((x) => x.timestampMs != null);
|
||||
const sortedNodes = sortBy(timefulNodes, (x) => x.timestampMs);
|
||||
const edges = Array.from(graph.edges({showDangling: false}));
|
||||
const sortedEdges = sortBy(edges, (x) => x.timestampMs);
|
||||
const intervals = graphIntervals(graph);
|
||||
let nodeIndex = 0;
|
||||
let edgeIndex = 0;
|
||||
return intervals.map((interval) => {
|
||||
const nodes = [];
|
||||
const edges = [];
|
||||
while (
|
||||
nodeIndex < sortedNodes.length &&
|
||||
sortedNodes[nodeIndex].timestampMs < interval.endTimeMs
|
||||
) {
|
||||
nodes.push(sortedNodes[nodeIndex++]);
|
||||
}
|
||||
while (
|
||||
edgeIndex < sortedEdges.length &&
|
||||
sortedEdges[edgeIndex].timestampMs < interval.endTimeMs
|
||||
) {
|
||||
edges.push(sortedEdges[edgeIndex++]);
|
||||
}
|
||||
return {interval, nodes, edges};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Produce an array of Intervals which cover all the node and edge timestamps
|
||||
* for a graph.
|
||||
*
|
||||
* The intervals are one week long, and are aligned on clean week boundaries.
|
||||
*
|
||||
* This function is basically a wrapper around weekIntervals that makes sure
|
||||
* the graph's nodes and edges are all accounted for properly.
|
||||
*/
|
||||
export function graphIntervals(graph: Graph): Interval[] {
|
||||
const nodeTimestamps = Array.from(graph.nodes())
|
||||
.map((x) => x.timestampMs)
|
||||
.filter((x) => x != null)
|
||||
// Unnecessary map is to satisfy flow that the array doesn't contain null.
|
||||
.map((x) => NullUtil.get(x));
|
||||
const edgeTimestamps = Array.from(graph.edges({showDangling: false})).map(
|
||||
(x) => x.timestampMs
|
||||
);
|
||||
if (nodeTimestamps.length === 0 && edgeTimestamps.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const allTimestamps = nodeTimestamps.concat(edgeTimestamps);
|
||||
const start = min(allTimestamps);
|
||||
const end = max(allTimestamps);
|
||||
return weekIntervals(start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
* Produce an array of week-long intervals to cover the startTime and endTime.
|
||||
*
|
||||
* Each interval is one week long and aligned on week boundaries, as produced
|
||||
* by d3.utcWeek. The weeks always use UTC boundaries to ensure consistent
|
||||
* output regardless of which timezone the user is in.
|
||||
*
|
||||
* Assuming that the inputs are valid, there will always be at least one
|
||||
* interval, so that that interval can cover the input timestamps. (E.g. if
|
||||
* startMs and endMs are the same value, then the produced interval will be the
|
||||
* start and end of the last week that starts on or before startMs.)
|
||||
*/
|
||||
export function weekIntervals(startMs: number, endMs: number): Interval[] {
|
||||
if (!isFinite(startMs) || !isFinite(endMs)) {
|
||||
throw new Error("invalid non-finite input");
|
||||
}
|
||||
if (typeof startMs !== "number" || typeof endMs !== "number") {
|
||||
throw new Error("start or end are not numbers");
|
||||
}
|
||||
if (startMs > endMs) {
|
||||
throw new Error("start time after end time");
|
||||
}
|
||||
// Promote the window to the nearest week boundaries, to ensure that
|
||||
// utcWeek.range will not return an empty array.
|
||||
// We add one to the endTime so that just in case we're exactly on a week
|
||||
// boundary, we still get at least one interval.
|
||||
startMs = utcWeek.floor(startMs);
|
||||
endMs = utcWeek.ceil(endMs + 1);
|
||||
const boundaries = utcWeek.range(startMs, endMs);
|
||||
boundaries.push(endMs);
|
||||
const intervals = [];
|
||||
for (let i = 0; i < boundaries.length - 1; i++) {
|
||||
intervals.push({
|
||||
startTimeMs: +boundaries[i],
|
||||
endTimeMs: +boundaries[i + 1],
|
||||
});
|
||||
}
|
||||
return intervals;
|
||||
}
|
175
src/analysis/timeline/interval.test.js
Normal file
175
src/analysis/timeline/interval.test.js
Normal file
@ -0,0 +1,175 @@
|
||||
// @flow
|
||||
|
||||
import {utcWeek} from "d3-time";
|
||||
import {node, edge} from "../../core/graphTestUtil";
|
||||
import {Graph} from "../../core/graph";
|
||||
import {partitionGraph, graphIntervals, weekIntervals} from "./interval";
|
||||
|
||||
describe("src/analysis/timeline/interval", () => {
|
||||
const WEEK_MID = 1562501362239;
|
||||
const WEEK_START = +utcWeek.floor(WEEK_MID);
|
||||
const WEEK_END = +utcWeek.ceil(WEEK_MID);
|
||||
const week = (n) => +utcWeek.offset(WEEK_MID, n);
|
||||
function graphWithTiming(
|
||||
nodeTimes: (number | null)[],
|
||||
edgeTimes: number[]
|
||||
): Graph {
|
||||
const graph = new Graph();
|
||||
const timeless = {...node("timeless"), timestampMs: null};
|
||||
// Add a timeless node so we can ensure all the edges are non-dangling
|
||||
graph.addNode(timeless);
|
||||
for (let i = 0; i < nodeTimes.length; i++) {
|
||||
const n = node(String(i));
|
||||
const nt = nodeTimes[i];
|
||||
const timestampMs = nt == null ? null : week(nt);
|
||||
graph.addNode({...n, timestampMs});
|
||||
}
|
||||
for (let i = 0; i < edgeTimes.length; i++) {
|
||||
const e = edge(String(i), timeless, timeless);
|
||||
graph.addEdge({...e, timestampMs: week(edgeTimes[i])});
|
||||
}
|
||||
return graph;
|
||||
}
|
||||
|
||||
describe("partitionGraph", () => {
|
||||
function checkPartition(g: Graph) {
|
||||
const slices = partitionGraph(g);
|
||||
const expectedIntervals = graphIntervals(g);
|
||||
expect(slices.map((x) => x.interval)).toEqual(expectedIntervals);
|
||||
|
||||
const seenNodeAddresses = new Set();
|
||||
const seenEdgeAddresses = new Set();
|
||||
for (const {interval, nodes, edges} of slices) {
|
||||
for (const {address, timestampMs} of nodes) {
|
||||
expect(timestampMs).not.toBe(null);
|
||||
expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
|
||||
expect(timestampMs).toBeLessThan(interval.endTimeMs);
|
||||
expect(seenNodeAddresses.has(address)).toBe(false);
|
||||
seenNodeAddresses.add(address);
|
||||
}
|
||||
for (const {address, timestampMs} of edges) {
|
||||
expect(timestampMs).toBeGreaterThanOrEqual(interval.startTimeMs);
|
||||
expect(timestampMs).toBeLessThan(interval.endTimeMs);
|
||||
expect(seenEdgeAddresses.has(address)).toBe(false);
|
||||
seenEdgeAddresses.add(address);
|
||||
}
|
||||
}
|
||||
const timefulNodes = Array.from(g.nodes()).filter(
|
||||
(x) => x.timestampMs != null
|
||||
);
|
||||
expect(timefulNodes).toHaveLength(seenNodeAddresses.size);
|
||||
const edges = Array.from(g.edges({showDangling: false}));
|
||||
expect(edges).toHaveLength(seenEdgeAddresses.size);
|
||||
}
|
||||
|
||||
it("partitions an empty graph correctly", () => {
|
||||
checkPartition(new Graph());
|
||||
});
|
||||
it("partitions a graph with just nodes", () => {
|
||||
checkPartition(graphWithTiming([5, 3, 99, 12], []));
|
||||
});
|
||||
it("partitions a graph with just edges", () => {
|
||||
checkPartition(graphWithTiming([], [3, 4, 99]));
|
||||
});
|
||||
it("partitions a graph with nodes and edges", () => {
|
||||
checkPartition(graphWithTiming([3, 9], [4, 12]));
|
||||
});
|
||||
it("partitions a graph with dangling edges", () => {
|
||||
const graph = graphWithTiming([3, 9], [4, 12]);
|
||||
const n = node("nope");
|
||||
const d = edge("dangling", n, n);
|
||||
graph.addEdge(d);
|
||||
checkPartition(graph);
|
||||
});
|
||||
});
|
||||
|
||||
describe("graphIntervals", () => {
|
||||
it("an empty graph has no intervals", () => {
|
||||
const intervals = graphIntervals(new Graph());
|
||||
expect(intervals).toHaveLength(0);
|
||||
});
|
||||
it("a graph with only timeless nodes has no intervals", () => {
|
||||
const graph = graphWithTiming([null, null], []);
|
||||
const intervals = graphIntervals(graph);
|
||||
expect(intervals).toHaveLength(0);
|
||||
});
|
||||
it("a graph with only dangling edges has no intervals", () => {
|
||||
const graph = new Graph();
|
||||
const n = node("nonexistent");
|
||||
const e = {...edge("dangling", n, n), timestampMs: WEEK_MID};
|
||||
graph.addEdge(e);
|
||||
const intervals = graphIntervals(graph);
|
||||
expect(intervals).toHaveLength(0);
|
||||
});
|
||||
it("timing information comes from the nodes and the edges", () => {
|
||||
// Note that the nodes/edges have not been added in time-sorted order,
|
||||
// and that the max time comes from the edges while the min time comes from the nodes.
|
||||
const graph = graphWithTiming([3, 1, 9], [2, 14, 3]);
|
||||
const intervals = graphIntervals(graph);
|
||||
expect(intervals).toEqual(weekIntervals(week(1), week(14)));
|
||||
});
|
||||
});
|
||||
|
||||
describe("weekIntervals", () => {
|
||||
it("produces a covering interval for a single timestamp", () => {
|
||||
const intervals = weekIntervals(WEEK_MID, WEEK_MID);
|
||||
expect(intervals).toEqual([
|
||||
{
|
||||
startTimeMs: WEEK_START,
|
||||
endTimeMs: WEEK_END,
|
||||
},
|
||||
]);
|
||||
});
|
||||
it("produces a correct interval for a single timestamp aligned on week start", () => {
|
||||
const intervals = weekIntervals(WEEK_START, WEEK_START);
|
||||
expect(intervals).toEqual([
|
||||
{
|
||||
startTimeMs: WEEK_START,
|
||||
endTimeMs: WEEK_END,
|
||||
},
|
||||
]);
|
||||
});
|
||||
it("produces one interval if passed start and end-1", () => {
|
||||
const intervals = weekIntervals(WEEK_START, WEEK_END - 1);
|
||||
expect(intervals).toEqual([
|
||||
{
|
||||
startTimeMs: WEEK_START,
|
||||
endTimeMs: WEEK_END,
|
||||
},
|
||||
]);
|
||||
});
|
||||
it("produces two intervals if passed start and end of week", () => {
|
||||
const intervals = weekIntervals(WEEK_START, WEEK_END);
|
||||
// It needs to have this behavior because the intervals are defined as half-open.
|
||||
// So if there is a node with timestamp WEEK_END, it will need to fall at the start
|
||||
// of the subsequent interval.
|
||||
expect(intervals).toEqual([
|
||||
{
|
||||
startTimeMs: WEEK_START,
|
||||
endTimeMs: WEEK_END,
|
||||
},
|
||||
{
|
||||
startTimeMs: WEEK_END,
|
||||
endTimeMs: +utcWeek.ceil(WEEK_END + 1),
|
||||
},
|
||||
]);
|
||||
});
|
||||
it("produces three intervals if the boundaries extend past a week on both sides", () => {
|
||||
const intervals = weekIntervals(WEEK_START - 1, WEEK_END + 1);
|
||||
expect(intervals).toEqual([
|
||||
{
|
||||
startTimeMs: +utcWeek.floor(WEEK_START - 1),
|
||||
endTimeMs: WEEK_START,
|
||||
},
|
||||
{
|
||||
startTimeMs: WEEK_START,
|
||||
endTimeMs: WEEK_END,
|
||||
},
|
||||
{
|
||||
startTimeMs: WEEK_END,
|
||||
endTimeMs: +utcWeek.ceil(WEEK_END + 1),
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
10
yarn.lock
10
yarn.lock
@ -2610,6 +2610,16 @@ cyclist@~0.2.2:
|
||||
resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-0.2.2.tgz#1b33792e11e914a2fd6d6ed6447464444e5fa640"
|
||||
integrity sha1-GzN5LhHpFKL9bW7WRHRkRE5fpkA=
|
||||
|
||||
d3-array@^2.2.0:
|
||||
version "2.2.0"
|
||||
resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-2.2.0.tgz#a9e966b8f8d78f0888d98db1fb840fc8da8ac5c7"
|
||||
integrity sha512-eE0QmSh6xToqM3sxHiJYg/QFdNn52ZEgmFE8A8abU8GsHvsIOolqH8B70/8+VGAKm5MlwaExhqR3DLIjOJMLPA==
|
||||
|
||||
d3-time@^1.0.11:
|
||||
version "1.0.11"
|
||||
resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-1.0.11.tgz#1d831a3e25cd189eb256c17770a666368762bbce"
|
||||
integrity sha512-Z3wpvhPLW4vEScGeIMUckDW7+3hWKOQfAWg/U7PlWBnQmeKQ00gCUsTtWSYulrKNA7ta8hJ+xXc6MHrMuITwEw==
|
||||
|
||||
d@1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"
|
||||
|
Loading…
x
Reference in New Issue
Block a user