2020-10-09 21:31:15 +01:00
|
|
|
/*eslint no-console: "off"*/
|
2020-11-11 16:59:15 +00:00
|
|
|
(function() {
|
|
|
|
// Placeholder series result (no unit suffix, no labels, no data points)
// that reformatSeries returns when a query response has no usable data.
var emptySeries = { unitSuffix: '', labels: {}, data: [] };
|
2020-10-20 16:41:16 +01:00
|
|
|
|
2020-10-09 21:31:15 +01:00
|
|
|
var prometheusProvider = {
|
|
|
|
options: {},
|
|
|
|
|
|
|
|
/**
|
2020-10-20 16:41:16 +01:00
|
|
|
* init is called when the provider is first loaded.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* options.providerOptions contains any operator configured parameters
|
|
|
|
* specified in the Consul agent config that is serving the UI.
|
|
|
|
*
|
2020-11-04 09:33:37 +00:00
|
|
|
* Consul will provide:
|
|
|
|
*
|
|
|
|
* 1. A boolean options.metrics_proxy_enabled to indicate whether the agent
|
|
|
|
* has a metrics proxy configured.
|
|
|
|
* 2. A fetch-like options.fetch which is a thin fetch wrapper that prefixes
|
|
|
|
* any url with the url of Consul's proxy endpoint and adds your current
|
|
|
|
* Consul ACL token to the request headers. Otherwise it functions like the
|
|
|
|
* browser's native fetch.
|
2020-10-20 16:41:16 +01:00
|
|
|
*
|
|
|
|
* The provider should throw an Exception if the options are not valid for
|
|
|
|
* example because it requires a metrics proxy and one is not configured.
|
2020-10-09 21:31:15 +01:00
|
|
|
*/
|
|
|
|
init: function(options) {
|
|
|
|
this.options = options;
|
2020-10-20 16:41:16 +01:00
|
|
|
if (!this.options.metrics_proxy_enabled) {
|
2020-11-11 16:59:15 +00:00
|
|
|
throw new Error(
|
|
|
|
'prometheus metrics provider currently requires the ui_config.metrics_proxy to be configured in the Consul agent.'
|
|
|
|
);
|
2020-10-20 16:41:16 +01:00
|
|
|
}
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-04 09:33:37 +00:00
|
|
|
// simple httpGet function that also encodes query parameters
|
|
|
|
// before passing the constructed url through to native fetch
|
|
|
|
// any errors should throw an error with a statusCode property
|
|
|
|
httpGet: function(url, queryParams, headers) {
|
|
|
|
if (queryParams) {
|
|
|
|
var separator = url.indexOf('?') !== -1 ? '&' : '?';
|
2020-11-11 16:59:15 +00:00
|
|
|
var qs = Object.keys(queryParams)
|
|
|
|
.map(function(key) {
|
|
|
|
return encodeURIComponent(key) + '=' + encodeURIComponent(queryParams[key]);
|
|
|
|
})
|
|
|
|
.join('&');
|
2020-11-04 09:33:37 +00:00
|
|
|
url = url + separator + qs;
|
|
|
|
}
|
|
|
|
// fetch the url along with any headers
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.options.fetch(url, { headers: headers || {} }).then(function(response) {
|
|
|
|
if (response.ok) {
|
|
|
|
return response.json();
|
|
|
|
} else {
|
|
|
|
// throw a statusCode error if any errors are received
|
|
|
|
var e = new Error('HTTP Error: ' + response.statusText);
|
|
|
|
e.statusCode = response.status;
|
|
|
|
throw e;
|
2020-11-04 09:33:37 +00:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
});
|
2020-11-04 09:33:37 +00:00
|
|
|
},
|
|
|
|
|
2020-10-09 21:31:15 +01:00
|
|
|
/**
|
|
|
|
* serviceRecentSummarySeries should return time series for a recent time
|
2020-10-26 19:48:23 +00:00
|
|
|
* period summarizing the usage of the named service in the indicated
|
|
|
|
* datacenter. In Consul Enterprise a non-empty namespace is also provided.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
2020-10-20 16:41:16 +01:00
|
|
|
* If these metrics aren't available then an empty series array may be
|
|
|
|
* returned.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* The period may (later) be specified in options.startTime and
|
|
|
|
* options.endTime.
|
|
|
|
*
|
|
|
|
* The service's protocol must be given as one of Consul's supported
|
|
|
|
* protocols e.g. "tcp", "http", "http2", "grpc". If it is empty or the
|
2020-10-20 16:41:16 +01:00
|
|
|
* provider doesn't recognize the protocol, it should treat it as "tcp" and
|
|
|
|
* provide basic connection stats.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* The expected return value is a promise which resolves to an object that
|
|
|
|
* should look like the following:
|
|
|
|
*
|
|
|
|
* {
|
2020-10-20 16:41:16 +01:00
|
|
|
* // The unitSuffix is shown after the value in tooltips. Values will be
|
|
|
|
* // rounded and shortened. Larger values will already have a suffix
|
|
|
|
* // like "10k". The suffix provided here is concatenated directly
|
|
|
|
* // allowing for suffixes like "mbps/kbps" by using a suffix of "bps".
|
|
|
|
* // If the unit doesn't make sense in this format, include a
|
|
|
|
* // leading space for example " rps" would show as "1.2k rps".
|
|
|
|
* unitSuffix: " rps",
|
|
|
|
*
|
|
|
|
* // The set of labels to graph. The key should exactly correspond to a
|
|
|
|
* // property of every data point in the array below except for the
|
|
|
|
* // special case "Total" which is used to show the sum of all the
|
|
|
|
* // stacked graph values. The key is displayed in the tooltip so it
|
|
|
|
* // should be human-friendly but as concise as possible. The value is a
|
|
|
|
* // longer description that is displayed in the graph's key on request
|
|
|
|
* // to explain exactly what the metrics mean.
|
|
|
|
* labels: {
|
|
|
|
* "Total": "Total inbound requests per second.",
|
2020-10-26 19:48:23 +00:00
|
|
|
* "Successes": "Successful responses (with an HTTP response code ...",
|
|
|
|
* "Errors": "Error responses (with an HTTP response code in the ...",
|
2020-10-20 16:41:16 +01:00
|
|
|
* },
|
|
|
|
*
|
|
|
|
* data: [
|
2020-10-09 21:31:15 +01:00
|
|
|
* {
|
2020-10-20 16:41:16 +01:00
|
|
|
* time: 1600944516286, // milliseconds since Unix epoch
|
|
|
|
* "Successes": 1234.5,
|
|
|
|
* "Errors": 2.3,
|
2020-10-09 21:31:15 +01:00
|
|
|
* },
|
|
|
|
* ...
|
|
|
|
* ]
|
|
|
|
* }
|
|
|
|
*
|
2020-10-20 16:41:16 +01:00
|
|
|
* Every data point object should have a value for every series label
|
|
|
|
* (except for "Total") otherwise it will be assumed to be "0".
|
2020-10-09 21:31:15 +01:00
|
|
|
*/
|
2020-10-26 19:48:23 +00:00
|
|
|
serviceRecentSummarySeries: function(serviceDC, namespace, serviceName, protocol, options) {
|
2020-10-09 21:31:15 +01:00
|
|
|
// Fetch time-series
|
2020-11-11 16:59:15 +00:00
|
|
|
var series = [];
|
|
|
|
var labels = [];
|
2020-10-09 21:31:15 +01:00
|
|
|
|
|
|
|
// Set the start and end range here so that all queries end up with
|
|
|
|
// identical time axes. Later we might accept these as options.
|
2020-11-11 16:59:15 +00:00
|
|
|
var now = new Date().getTime() / 1000;
|
|
|
|
options.start = now - 15 * 60;
|
2020-10-09 21:31:15 +01:00
|
|
|
options.end = now;
|
|
|
|
|
|
|
|
if (this.hasL7Metrics(protocol)) {
|
2020-10-20 16:41:16 +01:00
|
|
|
return this.fetchRequestRateSeries(serviceName, options);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
|
2020-10-20 16:41:16 +01:00
|
|
|
// Fallback to just L4 metrics.
|
|
|
|
return this.fetchDataRateSeries(serviceName, options);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
/**
|
|
|
|
* serviceRecentSummaryStats should return four summary statistics for a
|
2020-10-26 19:48:23 +00:00
|
|
|
* recent time period for the named service in the indicated datacenter. In
|
|
|
|
* Consul Enterprise a non-empty namespace is also provided.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* If these metrics aren't available then an empty array may be returned.
|
|
|
|
*
|
|
|
|
* The period may (later) be specified in options.startTime and
|
|
|
|
* options.endTime.
|
|
|
|
*
|
|
|
|
* The service's protocol must be given as one of Consul's supported
|
|
|
|
* protocols e.g. "tcp", "http", "http2", "grpc". If it is empty or the
|
|
|
|
* provider doesn't recognize it, it should treat it as "tcp" and provide
|
|
|
|
* just basic connection stats.
|
|
|
|
*
|
|
|
|
* The expected return value is a promise which resolves to an object that
|
|
|
|
* should look like the following:
|
|
|
|
*
|
|
|
|
* {
|
|
|
|
* stats: [ // We expect four of these for now.
|
|
|
|
* {
|
|
|
|
* // label should be 3 chars or fewer as an abbreviation
|
|
|
|
* label: "SR",
|
2020-10-26 19:48:23 +00:00
|
|
|
*
|
2020-10-09 21:31:15 +01:00
|
|
|
* // desc describes the stat in a tooltip
|
|
|
|
* desc: "Success Rate - the percentage of all requests that were not 5xx status",
|
2020-10-26 19:48:23 +00:00
|
|
|
*
|
2020-10-09 21:31:15 +01:00
|
|
|
* // value is a string allowing the provider to format it and add
|
|
|
|
* // units as appropriate. It should be as compact as possible.
|
|
|
|
* value: "98%",
|
|
|
|
* }
|
|
|
|
* ]
|
|
|
|
* }
|
|
|
|
*/
|
2020-10-26 19:48:23 +00:00
|
|
|
serviceRecentSummaryStats: function(serviceDC, namespace, serviceName, protocol, options) {
|
2020-10-09 21:31:15 +01:00
|
|
|
// Fetch stats
|
|
|
|
var stats = [];
|
|
|
|
if (this.hasL7Metrics(protocol)) {
|
2020-11-11 16:59:15 +00:00
|
|
|
stats.push(this.fetchRPS(serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchER(serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchPercentile(50, serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchPercentile(99, serviceName, 'service', options));
|
2020-10-09 21:31:15 +01:00
|
|
|
} else {
|
|
|
|
// Fallback to just L4 metrics.
|
2020-11-11 16:59:15 +00:00
|
|
|
stats.push(this.fetchConnRate(serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchServiceRx(serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchServiceTx(serviceName, 'service', options));
|
|
|
|
stats.push(this.fetchServiceNoRoute(serviceName, 'service', options));
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchStats(stats);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
/**
|
|
|
|
* upstreamRecentSummaryStats should return four summary statistics for each
|
2020-10-26 19:48:23 +00:00
|
|
|
* upstream service over a recent time period, relative to the named service
|
|
|
|
* in the indicated datacenter. In Consul Enterprise a non-empty namespace
|
|
|
|
* is also provided.
|
|
|
|
*
|
|
|
|
* Note that the upstreams themselves might be in different datacenters but
|
|
|
|
* we only pass the target service DC since typically these metrics should
|
|
|
|
* be from the outbound listener of the target service in this DC even if
|
|
|
|
* they eventually end up in another DC.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* If these metrics aren't available then an empty array may be returned.
|
|
|
|
*
|
|
|
|
* The period may (later) be specified in options.startTime and
|
|
|
|
* options.endTime.
|
|
|
|
*
|
|
|
|
* The expected return value format is shown below:
|
|
|
|
*
|
|
|
|
* {
|
|
|
|
* stats: {
|
|
|
|
* // Each upstream will appear as an entry keyed by the upstream
|
|
|
|
* // service name. The value is an array of stats with the same
|
|
|
|
* // format as serviceRecentSummaryStats response.stats. Note that
|
|
|
|
* // different upstreams might show different stats depending on
|
|
|
|
* // their protocol.
|
|
|
|
* "upstream_name": [
|
|
|
|
* {label: "SR", desc: "...", value: "99%"},
|
|
|
|
* ...
|
|
|
|
* ],
|
|
|
|
* ...
|
|
|
|
* }
|
|
|
|
* }
|
|
|
|
*/
|
2020-10-26 19:48:23 +00:00
|
|
|
upstreamRecentSummaryStats: function(serviceDC, namespace, serviceName, upstreamName, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchRecentSummaryStats(serviceName, 'upstream', options);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
/**
|
2020-10-20 16:41:16 +01:00
|
|
|
* downstreamRecentSummaryStats should return four summary statistics for
|
2020-10-26 19:48:23 +00:00
|
|
|
* each downstream service over a recent time period, relative to the named
|
|
|
|
* service in the indicated datacenter. In Consul Enterprise a non-empty
|
|
|
|
* namespace is also provided.
|
|
|
|
*
|
|
|
|
* Note that the service may have downstreams in different datacenters. For
|
|
|
|
* some metrics systems which are per-datacenter this makes it hard to query
|
|
|
|
* for all downstream metrics from one source. For now the UI will only show
|
|
|
|
* downstreams in the same datacenter as the target service. In the future
|
|
|
|
* this method may be called multiple times, once for each DC that contains
|
|
|
|
* downstream services to gather metrics from each. In that case a separate
|
|
|
|
* option for target datacenter will be used since the target service's DC
|
|
|
|
* is still needed to correctly identify the outbound clusters that will
|
|
|
|
* route to it from the remote DC.
|
2020-10-09 21:31:15 +01:00
|
|
|
*
|
|
|
|
* If these metrics aren't available then an empty array may be returned.
|
|
|
|
*
|
|
|
|
* The period may (later) be specified in options.startTime and
|
|
|
|
* options.endTime.
|
|
|
|
*
|
|
|
|
* The expected return value format is shown below:
|
|
|
|
*
|
|
|
|
* {
|
|
|
|
* stats: {
|
|
|
|
* // Each downstream will appear as an entry keyed by the downstream
|
|
|
|
* // service name. The value is an array of stats with the same
|
2020-10-20 16:41:16 +01:00
|
|
|
* // format as serviceRecentSummaryStats response.stats. Different
|
|
|
|
* // downstreams may display different stats if required although the
|
|
|
|
* // protocol should be the same for all as it is the target
|
|
|
|
* // service's protocol that matters here.
|
2020-10-09 21:31:15 +01:00
|
|
|
* "downstream_name": [
|
|
|
|
* {label: "SR", desc: "...", value: "99%"},
|
|
|
|
* ...
|
|
|
|
* ],
|
|
|
|
* ...
|
|
|
|
* }
|
|
|
|
* }
|
|
|
|
*/
|
2020-10-26 19:48:23 +00:00
|
|
|
downstreamRecentSummaryStats: function(serviceDC, namespace, serviceName, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchRecentSummaryStats(serviceName, 'downstream', options);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchRecentSummaryStats: function(serviceName, type, options) {
|
|
|
|
// Fetch stats
|
|
|
|
var stats = [];
|
|
|
|
|
|
|
|
// We don't know which upstreams are HTTP/TCP so just fetch all of them.
|
|
|
|
|
|
|
|
// HTTP
|
2020-11-11 16:59:15 +00:00
|
|
|
stats.push(this.fetchRPS(serviceName, type, options));
|
|
|
|
stats.push(this.fetchER(serviceName, type, options));
|
|
|
|
stats.push(this.fetchPercentile(50, serviceName, type, options));
|
|
|
|
stats.push(this.fetchPercentile(99, serviceName, type, options));
|
2020-10-09 21:31:15 +01:00
|
|
|
|
|
|
|
// L4
|
2020-11-11 16:59:15 +00:00
|
|
|
stats.push(this.fetchConnRate(serviceName, type, options));
|
|
|
|
stats.push(this.fetchServiceRx(serviceName, type, options));
|
|
|
|
stats.push(this.fetchServiceTx(serviceName, type, options));
|
|
|
|
stats.push(this.fetchServiceNoRoute(serviceName, type, options));
|
2020-10-09 21:31:15 +01:00
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchStatsGrouped(stats);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
hasL7Metrics: function(protocol) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return protocol === 'http' || protocol === 'http2' || protocol === 'grpc';
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchStats: function(statsPromises) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var all = Promise.all(statsPromises).then(function(results) {
|
2020-10-09 21:31:15 +01:00
|
|
|
var data = {
|
2020-11-11 16:59:15 +00:00
|
|
|
stats: [],
|
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
// Add all non-empty stats
|
|
|
|
for (var i = 0; i < statsPromises.length; i++) {
|
|
|
|
if (results[i].value) {
|
2020-10-27 14:51:15 +00:00
|
|
|
data.stats.push(results[i]);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return data;
|
|
|
|
});
|
2020-10-09 21:31:15 +01:00
|
|
|
|
|
|
|
// Fetch the metrics async, and return a promise to the result.
|
2020-11-11 16:59:15 +00:00
|
|
|
return all;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchStatsGrouped: function(statsPromises) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var all = Promise.all(statsPromises).then(function(results) {
|
2020-10-09 21:31:15 +01:00
|
|
|
var data = {
|
2020-11-11 16:59:15 +00:00
|
|
|
stats: {},
|
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
// Add all non-empty stats
|
|
|
|
for (var i = 0; i < statsPromises.length; i++) {
|
2020-10-27 14:51:15 +00:00
|
|
|
if (results[i]) {
|
|
|
|
for (var group in results[i]) {
|
|
|
|
if (!results[i].hasOwnProperty(group)) continue;
|
2020-10-09 21:31:15 +01:00
|
|
|
if (!data.stats[group]) {
|
2020-11-11 16:59:15 +00:00
|
|
|
data.stats[group] = [];
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
data.stats[group].push(results[i][group]);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return data;
|
|
|
|
});
|
2020-10-09 21:31:15 +01:00
|
|
|
|
|
|
|
// Fetch the metrics async, and return a promise to the result.
|
2020-11-11 16:59:15 +00:00
|
|
|
return all;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-10-20 16:41:16 +01:00
|
|
|
reformatSeries: function(unitSuffix, labelMap) {
|
|
|
|
return function(response) {
|
|
|
|
// Handle empty result sets gracefully.
|
2020-11-11 16:59:15 +00:00
|
|
|
if (
|
|
|
|
!response.data ||
|
|
|
|
!response.data.result ||
|
|
|
|
response.data.result.length == 0 ||
|
|
|
|
!response.data.result[0].values ||
|
|
|
|
response.data.result[0].values.length == 0
|
|
|
|
) {
|
2020-10-20 16:41:16 +01:00
|
|
|
return emptySeries;
|
|
|
|
}
|
|
|
|
// Reformat the prometheus data to be the format we want with stacked
|
|
|
|
// values as object properties.
|
2020-10-09 21:31:15 +01:00
|
|
|
|
2020-10-20 16:41:16 +01:00
|
|
|
// Populate time values first based on first result since Prometheus will
|
|
|
|
// always return all the same points for all series in the query.
|
|
|
|
let series = response.data.result[0].values.map(function(d, i) {
|
|
|
|
return {
|
|
|
|
time: Math.round(d[0] * 1000),
|
|
|
|
};
|
|
|
|
});
|
2020-10-09 21:31:15 +01:00
|
|
|
|
2020-10-20 16:41:16 +01:00
|
|
|
// Then for each series returned populate the labels and values in the
|
|
|
|
// points.
|
|
|
|
response.data.result.map(function(d) {
|
|
|
|
d.values.map(function(p, i) {
|
|
|
|
series[i][d.metric.label] = parseFloat(p[1]);
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
return {
|
|
|
|
unitSuffix: unitSuffix,
|
|
|
|
labels: labelMap,
|
2020-11-11 16:59:15 +00:00
|
|
|
data: series,
|
2020-10-20 16:41:16 +01:00
|
|
|
};
|
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
fetchRequestRateSeries: function(serviceName, options) {
|
2020-10-20 16:41:16 +01:00
|
|
|
// We need the sum of all non-500 error rates as one value and the 500
|
|
|
|
// error rate as a separate series so that they stack to show the full
|
|
|
|
// request rate. Some creative label replacement makes this possible in
|
|
|
|
// one query.
|
2020-11-11 16:59:15 +00:00
|
|
|
var q =
|
|
|
|
`sum by (label) (` +
|
2020-10-20 16:41:16 +01:00
|
|
|
// The outer label_replace catches 5xx error and relabels them as
|
|
|
|
// err=yes
|
2020-11-11 16:59:15 +00:00
|
|
|
`label_replace(` +
|
|
|
|
// The inner label_replace relabels all !5xx rates as err=no so they
|
|
|
|
// will get summed together.
|
|
|
|
`label_replace(` +
|
|
|
|
// Get rate of requests to the service
|
|
|
|
`irate(envoy_listener_http_downstream_rq_xx{local_cluster="${serviceName}",envoy_http_conn_manager_prefix="public_listener_http"}[10m])` +
|
|
|
|
// ... inner replacement matches all code classes except "5" and
|
|
|
|
// applies err=no
|
|
|
|
`, "label", "Successes", "envoy_response_code_class", "[^5]")` +
|
|
|
|
// ... outer replacement matches code=5 and applies err=yes
|
|
|
|
`, "label", "Errors", "envoy_response_code_class", "5")` +
|
|
|
|
`)`;
|
2020-10-20 16:41:16 +01:00
|
|
|
var labelMap = {
|
|
|
|
Total: 'Total inbound requests per second',
|
2020-11-11 16:59:15 +00:00
|
|
|
Successes:
|
|
|
|
'Successful responses (with an HTTP response code not in the 5xx range) per second.',
|
2020-10-20 16:41:16 +01:00
|
|
|
Errors: 'Error responses (with an HTTP response code in the 5xx range) per second.',
|
|
|
|
};
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchSeries(q, options).then(this.reformatSeries(' rps', labelMap));
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
fetchDataRateSeries: function(serviceName, options) {
|
2020-10-20 16:41:16 +01:00
|
|
|
// 8 * converts from bytes/second to bits/second
|
2020-11-11 16:59:15 +00:00
|
|
|
var q =
|
|
|
|
`8 * sum by (label) (` +
|
2020-10-20 16:41:16 +01:00
|
|
|
// Label replace generates a unique label per rx/tx metric to stop them
|
|
|
|
// being summed together.
|
2020-11-11 16:59:15 +00:00
|
|
|
`label_replace(` +
|
|
|
|
// Get the tx rate
|
|
|
|
`irate(envoy_tcp_downstream_cx_tx_bytes_total{local_cluster="${serviceName}",envoy_tcp_prefix="public_listener_tcp"}[10m])` +
|
|
|
|
// Match all and apply the tx label
|
|
|
|
`, "label", "Outbound", "__name__", ".*"` +
|
2020-10-20 16:41:16 +01:00
|
|
|
// Union those vectors with the RX ones
|
2020-11-11 16:59:15 +00:00
|
|
|
`) or label_replace(` +
|
|
|
|
// Get the rx rate
|
|
|
|
`irate(envoy_tcp_downstream_cx_rx_bytes_total{local_cluster="${serviceName}",envoy_tcp_prefix="public_listener_tcp"}[10m])` +
|
|
|
|
// Match all and apply the rx label
|
|
|
|
`, "label", "Inbound", "__name__", ".*"` +
|
|
|
|
`)` +
|
|
|
|
`)`;
|
2020-10-20 16:41:16 +01:00
|
|
|
var labelMap = {
|
|
|
|
Total: 'Total bandwidth',
|
|
|
|
Inbound: 'Inbound data rate (data recieved) from the network in bits per second.',
|
|
|
|
Outbound: 'Outbound data rate (data transmitted) from the network in bits per second.',
|
|
|
|
};
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchSeries(q, options).then(this.reformatSeries('bps', labelMap));
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
makeSubject: function(serviceName, type) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'upstream') {
|
2020-10-09 21:31:15 +01:00
|
|
|
// {{GROUP}} is a placeholder that is replaced by the upstream name
|
|
|
|
return `${serviceName} → {{GROUP}}`;
|
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'downstream') {
|
2020-10-09 21:31:15 +01:00
|
|
|
// {{GROUP}} is a placeholder that is replaced by the downstream name
|
|
|
|
return `{{GROUP}} → ${serviceName}`;
|
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return serviceName;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
makeHTTPSelector: function(serviceName, type) {
|
|
|
|
// Downstreams are totally different
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'downstream') {
|
|
|
|
return `consul_service="${serviceName}"`;
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
var lc = `local_cluster="${serviceName}"`;
|
|
|
|
if (type == 'upstream') {
|
2020-10-09 21:31:15 +01:00
|
|
|
lc += `,envoy_http_conn_manager_prefix=~"upstream_.*"`;
|
|
|
|
} else {
|
|
|
|
// Only care about inbound public listener
|
2020-11-11 16:59:15 +00:00
|
|
|
lc += `,envoy_http_conn_manager_prefix="public_listener_http"`;
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return lc;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
makeTCPSelector: function(serviceName, type) {
|
|
|
|
// Downstreams are totally different
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'downstream') {
|
|
|
|
return `consul_service="${serviceName}"`;
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
var lc = `local_cluster="${serviceName}"`;
|
|
|
|
if (type == 'upstream') {
|
2020-10-09 21:31:15 +01:00
|
|
|
lc += `,envoy_tcp_prefix=~"upstream_.*"`;
|
|
|
|
} else {
|
|
|
|
// Only care about inbound public listener
|
2020-11-11 16:59:15 +00:00
|
|
|
lc += `,envoy_tcp_prefix="public_listener_tcp"`;
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return lc;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
groupQueryHTTP: function(type, q) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'upstream') {
|
|
|
|
q += ' by (envoy_http_conn_manager_prefix)';
|
2020-10-09 21:31:15 +01:00
|
|
|
// Extract the raw upstream service name to group results by
|
2020-11-11 16:59:15 +00:00
|
|
|
q = this.upstreamRelabelQueryHTTP(q);
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
q += ' by (local_cluster)';
|
|
|
|
q = this.downstreamRelabelQuery(q);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return q;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
groupQueryTCP: function(type, q) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'upstream') {
|
|
|
|
q += ' by (envoy_tcp_prefix)';
|
2020-10-09 21:31:15 +01:00
|
|
|
// Extract the raw upstream service name to group results by
|
2020-11-11 16:59:15 +00:00
|
|
|
q = this.upstreamRelabelQueryTCP(q);
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
q += ' by (local_cluster)';
|
|
|
|
q = this.downstreamRelabelQuery(q);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return q;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
upstreamRelabelQueryHTTP: function(q) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return `label_replace(${q}, "upstream", "$1", "envoy_http_conn_manager_prefix", "upstream_(.*)_http")`;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
upstreamRelabelQueryTCP: function(q) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return `label_replace(${q}, "upstream", "$1", "envoy_tcp_prefix", "upstream_(.*)_tcp")`;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
downstreamRelabelQuery: function(q) {
|
2020-11-11 16:59:15 +00:00
|
|
|
return `label_replace(${q}, "downstream", "$1", "local_cluster", "(.*)")`;
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
groupBy: function(type) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'service') {
|
|
|
|
return false;
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
return type;
|
|
|
|
},
|
|
|
|
|
|
|
|
metricPrefixHTTP: function(type) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'downstream') {
|
|
|
|
return 'envoy_cluster_upstream_rq';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return 'envoy_http_downstream_rq';
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
metricPrefixTCP: function(type) {
|
2020-11-11 16:59:15 +00:00
|
|
|
if (type == 'downstream') {
|
|
|
|
return 'envoy_cluster_upstream_cx';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return 'envoy_tcp_downstream_cx';
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
fetchRPS: function(serviceName, type, options) {
|
|
|
|
var sel = this.makeHTTPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var metricPfx = this.metricPrefixHTTP(type);
|
|
|
|
var q = `sum(rate(${metricPfx}_completed{${sel}}[15m]))`;
|
|
|
|
return this.fetchStat(
|
|
|
|
this.groupQueryHTTP(type, q),
|
|
|
|
'RPS',
|
2020-10-09 21:31:15 +01:00
|
|
|
`<b>${subject}</b> request rate averaged over the last 15 minutes`,
|
|
|
|
shortNumStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
fetchER: function(serviceName, type, options) {
|
|
|
|
var sel = this.makeHTTPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var groupBy = '';
|
|
|
|
if (type == 'upstream') {
|
|
|
|
groupBy += ' by (envoy_http_conn_manager_prefix)';
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
groupBy += ' by (local_cluster)';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
var metricPfx = this.metricPrefixHTTP(type);
|
|
|
|
var q = `sum(rate(${metricPfx}_xx{${sel},envoy_response_code_class="5"}[15m]))${groupBy}/sum(rate(${metricPfx}_xx{${sel}}[15m]))${groupBy}`;
|
|
|
|
if (type == 'upstream') {
|
|
|
|
q = this.upstreamRelabelQueryHTTP(q);
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
q = this.downstreamRelabelQuery(q);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchStat(
|
|
|
|
q,
|
|
|
|
'ER',
|
2020-10-09 21:31:15 +01:00
|
|
|
`Percentage of <b>${subject}</b> requests which were 5xx status over the last 15 minutes`,
|
2020-11-11 16:59:15 +00:00
|
|
|
function(val) {
|
|
|
|
return shortNumStr(val) + '%';
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
2020-11-11 16:59:15 +00:00
|
|
|
fetchPercentile: function(percentile, serviceName, type, options) {
|
|
|
|
var sel = this.makeHTTPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var groupBy = 'le';
|
|
|
|
if (type == 'upstream') {
|
|
|
|
groupBy += ',envoy_http_conn_manager_prefix';
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
groupBy += ',local_cluster';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
var metricPfx = this.metricPrefixHTTP(type);
|
|
|
|
var q = `histogram_quantile(${percentile /
|
|
|
|
100}, sum by(${groupBy}) (rate(${metricPfx}_time_bucket{${sel}}[15m])))`;
|
|
|
|
if (type == 'upstream') {
|
|
|
|
q = this.upstreamRelabelQueryHTTP(q);
|
|
|
|
} else if (type == 'downstream') {
|
|
|
|
q = this.downstreamRelabelQuery(q);
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
return this.fetchStat(
|
|
|
|
q,
|
2020-10-09 21:31:15 +01:00
|
|
|
`P${percentile}`,
|
|
|
|
`<b>${subject}</b> ${percentile}th percentile request service time over the last 15 minutes`,
|
|
|
|
shortTimeStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchConnRate: function(serviceName, type, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var sel = this.makeTCPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var metricPfx = this.metricPrefixTCP(type);
|
|
|
|
var q = `sum(rate(${metricPfx}_total{${sel}}[15m]))`;
|
|
|
|
return this.fetchStat(
|
|
|
|
this.groupQueryTCP(type, q),
|
|
|
|
'CR',
|
2020-10-09 21:31:15 +01:00
|
|
|
`<b>${subject}</b> inbound TCP connections per second averaged over the last 15 minutes`,
|
|
|
|
shortNumStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchServiceRx: function(serviceName, type, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var sel = this.makeTCPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var metricPfx = this.metricPrefixTCP(type);
|
|
|
|
var q = `8 * sum(rate(${metricPfx}_rx_bytes_total{${sel}}[15m]))`;
|
|
|
|
return this.fetchStat(
|
|
|
|
this.groupQueryTCP(type, q),
|
|
|
|
'RX',
|
2020-10-09 21:31:15 +01:00
|
|
|
`<b>${subject}</b> received bits per second averaged over the last 15 minutes`,
|
|
|
|
shortNumStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchServiceTx: function(serviceName, type, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var sel = this.makeTCPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var metricPfx = this.metricPrefixTCP(type);
|
|
|
|
var q = `8 * sum(rate(${metricPfx}_tx_bytes_total{${sel}}[15m]))`;
|
|
|
|
var self = this;
|
|
|
|
return this.fetchStat(
|
|
|
|
this.groupQueryTCP(type, q),
|
|
|
|
'TX',
|
2020-10-09 21:31:15 +01:00
|
|
|
`<b>${subject}</b> transmitted bits per second averaged over the last 15 minutes`,
|
|
|
|
shortNumStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchServiceNoRoute: function(serviceName, type, options) {
|
2020-11-11 16:59:15 +00:00
|
|
|
var sel = this.makeTCPSelector(serviceName, type);
|
|
|
|
var subject = this.makeSubject(serviceName, type);
|
|
|
|
var metricPfx = this.metricPrefixTCP(type);
|
|
|
|
var metric = '_no_route';
|
|
|
|
if (type == 'downstream') {
|
|
|
|
metric = '_connect_fail';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
2020-11-11 16:59:15 +00:00
|
|
|
var q = `sum(rate(${metricPfx}${metric}{${sel}}[15m]))`;
|
|
|
|
return this.fetchStat(
|
|
|
|
this.groupQueryTCP(type, q),
|
|
|
|
'NR',
|
2020-10-09 21:31:15 +01:00
|
|
|
`<b>${subject}</b> unroutable (failed) connections per second averaged over the last 15 minutes`,
|
|
|
|
shortNumStr,
|
|
|
|
this.groupBy(type)
|
2020-11-11 16:59:15 +00:00
|
|
|
);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchStat: function(promql, label, desc, formatter, groupBy) {
|
|
|
|
if (!groupBy) {
|
|
|
|
// If we don't have a grouped result and its just a single stat, return
|
|
|
|
// no result as a zero not a missing stat.
|
2020-11-11 16:59:15 +00:00
|
|
|
promql += ' OR on() vector(0)';
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
//console.log(promql)
|
|
|
|
var params = {
|
|
|
|
query: promql,
|
2020-11-11 16:59:15 +00:00
|
|
|
time: new Date().getTime() / 1000,
|
|
|
|
};
|
|
|
|
return this.httpGet('/api/v1/query', params).then(function(response) {
|
2020-10-09 21:31:15 +01:00
|
|
|
if (!groupBy) {
|
|
|
|
// Not grouped, expect just one stat value return that
|
2020-11-11 16:59:15 +00:00
|
|
|
var v = parseFloat(response.data.result[0].value[1]);
|
2020-10-09 21:31:15 +01:00
|
|
|
return {
|
|
|
|
label: label,
|
|
|
|
desc: desc,
|
2020-11-11 16:59:15 +00:00
|
|
|
value: formatter(v),
|
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
var data = {};
|
|
|
|
for (var i = 0; i < response.data.result.length; i++) {
|
|
|
|
var res = response.data.result[i];
|
|
|
|
var v = parseFloat(res.value[1]);
|
|
|
|
var groupName = res.metric[groupBy];
|
|
|
|
data[groupName] = {
|
|
|
|
label: label,
|
|
|
|
desc: desc.replace('{{GROUP}}', groupName),
|
2020-11-11 16:59:15 +00:00
|
|
|
value: formatter(v),
|
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
}
|
|
|
|
return data;
|
2020-11-11 16:59:15 +00:00
|
|
|
});
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
|
|
|
|
|
|
|
fetchSeries: function(promql, options) {
|
|
|
|
var params = {
|
|
|
|
query: promql,
|
|
|
|
start: options.start,
|
|
|
|
end: options.end,
|
2020-11-11 16:59:15 +00:00
|
|
|
step: '10s',
|
|
|
|
timeout: '8s',
|
|
|
|
};
|
|
|
|
return this.httpGet('/api/v1/query_range', params);
|
2020-10-09 21:31:15 +01:00
|
|
|
},
|
2020-11-11 16:59:15 +00:00
|
|
|
};
|
2020-10-09 21:31:15 +01:00
|
|
|
|
|
|
|
// Helper functions
|
|
|
|
/**
 * Formats a number as a short display string.
 *
 * - Non-finite input (NaN, +/-Infinity) is returned as its plain string form.
 * - Below 1000: integers are shown as-is; other values use 3 significant
 *   figures from 100 up, 2 significant figures from 1 up, and 2 decimal places
 *   below 1.
 * - From 1000 up: the value is scaled and suffixed with 'k' (1e3), 'm' (1e6),
 *   'g' (1e9) at 3 significant figures, or 't' (1e12) with no decimals.
 *
 * When rounding carries a value up to the next unit (e.g. 999.9 or 999999),
 * the next unit is used ('1k', '1m') rather than emitting '1000' / '1000k'.
 *
 * @param {number} n - the value to format.
 * @returns {string} the formatted value (always a string).
 */
function shortNumStr(n) {
  // No short form exists for NaN/Infinity; avoid returning undefined or
  // nonsense such as 'Infinityt'.
  if (!Number.isFinite(n)) return String(n);

  var value = n;
  if (value < 1e3) {
    if (Number.isInteger(value)) return String(value);

    var rounded;
    if (value >= 100) {
      // Go to 3 significant figures but wrap it in Number to avoid scientific
      // notation like 2.3e+2 for 230.
      rounded = Number(value.toPrecision(3));
    } else if (value < 1) {
      // Very small numbers show with limited precision to prevent a long
      // string of 0.000000.
      rounded = Number(value.toFixed(2));
    } else {
      // Two sig figs is enough below this.
      rounded = Number(value.toPrecision(2));
    }
    if (rounded < 1e3) return String(rounded);

    // Rounding carried up to 1000; continue with the 'k' unit.
    value = 1e3;
  }

  var units = [
    [1e3, 'k'],
    [1e6, 'm'],
    [1e9, 'g'],
  ];
  for (var i = 0; i < units.length; i++) {
    var unit = units[i][0];
    var ceiling = unit * 1e3;
    if (value < ceiling) {
      var scaled = Number((value / unit).toPrecision(3));
      if (scaled < 1e3) return scaled + units[i][1];
      // Rounding carried up to the next unit; continue from exactly there.
      value = ceiling;
    }
  }
  return Number((value / 1e12).toFixed(0)) + 't';
}
|
|
|
|
|
|
|
|
/**
 * Formats a duration given in milliseconds as a short display string.
 *
 * Values that round to less than 1000 are shown as whole milliseconds ('ms').
 * Larger values are shown with one decimal place in the largest fitting unit:
 * seconds ('s'), minutes ('m'), hours ('h') or days ('d').
 *
 * The unit is chosen from the rounded value, so a duration that rounds up to
 * a unit's limit moves to the next unit (e.g. 59990 -> '1.0m', not '60.0s').
 *
 * @param {number} n - duration in milliseconds.
 * @returns {string} the formatted duration.
 */
function shortTimeStr(n) {
  var wholeMs = Math.round(n);
  if (wholeMs < 1e3) return wholeMs + 'ms';

  // Each entry is [suffix, number of this unit that make up the next unit].
  var units = [
    ['s', 60],
    ['m', 60],
    ['h', 24],
  ];
  var value = n / 1e3;
  for (var i = 0; i < units.length; i++) {
    var limit = units[i][1];
    var text = value.toFixed(1);
    // Compare the rounded text, not the raw value, so '60.0s' is never shown.
    if (Number(text) < limit) return text + units[i][0];
    value = value / limit;
  }
  return value.toFixed(1) + 'd';
}
|
|
|
|
|
|
|
|
/* global consul:writable */
|
2020-11-11 16:59:15 +00:00
|
|
|
// Hand the provider object to the global `window.consul` registry under the
// name 'prometheus'.
window.consul.registerMetricsProvider('prometheus', prometheusProvider);
|
|
|
|
})();
|