Loops in metrics from Giuliano's async profiler

This commit is contained in:
benbierens 2024-10-24 16:10:07 +02:00
parent 5c65d1d74e
commit e6a5838b05
No known key found for this signature in database
GPG Key ID: 877D2C2E09A22F3A
8 changed files with 162 additions and 41 deletions

View File

@ -6,14 +6,14 @@ namespace MetricsPlugin
{ {
public static class CoreInterfaceExtensions public static class CoreInterfaceExtensions
{ {
public static RunningPod DeployMetricsCollector(this CoreInterface ci, params IHasMetricsScrapeTarget[] scrapeTargets) public static RunningPod DeployMetricsCollector(this CoreInterface ci, TimeSpan scrapeInterval, params IHasMetricsScrapeTarget[] scrapeTargets)
{ {
return Plugin(ci).DeployMetricsCollector(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray()); return Plugin(ci).DeployMetricsCollector(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray(), scrapeInterval);
} }
public static RunningPod DeployMetricsCollector(this CoreInterface ci, params IMetricsScrapeTarget[] scrapeTargets) public static RunningPod DeployMetricsCollector(this CoreInterface ci, TimeSpan scrapeInterval, params IMetricsScrapeTarget[] scrapeTargets)
{ {
return Plugin(ci).DeployMetricsCollector(scrapeTargets); return Plugin(ci).DeployMetricsCollector(scrapeTargets, scrapeInterval);
} }
public static IMetricsAccess WrapMetricsCollector(this CoreInterface ci, RunningPod metricsPod, IHasMetricsScrapeTarget scrapeTarget) public static IMetricsAccess WrapMetricsCollector(this CoreInterface ci, RunningPod metricsPod, IHasMetricsScrapeTarget scrapeTarget)
@ -26,19 +26,19 @@ namespace MetricsPlugin
return Plugin(ci).WrapMetricsCollectorDeployment(metricsPod, scrapeTarget); return Plugin(ci).WrapMetricsCollectorDeployment(metricsPod, scrapeTarget);
} }
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IHasManyMetricScrapeTargets[] manyScrapeTargets) public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IHasManyMetricScrapeTargets[] manyScrapeTargets)
{ {
return ci.GetMetricsFor(manyScrapeTargets.SelectMany(t => t.ScrapeTargets).ToArray()); return ci.GetMetricsFor(scrapeInterval, manyScrapeTargets.SelectMany(t => t.ScrapeTargets).ToArray());
} }
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IHasMetricsScrapeTarget[] scrapeTargets) public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IHasMetricsScrapeTarget[] scrapeTargets)
{ {
return ci.GetMetricsFor(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray()); return ci.GetMetricsFor(scrapeInterval, scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray());
} }
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IMetricsScrapeTarget[] scrapeTargets) public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IMetricsScrapeTarget[] scrapeTargets)
{ {
var rc = ci.DeployMetricsCollector(scrapeTargets); var rc = ci.DeployMetricsCollector(scrapeInterval, scrapeTargets);
return scrapeTargets.Select(t => ci.WrapMetricsCollector(rc, t)).ToArray(); return scrapeTargets.Select(t => ci.WrapMetricsCollector(rc, t)).ToArray();
} }

View File

@ -7,7 +7,7 @@ namespace MetricsPlugin
public interface IMetricsAccess : IHasContainer public interface IMetricsAccess : IHasContainer
{ {
string TargetName { get; } string TargetName { get; }
Metrics? GetAllMetrics(); Metrics GetAllMetrics();
MetricsSet GetMetric(string metricName); MetricsSet GetMetric(string metricName);
MetricsSet GetMetric(string metricName, TimeSpan timeout); MetricsSet GetMetric(string metricName, TimeSpan timeout);
} }
@ -27,7 +27,7 @@ namespace MetricsPlugin
public string TargetName { get; } public string TargetName { get; }
public RunningContainer Container => query.RunningContainer; public RunningContainer Container => query.RunningContainer;
public Metrics? GetAllMetrics() public Metrics GetAllMetrics()
{ {
return query.GetAllMetricsForNode(target); return query.GetAllMetricsForNode(target);
} }
@ -54,11 +54,10 @@ namespace MetricsPlugin
} }
} }
private MetricsSet? GetMostRecent(string metricName) private MetricsSet GetMostRecent(string metricName)
{ {
var result = query.GetMostRecent(metricName, target); var result = query.GetMostRecent(metricName, target);
if (result == null) return null; return result.Sets.Last();
return result.Sets.LastOrDefault();
} }
} }
} }

View File

@ -31,9 +31,9 @@ namespace MetricsPlugin
{ {
} }
public RunningPod DeployMetricsCollector(IMetricsScrapeTarget[] scrapeTargets) public RunningPod DeployMetricsCollector(IMetricsScrapeTarget[] scrapeTargets, TimeSpan scrapeInterval)
{ {
return starter.CollectMetricsFor(scrapeTargets); return starter.CollectMetricsFor(scrapeTargets, scrapeInterval);
} }
public IMetricsAccess WrapMetricsCollectorDeployment(RunningPod runningPod, IMetricsScrapeTarget target) public IMetricsAccess WrapMetricsCollectorDeployment(RunningPod runningPod, IMetricsScrapeTarget target)

View File

@ -23,10 +23,10 @@ namespace MetricsPlugin
public RunningContainer RunningContainer { get; } public RunningContainer RunningContainer { get; }
public Metrics? GetMostRecent(string metricName, IMetricsScrapeTarget target) public Metrics GetMostRecent(string metricName, IMetricsScrapeTarget target)
{ {
var response = GetLastOverTime(metricName, GetInstanceStringForNode(target)); var response = GetLastOverTime(metricName, GetInstanceStringForNode(target));
if (response == null) return null; if (response == null) throw new Exception($"Failed to get most recent metric: {metricName}");
var result = new Metrics var result = new Metrics
{ {
@ -44,19 +44,20 @@ namespace MetricsPlugin
return result; return result;
} }
public Metrics? GetMetrics(string metricName) public Metrics GetMetrics(string metricName)
{ {
var response = GetAll(metricName); var response = GetAll(metricName);
if (response == null) return null; if (response == null) throw new Exception($"Failed to get metrics by name: {metricName}");
var result = MapResponseToMetrics(response); var result = MapResponseToMetrics(response);
Log(metricName, result); Log(metricName, result);
return result; return result;
} }
public Metrics? GetAllMetricsForNode(IMetricsScrapeTarget target) public Metrics GetAllMetricsForNode(IMetricsScrapeTarget target)
{ {
var response = endpoint.HttpGetJson<PrometheusQueryResponse>($"query?query={GetInstanceStringForNode(target)}{GetQueryTimeRange()}"); var instanceString = GetInstanceStringForNode(target);
if (response.status != "success") return null; var response = endpoint.HttpGetJson<PrometheusQueryResponse>($"query?query={instanceString}{GetQueryTimeRange()}");
if (response.status != "success") throw new Exception($"Failed to get metrics for target: {instanceString}");
var result = MapResponseToMetrics(response); var result = MapResponseToMetrics(response);
Log(target, result); Log(target, result);
return result; return result;
@ -80,18 +81,32 @@ namespace MetricsPlugin
{ {
return new Metrics return new Metrics
{ {
Sets = response.data.result.Select(r => Sets = response.data.result.Select(CreateMetricsSet).ToArray()
{
return new MetricsSet
{
Name = r.metric.__name__,
Instance = r.metric.instance,
Values = MapMultipleValues(r.values)
};
}).ToArray()
}; };
} }
/// <summary>
/// Converts a single Prometheus query result entry into a <see cref="MetricsSet"/>.
/// </summary>
/// <param name="r">Result entry whose metric labels and values are copied into the set.</param>
/// <returns>
/// The populated set. Its AsyncProfiler property is only assigned when the entry's
/// file, line and proc labels are all non-empty.
/// </returns>
private MetricsSet CreateMetricsSet(PrometheusQueryResponseDataResultEntry r)
{
    var labels = r.metric;
    var set = new MetricsSet
    {
        Name = labels.__name__,
        Instance = labels.instance,
        Values = MapMultipleValues(r.values)
    };

    // Without the complete file/line/proc label triple the entry is returned as a plain metric.
    if (string.IsNullOrEmpty(labels.file) || string.IsNullOrEmpty(labels.line) || string.IsNullOrEmpty(labels.proc))
    {
        return set;
    }

    set.AsyncProfiler = new AsyncProfilerMetrics
    {
        File = labels.file,
        Line = labels.line,
        Proc = labels.proc
    };
    return set;
}
private MetricsSetValue[] MapSingleValue(object[] value) private MetricsSetValue[] MapSingleValue(object[] value)
{ {
if (value != null && value.Length > 0) if (value != null && value.Length > 0)
@ -220,14 +235,28 @@ namespace MetricsPlugin
{ {
public string Name { get; set; } = string.Empty; public string Name { get; set; } = string.Empty;
public string Instance { get; set; } = string.Empty; public string Instance { get; set; } = string.Empty;
public AsyncProfilerMetrics? AsyncProfiler { get; set; } = null;
public MetricsSetValue[] Values { get; set; } = Array.Empty<MetricsSetValue>(); public MetricsSetValue[] Values { get; set; } = Array.Empty<MetricsSetValue>();
public override string ToString() public override string ToString()
{ {
return $"{Name} ({Instance}) : {{{string.Join(",", Values.Select(v => v.ToString()))}}}"; var prefix = "";
if (AsyncProfiler != null)
{
prefix = $"proc: '{AsyncProfiler.Proc}' in '{AsyncProfiler.File}:{AsyncProfiler.Line}'";
}
return $"{prefix}{Name} ({Instance}) : {{{string.Join(",", Values.Select(v => v.ToString()))}}}";
} }
} }
/// <summary>
/// Holds the file, line and proc labels of an async profiler metric.
/// CreateMetricsSet only attaches an instance to a MetricsSet when all three
/// labels are non-empty on the Prometheus result entry.
/// </summary>
public class AsyncProfilerMetrics
{
/// <summary>Value of the metric's "file" label.</summary>
public string File { get; set; } = string.Empty;
/// <summary>Value of the metric's "line" label, kept as the string Prometheus returned.</summary>
public string Line { get; set; } = string.Empty;
/// <summary>Value of the metric's "proc" label.</summary>
public string Proc { get; set; } = string.Empty;
}
public class MetricsSetValue public class MetricsSetValue
{ {
public DateTime Timestamp { get; set; } public DateTime Timestamp { get; set; }
@ -263,6 +292,10 @@ namespace MetricsPlugin
public string __name__ { get; set; } = string.Empty; public string __name__ { get; set; } = string.Empty;
public string instance { get; set; } = string.Empty; public string instance { get; set; } = string.Empty;
public string job { get; set; } = string.Empty; public string job { get; set; } = string.Empty;
// Async profiler output.
public string? file { get; set; } = null;
public string? line { get; set; } = null;
public string? proc { get; set; } = null;
} }
public class PrometheusAllNamesResponse public class PrometheusAllNamesResponse

View File

@ -16,13 +16,13 @@ namespace MetricsPlugin
this.tools = tools; this.tools = tools;
} }
public RunningPod CollectMetricsFor(IMetricsScrapeTarget[] targets) public RunningPod CollectMetricsFor(IMetricsScrapeTarget[] targets, TimeSpan scrapeInterval)
{ {
if (!targets.Any()) throw new ArgumentException(nameof(targets) + " must not be empty."); if (!targets.Any()) throw new ArgumentException(nameof(targets) + " must not be empty.");
Log($"Starting metrics server for {targets.Length} targets..."); Log($"Starting metrics server for {targets.Length} targets...");
var startupConfig = new StartupConfig(); var startupConfig = new StartupConfig();
startupConfig.Add(new PrometheusStartupConfig(GeneratePrometheusConfig(targets))); startupConfig.Add(new PrometheusStartupConfig(GeneratePrometheusConfig(targets, scrapeInterval)));
var workflow = tools.CreateWorkflow(); var workflow = tools.CreateWorkflow();
var runningContainers = workflow.Start(1, recipe, startupConfig).WaitForOnline(); var runningContainers = workflow.Start(1, recipe, startupConfig).WaitForOnline();
@ -48,12 +48,16 @@ namespace MetricsPlugin
tools.GetLog().Log(msg); tools.GetLog().Log(msg);
} }
private string GeneratePrometheusConfig(IMetricsScrapeTarget[] targets) private string GeneratePrometheusConfig(IMetricsScrapeTarget[] targets, TimeSpan scrapeInterval)
{ {
var secs = Convert.ToInt32(scrapeInterval.TotalSeconds);
if (secs < 1) throw new Exception("ScrapeInterval can't be < 1s");
if (secs > 60) throw new Exception("ScrapeInterval can't be > 60s");
var config = ""; var config = "";
config += "global:\n"; config += "global:\n";
config += " scrape_interval: 10s\n"; config += $" scrape_interval: {secs}s\n";
config += " scrape_timeout: 10s\n"; config += $" scrape_timeout: {secs}s\n";
config += "\n"; config += "\n";
config += "scrape_configs:\n"; config += "scrape_configs:\n";
config += " - job_name: services\n"; config += " - job_name: services\n";

View File

@ -0,0 +1,85 @@
using NUnit.Framework;
using MetricsPlugin;
using Utils;
namespace CodexTests.BasicTests
{
/// <summary>
/// Exploratory test fixture that writes async profiler metrics of a Codex node to the test log.
/// </summary>
[TestFixture]
public class AsyncProfiling : CodexDistTest
{
    /// <summary>
    /// Starts a metrics-enabled node, uploads a 100 MB file, and logs the metric set
    /// holding the highest value for each profiler metric. The test makes no assertions;
    /// it fails only when one of the calls throws.
    /// </summary>
    [Test]
    public void AsyncProfileMetricsPlz()
    {
        var codexNode = StartCodex(setup => setup.EnableMetrics());
        var metricsAccess = Ci.GetMetricsFor(scrapeInterval: TimeSpan.FromSeconds(3.0), codexNode).Single();

        var testFile = GenerateTestFile(100.MB());
        codexNode.UploadFile(testFile);

        // NOTE(review): presumably this wait lets the collector complete a few 3s scrapes
        // after the upload before the metrics are queried - confirm.
        Thread.Sleep(10000);

        var profile = new AsyncProfileMetrics(metricsAccess.GetAllMetrics());
        var testLog = GetTestLog();

        // Writes one "<name> = <set>" line for the set with the highest value.
        void LogHighest(string label, AsyncProfileMetric metric)
        {
            testLog.Log($"{label} = {metric.Highest()}");
        }

        LogHighest(nameof(profile.CallCount), profile.CallCount);
        LogHighest(nameof(profile.ExecTime), profile.ExecTime);
        LogHighest(nameof(profile.ExecTimeWithChildren), profile.ExecTimeWithChildren);
        LogHighest(nameof(profile.SingleExecTimeMax), profile.SingleExecTimeMax);
        LogHighest(nameof(profile.WallTime), profile.WallTime);
    }
}
/// <summary>
/// Splits a <see cref="Metrics"/> snapshot into the five chronos profiler metrics,
/// each exposed as an <see cref="AsyncProfileMetric"/> over the sets with that name.
/// </summary>
public class AsyncProfileMetrics
{
    /// <summary>Sets named "chronos_call_count_total".</summary>
    public AsyncProfileMetric CallCount { get; }

    /// <summary>Sets named "chronos_exec_time_total".</summary>
    public AsyncProfileMetric ExecTime { get; }

    /// <summary>Sets named "chronos_exec_time_with_children_total".</summary>
    public AsyncProfileMetric ExecTimeWithChildren { get; }

    /// <summary>Sets named "chronos_single_exec_time_max".</summary>
    public AsyncProfileMetric SingleExecTimeMax { get; }

    /// <summary>Sets named "chronos_wall_time_total".</summary>
    public AsyncProfileMetric WallTime { get; }

    /// <summary>
    /// Builds the five per-name views from the given snapshot.
    /// </summary>
    /// <param name="metrics">Snapshot whose sets are filtered by metric name.</param>
    public AsyncProfileMetrics(Metrics metrics)
    {
        AsyncProfileMetric Pick(string metricName) => CreateMetric(metrics, metricName);

        CallCount = Pick("chronos_call_count_total");
        ExecTime = Pick("chronos_exec_time_total");
        ExecTimeWithChildren = Pick("chronos_exec_time_with_children_total");
        SingleExecTimeMax = Pick("chronos_single_exec_time_max");
        WallTime = Pick("chronos_wall_time_total");
    }

    /// <summary>
    /// Collects every set whose name equals <paramref name="name"/>; the result may be empty.
    /// </summary>
    private static AsyncProfileMetric CreateMetric(Metrics metrics, string name)
    {
        var matching = (from set in metrics.Sets
                        where set.Name == name
                        select set).ToArray();
        return new AsyncProfileMetric(matching);
    }
}
/// <summary>
/// A group of metric sets that share one metric name, with a helper to find
/// the set that contains the largest recorded value.
/// </summary>
public class AsyncProfileMetric
{
    private readonly MetricsSet[] metricsSets;

    /// <summary>
    /// Wraps the given sets; the array is stored as-is, not copied.
    /// </summary>
    /// <param name="metricsSets">Sets to search in <see cref="Highest"/>.</param>
    public AsyncProfileMetric(MetricsSet[] metricsSets)
    {
        this.metricsSets = metricsSets;
    }

    /// <summary>
    /// Returns the set holding the largest value across all sets. On a tie the
    /// set encountered first wins. NaN values are ignored.
    /// </summary>
    /// <returns>The set that contains the highest value.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when there are no sets, or no set contains a comparable (non-NaN) value.
    /// </exception>
    public MetricsSet Highest()
    {
        MetricsSet? best = null;
        var bestValue = 0.0;

        foreach (var set in metricsSets)
        {
            foreach (var sample in set.Values)
            {
                double current = sample.Value;

                // NaN never compares greater than anything, so it can never be "highest".
                if (double.IsNaN(current)) continue;

                // Accept the first comparable value unconditionally instead of comparing
                // against a double.MinValue sentinel, which rejected double.MinValue and
                // negative infinity and made such sets look empty.
                if (best == null || current > bestValue)
                {
                    bestValue = current;
                    best = set;
                }
            }
        }

        if (best == null) throw new InvalidOperationException("None were highest");
        return best;
    }
}
}

View File

@ -36,7 +36,7 @@ namespace CodexTests.BasicTests
var primary2 = group2[0]; var primary2 = group2[0];
var secondary2 = group2[1]; var secondary2 = group2[1];
var metrics = Ci.GetMetricsFor(primary, primary2); var metrics = Ci.GetMetricsFor(scrapeInterval: TimeSpan.FromSeconds(10), primary, primary2);
primary.ConnectToPeer(secondary); primary.ConnectToPeer(secondary);
primary2.ConnectToPeer(secondary2); primary2.ConnectToPeer(secondary2);

View File

@ -161,7 +161,7 @@ namespace CodexNetDeployer
Log("Starting metrics service..."); Log("Starting metrics service...");
var runningContainer = ci.DeployMetricsCollector(startResults.Select(r => r.CodexNode).ToArray()); var runningContainer = ci.DeployMetricsCollector(scrapeInterval: TimeSpan.FromSeconds(10.0), startResults.Select(r => r.CodexNode).ToArray());
Log("Metrics service started."); Log("Metrics service started.");