Loops in metrics from Giuliano's async profiler

This commit is contained in:
benbierens 2024-10-24 16:10:07 +02:00
parent 5c65d1d74e
commit e6a5838b05
No known key found for this signature in database
GPG Key ID: 877D2C2E09A22F3A
8 changed files with 162 additions and 41 deletions

View File

@ -6,14 +6,14 @@ namespace MetricsPlugin
{
public static class CoreInterfaceExtensions
{
public static RunningPod DeployMetricsCollector(this CoreInterface ci, params IHasMetricsScrapeTarget[] scrapeTargets)
public static RunningPod DeployMetricsCollector(this CoreInterface ci, TimeSpan scrapeInterval, params IHasMetricsScrapeTarget[] scrapeTargets)
{
return Plugin(ci).DeployMetricsCollector(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray());
return Plugin(ci).DeployMetricsCollector(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray(), scrapeInterval);
}
public static RunningPod DeployMetricsCollector(this CoreInterface ci, params IMetricsScrapeTarget[] scrapeTargets)
public static RunningPod DeployMetricsCollector(this CoreInterface ci, TimeSpan scrapeInterval, params IMetricsScrapeTarget[] scrapeTargets)
{
return Plugin(ci).DeployMetricsCollector(scrapeTargets);
return Plugin(ci).DeployMetricsCollector(scrapeTargets, scrapeInterval);
}
public static IMetricsAccess WrapMetricsCollector(this CoreInterface ci, RunningPod metricsPod, IHasMetricsScrapeTarget scrapeTarget)
@ -26,19 +26,19 @@ namespace MetricsPlugin
return Plugin(ci).WrapMetricsCollectorDeployment(metricsPod, scrapeTarget);
}
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IHasManyMetricScrapeTargets[] manyScrapeTargets)
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IHasManyMetricScrapeTargets[] manyScrapeTargets)
{
return ci.GetMetricsFor(manyScrapeTargets.SelectMany(t => t.ScrapeTargets).ToArray());
return ci.GetMetricsFor(scrapeInterval, manyScrapeTargets.SelectMany(t => t.ScrapeTargets).ToArray());
}
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IHasMetricsScrapeTarget[] scrapeTargets)
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IHasMetricsScrapeTarget[] scrapeTargets)
{
return ci.GetMetricsFor(scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray());
return ci.GetMetricsFor(scrapeInterval, scrapeTargets.Select(t => t.MetricsScrapeTarget).ToArray());
}
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, params IMetricsScrapeTarget[] scrapeTargets)
public static IMetricsAccess[] GetMetricsFor(this CoreInterface ci, TimeSpan scrapeInterval, params IMetricsScrapeTarget[] scrapeTargets)
{
var rc = ci.DeployMetricsCollector(scrapeTargets);
var rc = ci.DeployMetricsCollector(scrapeInterval, scrapeTargets);
return scrapeTargets.Select(t => ci.WrapMetricsCollector(rc, t)).ToArray();
}

View File

@ -7,7 +7,7 @@ namespace MetricsPlugin
public interface IMetricsAccess : IHasContainer
{
string TargetName { get; }
Metrics? GetAllMetrics();
Metrics GetAllMetrics();
MetricsSet GetMetric(string metricName);
MetricsSet GetMetric(string metricName, TimeSpan timeout);
}
@ -27,7 +27,7 @@ namespace MetricsPlugin
public string TargetName { get; }
public RunningContainer Container => query.RunningContainer;
public Metrics? GetAllMetrics()
public Metrics GetAllMetrics()
{
return query.GetAllMetricsForNode(target);
}
@ -54,11 +54,10 @@ namespace MetricsPlugin
}
}
private MetricsSet? GetMostRecent(string metricName)
private MetricsSet GetMostRecent(string metricName)
{
var result = query.GetMostRecent(metricName, target);
if (result == null) return null;
return result.Sets.LastOrDefault();
return result.Sets.Last();
}
}
}

View File

@ -31,9 +31,9 @@ namespace MetricsPlugin
{
}
public RunningPod DeployMetricsCollector(IMetricsScrapeTarget[] scrapeTargets)
public RunningPod DeployMetricsCollector(IMetricsScrapeTarget[] scrapeTargets, TimeSpan scrapeInterval)
{
return starter.CollectMetricsFor(scrapeTargets);
return starter.CollectMetricsFor(scrapeTargets, scrapeInterval);
}
public IMetricsAccess WrapMetricsCollectorDeployment(RunningPod runningPod, IMetricsScrapeTarget target)

View File

@ -23,10 +23,10 @@ namespace MetricsPlugin
public RunningContainer RunningContainer { get; }
public Metrics? GetMostRecent(string metricName, IMetricsScrapeTarget target)
public Metrics GetMostRecent(string metricName, IMetricsScrapeTarget target)
{
var response = GetLastOverTime(metricName, GetInstanceStringForNode(target));
if (response == null) return null;
if (response == null) throw new Exception($"Failed to get most recent metric: {metricName}");
var result = new Metrics
{
@ -44,19 +44,20 @@ namespace MetricsPlugin
return result;
}
public Metrics? GetMetrics(string metricName)
public Metrics GetMetrics(string metricName)
{
var response = GetAll(metricName);
if (response == null) return null;
if (response == null) throw new Exception($"Failed to get metrics by name: {metricName}");
var result = MapResponseToMetrics(response);
Log(metricName, result);
return result;
}
public Metrics? GetAllMetricsForNode(IMetricsScrapeTarget target)
public Metrics GetAllMetricsForNode(IMetricsScrapeTarget target)
{
var response = endpoint.HttpGetJson<PrometheusQueryResponse>($"query?query={GetInstanceStringForNode(target)}{GetQueryTimeRange()}");
if (response.status != "success") return null;
var instanceString = GetInstanceStringForNode(target);
var response = endpoint.HttpGetJson<PrometheusQueryResponse>($"query?query={instanceString}{GetQueryTimeRange()}");
if (response.status != "success") throw new Exception($"Failed to get metrics for target: {instanceString}");
var result = MapResponseToMetrics(response);
Log(target, result);
return result;
@ -80,18 +81,32 @@ namespace MetricsPlugin
{
return new Metrics
{
Sets = response.data.result.Select(r =>
{
return new MetricsSet
{
Name = r.metric.__name__,
Instance = r.metric.instance,
Values = MapMultipleValues(r.values)
};
}).ToArray()
Sets = response.data.result.Select(CreateMetricsSet).ToArray()
};
}
/// <summary>
/// Converts one entry of a Prometheus query result into a <see cref="MetricsSet"/>.
/// The metric name and instance labels are copied over and the entry's values are
/// mapped with <c>MapMultipleValues</c>.
/// </summary>
/// <param name="r">The result entry to convert.</param>
/// <returns>
/// The populated set. <c>AsyncProfiler</c> is only assigned when the entry carries
/// non-empty <c>file</c>, <c>line</c> and <c>proc</c> labels; otherwise it is left
/// at its default.
/// </returns>
private MetricsSet CreateMetricsSet(PrometheusQueryResponseDataResultEntry r)
{
    var labels = r.metric;
    var set = new MetricsSet
    {
        Name = labels.__name__,
        Instance = labels.instance,
        Values = MapMultipleValues(r.values)
    };

    // All three labels must be present; a partially labelled entry is treated as a plain metric.
    if (string.IsNullOrEmpty(labels.file) || string.IsNullOrEmpty(labels.line) || string.IsNullOrEmpty(labels.proc))
    {
        return set;
    }

    set.AsyncProfiler = new AsyncProfilerMetrics
    {
        File = labels.file,
        Line = labels.line,
        Proc = labels.proc
    };
    return set;
}
private MetricsSetValue[] MapSingleValue(object[] value)
{
if (value != null && value.Length > 0)
@ -220,14 +235,28 @@ namespace MetricsPlugin
{
public string Name { get; set; } = string.Empty;
public string Instance { get; set; } = string.Empty;
public AsyncProfilerMetrics? AsyncProfiler { get; set; } = null;
public MetricsSetValue[] Values { get; set; } = Array.Empty<MetricsSetValue>();
/// <summary>
/// Renders the set as <c>Name (Instance) : {v1,v2,...}</c>, where each value is
/// formatted with its own <c>ToString()</c>.
/// When <c>AsyncProfiler</c> is set, the text is prefixed with the procedure name and
/// its <c>file:line</c> location, separated from the metric name by a single space.
/// </summary>
/// <returns>A single-line, human-readable description of this metrics set.</returns>
public override string ToString()
{
    var values = string.Join(",", Values.Select(v => v.ToString()));
    var description = $"{Name} ({Instance}) : {{{values}}}";

    if (AsyncProfiler == null)
    {
        return description;
    }

    // The trailing space keeps the location from running straight into the metric name.
    return $"proc: '{AsyncProfiler.Proc}' in '{AsyncProfiler.File}:{AsyncProfiler.Line}' {description}";
}
}
/// <summary>
/// Source-location details attached to a metrics set: a file, a line and a procedure name.
/// All three are kept as strings and default to the empty string.
/// </summary>
public class AsyncProfilerMetrics
{
    /// <summary>Value of the <c>file</c> label.</summary>
    public string File { get; set; } = "";

    /// <summary>Value of the <c>line</c> label, kept as text.</summary>
    public string Line { get; set; } = "";

    /// <summary>Value of the <c>proc</c> label.</summary>
    public string Proc { get; set; } = "";
}
public class MetricsSetValue
{
public DateTime Timestamp { get; set; }
@ -263,6 +292,10 @@ namespace MetricsPlugin
public string __name__ { get; set; } = string.Empty;
public string instance { get; set; } = string.Empty;
public string job { get; set; } = string.Empty;
// Async profiler output.
public string? file { get; set; } = null;
public string? line { get; set; } = null;
public string? proc { get; set; } = null;
}
public class PrometheusAllNamesResponse

View File

@ -16,13 +16,13 @@ namespace MetricsPlugin
this.tools = tools;
}
public RunningPod CollectMetricsFor(IMetricsScrapeTarget[] targets)
public RunningPod CollectMetricsFor(IMetricsScrapeTarget[] targets, TimeSpan scrapeInterval)
{
if (!targets.Any()) throw new ArgumentException(nameof(targets) + " must not be empty.");
Log($"Starting metrics server for {targets.Length} targets...");
var startupConfig = new StartupConfig();
startupConfig.Add(new PrometheusStartupConfig(GeneratePrometheusConfig(targets)));
startupConfig.Add(new PrometheusStartupConfig(GeneratePrometheusConfig(targets, scrapeInterval)));
var workflow = tools.CreateWorkflow();
var runningContainers = workflow.Start(1, recipe, startupConfig).WaitForOnline();
@ -48,12 +48,16 @@ namespace MetricsPlugin
tools.GetLog().Log(msg);
}
private string GeneratePrometheusConfig(IMetricsScrapeTarget[] targets)
private string GeneratePrometheusConfig(IMetricsScrapeTarget[] targets, TimeSpan scrapeInterval)
{
var secs = Convert.ToInt32(scrapeInterval.TotalSeconds);
if (secs < 1) throw new Exception("ScrapeInterval can't be < 1s");
if (secs > 60) throw new Exception("ScrapeInterval can't be > 60s");
var config = "";
config += "global:\n";
config += " scrape_interval: 10s\n";
config += " scrape_timeout: 10s\n";
config += $" scrape_interval: {secs}s\n";
config += $" scrape_timeout: {secs}s\n";
config += "\n";
config += "scrape_configs:\n";
config += " - job_name: services\n";

View File

@ -0,0 +1,85 @@
using NUnit.Framework;
using MetricsPlugin;
using Utils;
namespace CodexTests.BasicTests
{
[TestFixture]
public class AsyncProfiling : CodexDistTest
{
    // Exploratory test: starts a metrics-enabled node, uploads a file, waits, and then
    // logs which metrics set holds the highest value for each async-profiler metric.
    // It makes no assertions of its own; it fails only if one of the calls throws.
    [Test]
    public void AsyncProfileMetricsPlz()
    {
        var scrapeInterval = TimeSpan.FromSeconds(3.0);
        var node = StartCodex(s => s.EnableMetrics());
        var metrics = Ci.GetMetricsFor(scrapeInterval, node).Single();

        node.UploadFile(GenerateTestFile(100.MB()));

        // NOTE(review): presumably gives the collector time for a few scrapes at the
        // 3 s interval before querying — confirm.
        Thread.Sleep(TimeSpan.FromSeconds(10));

        var profilerMetrics = new AsyncProfileMetrics(metrics.GetAllMetrics());
        var log = GetTestLog();

        // Writes "<property name> = <set holding the highest value>" to the test log.
        void LogHighest(string name, AsyncProfileMetric metric)
        {
            log.Log($"{name} = {metric.Highest()}");
        }

        LogHighest(nameof(profilerMetrics.CallCount), profilerMetrics.CallCount);
        LogHighest(nameof(profilerMetrics.ExecTime), profilerMetrics.ExecTime);
        LogHighest(nameof(profilerMetrics.ExecTimeWithChildren), profilerMetrics.ExecTimeWithChildren);
        LogHighest(nameof(profilerMetrics.SingleExecTimeMax), profilerMetrics.SingleExecTimeMax);
        LogHighest(nameof(profilerMetrics.WallTime), profilerMetrics.WallTime);
    }
}
/// <summary>
/// Groups the metrics sets of a <see cref="Metrics"/> snapshot by the five
/// <c>chronos_*</c> metric names this test looks at.
/// </summary>
public class AsyncProfileMetrics
{
    /// <summary>Sets named <c>chronos_call_count_total</c>.</summary>
    public AsyncProfileMetric CallCount { get; }

    /// <summary>Sets named <c>chronos_exec_time_total</c>.</summary>
    public AsyncProfileMetric ExecTime { get; }

    /// <summary>Sets named <c>chronos_exec_time_with_children_total</c>.</summary>
    public AsyncProfileMetric ExecTimeWithChildren { get; }

    /// <summary>Sets named <c>chronos_single_exec_time_max</c>.</summary>
    public AsyncProfileMetric SingleExecTimeMax { get; }

    /// <summary>Sets named <c>chronos_wall_time_total</c>.</summary>
    public AsyncProfileMetric WallTime { get; }

    /// <summary>Splits <paramref name="metrics"/> into one group per metric name.</summary>
    /// <param name="metrics">The snapshot whose sets are filtered by name.</param>
    public AsyncProfileMetrics(Metrics metrics)
    {
        CallCount = CreateMetric(metrics, "chronos_call_count_total");
        ExecTime = CreateMetric(metrics, "chronos_exec_time_total");
        ExecTimeWithChildren = CreateMetric(metrics, "chronos_exec_time_with_children_total");
        SingleExecTimeMax = CreateMetric(metrics, "chronos_single_exec_time_max");
        WallTime = CreateMetric(metrics, "chronos_wall_time_total");
    }

    // Collects every set whose Name equals the given name; the group may be empty.
    private static AsyncProfileMetric CreateMetric(Metrics metrics, string name)
        => new AsyncProfileMetric(metrics.Sets.Where(set => set.Name == name).ToArray());
}
/// <summary>
/// A group of metrics sets that share one metric name, with a helper to find
/// the set that contains the largest recorded value.
/// </summary>
public class AsyncProfileMetric
{
    private readonly MetricsSet[] metricsSets;

    /// <param name="metricsSets">The sets to search; may be empty.</param>
    public AsyncProfileMetric(MetricsSet[] metricsSets)
    {
        this.metricsSets = metricsSets;
    }

    /// <summary>
    /// Returns the set holding the largest value across all sets in this group.
    /// On equal values the set encountered first is kept.
    /// </summary>
    /// <exception cref="Exception">
    /// Thrown when no value is strictly greater than <c>double.MinValue</c>,
    /// which includes the case where there are no sets or no values at all.
    /// </exception>
    public MetricsSet Highest()
    {
        MetricsSet? best = null;
        var max = double.MinValue;

        // Flatten to (owning set, value) pairs; enumeration order matches a nested loop.
        var samples = metricsSets.SelectMany(set => set.Values.Select(v => (Owner: set, v.Value)));
        foreach (var (owner, value) in samples)
        {
            // Strict comparison: ties keep the earlier set.
            if (value > max)
            {
                max = value;
                best = owner;
            }
        }

        return best ?? throw new Exception("None were highest");
    }
}
}

View File

@ -36,7 +36,7 @@ namespace CodexTests.BasicTests
var primary2 = group2[0];
var secondary2 = group2[1];
var metrics = Ci.GetMetricsFor(primary, primary2);
var metrics = Ci.GetMetricsFor(scrapeInterval: TimeSpan.FromSeconds(10), primary, primary2);
primary.ConnectToPeer(secondary);
primary2.ConnectToPeer(secondary2);

View File

@ -161,7 +161,7 @@ namespace CodexNetDeployer
Log("Starting metrics service...");
var runningContainer = ci.DeployMetricsCollector(startResults.Select(r => r.CodexNode).ToArray());
var runningContainer = ci.DeployMetricsCollector(scrapeInterval: TimeSpan.FromSeconds(10.0), startResults.Select(r => r.CodexNode).ToArray());
Log("Metrics service started.");