// raven/server/AyaNova/generator/CoreJobMetricsSnapshot.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using AyaNova.Models;
using AyaNova.Util;
namespace AyaNova.Biz
{
/// <summary>
/// Called by the Generator to gather server metrics and insert them into the database.
/// </summary>
internal static class CoreJobMetricsSnapshot
{
private static ILogger log = AyaNova.Util.ApplicationLogging.CreateLogger("CoreJobMetricsSnapshot");
private static Process _process = Process.GetCurrentProcess();
private static TimeSpan _oldCPUTime = TimeSpan.Zero;
private static DateTime _lastSnapshot = DateTime.UtcNow;
private static DateTime _lastRpsTime = DateTime.UtcNow;
private static double _cpu = 0, _rps = 0;
private static TimeSpan tsOneMinute = new TimeSpan(0, 1, 0);
private static TimeSpan tsOneHour = new TimeSpan(1, 0, 0);
private static TimeSpan ts24Hours = new TimeSpan(24, 0, 0);
////////////////////////////////////////////////////////////////////////////////////////////////
// DoJobAsync
//
public static async Task DoJobAsync(AyContext ct)
{
//https://github.com/sebastienros/memoryleak/blob/master/src/MemoryLeak/MemoryLeak/Controllers/DiagnosticsController.cs
//Gather stats and write them to the database, but only once per minute at most
/*
//TODO: figure out the best format to store it in based on what I need at the client end
// TODO: use one data table per interval; it's more efficient for all values,
i.e. for one-minute data use a single table, for ten-minute data use another,
otherwise some tables will have empty entries
(tested: storing null in two columns results in the same size, so no saving there)
// TODO: store data using the Postgres REAL / C# float datatype; it's 38 MB vs 55 MB for double precision with one year of 10-column test data
what others track:
https://www.dynatrace.com/technologies/net-monitoring/#&gid=0&pid=1
https://docs.microsoft.com/en-us/aspnet/core/performance/memory?view=aspnetcore-3.1
Make a chart at the client with test data from DigitalOcean to play with;
try to replicate their charts to learn how best to do it.
Downsampling: should I convert old data to a downsampled form so I can keep more of it, or is this presentation-only?
Depends on storage space, I guess.
Separate tables per metric?
Seems likely, since there would be less data to move around, but if I'm fetching all of them anyway??
Hmm... something to consider / experiment with.
Some D.O. charts have a max value at the top of the left (y) axis that changes with the range and values, and some are fixed at 100% or 1.0 and never change that axis.
////////////////////////////////////////////////////////////////
TESTING / SCRATCH PAD:
// retention setting defaults to 1 year?
Generate sample data and see how large it would be under various scenarios,
i.e. if I gather every minute, how much can I practically store?
The shortest time frame D.O. shows is 1-minute intervals (6 hours / 360 entries).
RESULT:
525600 entries (every minute for 1 year) results in 22 MB of space used.
Same, but with 10 different data columns, results in 59 MB used.
####### USE ONE TABLE: as individual tables it would be 220 MB used, so it's worth using one table for all values
query: insert into ametriccpu (t,v) select CURRENT_TIMESTAMP, 58.43239007949476 from generate_series(1, 525600) s(i)
insert into ametriccpu (
t,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10
)
select
LOCALTIMESTAMP,
58.43239007949476,
0.33006058073955513,
102.44723488288768,
46.078341513002755,
30.23570573933185,
0.000136518543824419,
65.8400891412282,
0.01,
58.43239007949476,
58.43239007949476
from generate_series(1, 525600) s(i)
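The resulting table sizes can be checked with standard Postgres size functions, e.g.:
select pg_size_pretty(pg_total_relation_size('ametriccpu'));
A matching table for the 10-column test insert above would be something like
(column types assumed; REAL per the TODO above, though the 59 MB test was presumably double precision):
create table ametriccpu (
t timestamptz,
v1 real, v2 real, v3 real, v4 real, v5 real,
v6 real, v7 real, v8 real, v9 real, v10 real
);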
Timestamp plus a decimal/float value (need to determine the exact number type).
Example values D.O. returns:
CPU avg: 0.33006058073955513
transmit bytes: 102.44723488288768
receive bytes: 46.078341513002755
disk usage avg (percentage): 30.23570573933185
disk sectors written: 0.000136518543824419
memory avg: 65.8400891412282
load: 0.01
DigitalOcean has a separate query for each type of info,
implying maybe separate storage for each type, which makes sense I guess,
though their data comes from all over the place.
For a 30-day period their data came across like this:
CPU average: 396 entries, 1.8 times per hour, or every ~109 minutes
Disk usage: 400 entries, every ~108 minutes (peculiar)
For a 6-hour period you get 360 entries, all from today and 1 minute apart?!
So it always returns roughly the same number of entries regardless of time period,
which means it must be sampling somehow, for the longer periods at least:
for 24 hours you get 401 entries, ~3.6 minutes apart;
for 7 days, 400 entries, ~25 minutes apart.
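(In other words, interval ≈ requested range / ~400 points:
6 h = 360 min / 360 = 1 min; 24 h = 1440 min / 400 = 3.6 min;
7 d = 10080 min / 400 ≈ 25 min; 30 d = 43200 min / 400 = 108 min.)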
OK, this is not a trivial thing to graph; it requires "downsampling" or "decimation".
The usual algorithm is Largest-Triangle-Three-Buckets (LTTB):
https://github.com/sveinn-steinarsson/flot-downsample/
and here is a C# port of it:
https://gist.github.com/DanielWJudge/63300889f27c7f50eeb7
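For reference, a minimal LTTB sketch (my own sketch from the algorithm description
at the links above, not the gist's code; assumes points are (t, v) pairs sorted by
time; untested):

static (double T, double V)[] Lttb((double T, double V)[] data, int threshold)
{
    // Nothing to do if we already have few enough points.
    if (threshold < 3 || threshold >= data.Length)
        return data;
    var sampled = new (double T, double V)[threshold];
    // First and last points are always kept; the rest are bucketed.
    double bucketSize = (double)(data.Length - 2) / (threshold - 2);
    int a = 0;            // index of the last selected point
    sampled[0] = data[0];
    for (int i = 0; i < threshold - 2; i++)
    {
        // Average of the *next* bucket forms the third triangle corner.
        int avgStart = (int)((i + 1) * bucketSize) + 1;
        int avgEnd = Math.Min((int)((i + 2) * bucketSize) + 1, data.Length);
        double avgT = 0, avgV = 0;
        for (int j = avgStart; j < avgEnd; j++) { avgT += data[j].T; avgV += data[j].V; }
        avgT /= avgEnd - avgStart;
        avgV /= avgEnd - avgStart;
        // Pick the point in the current bucket with the largest triangle area.
        int lo = (int)(i * bucketSize) + 1;
        int hi = (int)((i + 1) * bucketSize) + 1;
        double maxArea = -1;
        int maxIdx = lo;
        for (int j = lo; j < hi; j++)
        {
            double area = Math.Abs((data[a].T - avgT) * (data[j].V - data[a].V)
                                 - (data[a].T - data[j].T) * (avgV - data[a].V)) / 2;
            if (area > maxArea) { maxArea = area; maxIdx = j; }
        }
        sampled[i + 1] = data[maxIdx];
        a = maxIdx;
    }
    sampled[threshold - 1] = data[data.Length - 1];
    return sampled;
}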
CREATE TABLE flat_table(
trip_id integer,
tstamp timestamptz,
speed float,
distance float,
temperature float,
...);
*/
//Nothing is gathered more often than once per minute
if (!DateUtil.IsAfterDuration(_lastSnapshot, tsOneMinute))
return;
log.LogTrace("Starting metrics snapshot");
/////////////////////////////////////////////
//ONE MINUTE SNAPS
//
var now = DateTime.UtcNow;
_process.Refresh();
//CPU
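// Process CPU % over the interval: CPU time consumed since the last snapshot,
// divided by (wall-clock elapsed * logical core count), times 100.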
var cpuElapsedTime = now.Subtract(_lastSnapshot).TotalMilliseconds;
var newCPUTime = _process.TotalProcessorTime;
var elapsedCPU = (newCPUTime - _oldCPUTime).TotalMilliseconds;
_cpu = elapsedCPU * 100 / Environment.ProcessorCount / cpuElapsedTime;
_oldCPUTime = newCPUTime;
//MEMORY
// The memory occupied by objects.
var Allocated = GC.GetTotalMemory(false);
// The working set includes both shared and private data. The shared data includes the pages that contain all the
// instructions that the process executes, including instructions in the process modules and the system libraries.
var WorkingSet = _process.WorkingSet64;
// The value returned by this property represents the current size of memory used by the process, in bytes, that
// cannot be shared with other processes.
var PrivateBytes = _process.PrivateMemorySize64;
// The number of generation 0 collections
var Gen0 = GC.CollectionCount(0);
// The number of generation 1 collections
var Gen1 = GC.CollectionCount(1);
// The number of generation 2 collections
var Gen2 = GC.CollectionCount(2);
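// Note: CollectionCount values are cumulative since process start; to chart
// per-interval collection rates, the delta from the previous snapshot is needed.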
var CPU = _cpu;
//write to db (persistence not implemented yet; mm is unused so far)
MetricMM mm = new MetricMM();
/////////////////////////////////////////////
//EVERY HOUR SNAPS
//
if (DateUtil.IsAfterDuration(_lastSnapshot, tsOneHour))
{
//RECORDS IN TABLE
//Only do this once per hour
log.LogTrace("Counting table records");
//Get a count of important tables in db
List<string> allTableNames = await DbUtil.GetAllTablenamesAsync();
//Skip some tables as they are internal and / or only ever have one record
List<string> skipTableNames = new List<string> { "alicense", "aschemaversion" };
foreach (string table in allTableNames)
{
if (!skipTableNames.Contains(table))
{
//var tags = new MetricTags("TableTagKey", table);
// metrics.Measure.Gauge.SetValue(MetricsRegistry.DBRecordsGauge, tags, await DbUtil.CountOfRecordsAsync(table));
}
}
//JOB COUNTS (DEAD, RUNNING, COMPLETED, SLEEPING)
foreach (JobStatus stat in Enum.GetValues(typeof(JobStatus)))
{
// var jobtag = new MetricTags("JobStatus", stat.ToString());
// metrics.Measure.Gauge.SetValue(MetricsRegistry.JobsGauge, jobtag, await JobsBiz.GetCountForJobStatusAsync(ct, stat));
}
}
/////////////////////////////////////////////
//ONCE A DAY SNAPS
//
if (DateUtil.IsAfterDuration(_lastSnapshot, ts24Hours))
{
//FILES ON DISK
log.LogTrace("Files on disk information");
var UtilFilesInfo = FileUtil.GetUtilityFolderSizeInfo();
var UserFilesInfo = FileUtil.GetAttachmentFolderSizeInfo();
// var mtag = new MetricTags("File type", "Business object files");
// metrics.Measure.Gauge.SetValue(MetricsRegistry.FileCountGauge, mtag, UserFilesInfo.FileCountWithChildren);
// metrics.Measure.Gauge.SetValue(MetricsRegistry.FileSizeGauge, mtag, UserFilesInfo.SizeWithChildren);
// mtag = new MetricTags("File type", "OPS files");
// metrics.Measure.Gauge.SetValue(MetricsRegistry.FileCountGauge, mtag, UtilFilesInfo.FileCountWithChildren);
// metrics.Measure.Gauge.SetValue(MetricsRegistry.FileSizeGauge, mtag, UtilFilesInfo.SizeWithChildren);
}
_lastSnapshot = now;
//just to hide compiler warning for now
await Task.CompletedTask;
}
/////////////////////////////////////////////////////////////////////
}//eoc
}//eons