using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using AyaNova.Util;
using AyaNova.Models;
using Microsoft.EntityFrameworkCore;

namespace AyaNova.Biz
{
    /// <summary>
    /// Called by Generator to gather server metrics (CPU / memory / GC counts)
    /// and insert them into the database. Also performs hourly/daily roll-up
    /// work and purges metric rows older than the retention period.
    /// </summary>
    internal static class CoreJobMetricsSnapshot
    {
        private static readonly ILogger log = AyaNova.Util.ApplicationLogging.CreateLogger("CoreJobMetricsSnapshot");

        // Keep roughly one year plus one month of metric history.
        private static readonly TimeSpan tsDataRetention = new TimeSpan(396, 0, 0, 0, 0);

        private static readonly Process _process = Process.GetCurrentProcess();

        // Seed with the CPU time already consumed at startup (must be declared AFTER
        // _process — static initializers run in declaration order). The original
        // seeded with TimeSpan.Zero, which attributed the process's entire lifetime
        // CPU usage to the first sampled interval.
        private static TimeSpan _oldCPUTime = _process.TotalProcessorTime;

        // Separate timestamps per cadence. BUGFIX: the original gated the hourly and
        // daily sections on _lastSnapshot, but _lastSnapshot is reset every minute by
        // the minute-level snapshot, so those sections (including the retention
        // cleanup) could never fire.
        private static DateTime _lastSnapshot = DateTime.UtcNow;        // minute-level snaps
        private static DateTime _lastHourlySnapshot = DateTime.UtcNow;  // hourly snaps
        private static DateTime _lastDailySnapshot = DateTime.UtcNow;   // daily snaps + cleanup

#if (DEBUG)
        // Shortened interval so metrics show up quickly while debugging.
        private static readonly TimeSpan tsOneMinute = new TimeSpan(0, 0, 10);
#else
        private static readonly TimeSpan tsOneMinute = new TimeSpan(0, 1, 0);
#endif
        private static readonly TimeSpan tsOneHour = new TimeSpan(1, 0, 0);
        private static readonly TimeSpan ts24Hours = new TimeSpan(24, 0, 0);

        ////////////////////////////////////////////////////////////////////////////////////////////////
        // DoJob
        //

        /// <summary>
        /// Entry point invoked by the job Generator. No-ops unless at least one
        /// minute has elapsed since the previous snapshot; otherwise records a
        /// MetricMM row and, on the hourly/daily cadence, performs roll-up work
        /// and purges rows older than <see cref="tsDataRetention"/>.
        /// </summary>
        public static void DoJob()
        {
            #region worksheet
            // https://github.com/sebastienros/memoryleak/blob/master/src/MemoryLeak/MemoryLeak/Controllers/DiagnosticsController.cs
            //
            // DATA TYPES .net to postgres map
            // http://www.npgsql.org/doc/types/basic.html
            // Gather stats, output to database but only every minute or more
            /*
            TODO: figure out the best format to store it in based on what I need at the client end
            TODO: Use one data table per interval, it's more efficient for all values i.e. for one minute
                  stuff use a single table, for 10 minute use another otherwise will have empty entries in
                  some (test storing null in two columns results in same size so no saving)
            TODO: store data using Postgres REAL / c# float datatype, is 38mb vs 55 for double precision
                  with one year 10 column test data

            what others track:
            https://www.dynatrace.com/technologies/net-monitoring/#&gid=0&pid=1
            https://docs.microsoft.com/en-us/aspnet/core/performance/memory?view=aspnetcore-3.1

            Make a chart at client with test data from digital ocean to play with; try to replicate their
            stuff to learn how to best do it.

            Downsampling: should I convert old data to downsampled so I can keep more of it, or is this
            presentation only? Depends on storage space I guess.

            Separate tables per metric? Seems likely since there would be less data to move around, but if
            I'm fetching all anyway?? hmm... something to consider / experiment with.

            Some D.O. charts have max value at top of left axis (y?) that changes with the range and values
            and some have 100% or 1.0 and never change that axis.

            ////////////////////////////////////////////////////////////////
            TESTING / SCRATCH PAD:

            retention setting defaults to 1 year?
            Generate sample data, see how large it would be under various scenarios i.e. if I gather every
            minute, how much can I practically store?
            Shortest time frame that is shown for DO is 1 minute intervals (6 hour / 360 entries)

            RESULT: 525600 entries (every minute for 1 year) results in 22mb of space used
            Same but with 10 different data columns results in 59mb used
            ####### USE ONE TABLE: if it was individual tables it would be 220 mb used so it's worth using
            one table for all values

            query:
            insert into ametriccpu (t,v) select CURRENT_TIMESTAMP, 58.43239007949476 from generate_series(1, 525600) s(i)
            insert into ametriccpu ( t,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10 )
                select LOCALTIMESTAMP, 58.43239007949476, 0.33006058073955513, 102.44723488288768,
                       46.078341513002755, 30.23570573933185, 0.000136518543824419, 65.8400891412282,
                       0.01, 58.43239007949476, 58.43239007949476
                from generate_series(1, 525600) s(i)

            Timestamp, decimal/float value (need to determine this number type)
            Number types D.O. has:
                CPU avg: 0.33006058073955513
                transmit bytes: 102.44723488288768
                receive bytes: 46.078341513002755
                disk usage avg (percentage): 30.23570573933185
                disk sectors written: 0.000136518543824419
                memory avg: 65.8400891412282
                load: 0.01

            Digital ocean has a separate query for each type of info, implying maybe separate storage for
            each type, which makes sense I guess though their data comes from all over the place.

            For a 30 day period their data came across like this: 109 minutes apart
                CPU average: 396 entries, 1.8 times per hour or every 109 minutes
                Disk usage: 400 entries every 108 minutes (peculiar)
            For 6 hour period get 360 entries?! 1 minute apart; they are all today, so it's always returning
            the same number of entries regardless of time period. They are one minute apart so it must be
            sampling somehow for the longer periods at least.
            For 24 hours get 401 entries 3.6 minutes apart
            7 days get 400 entries 25 minutes apart

            OK, not a trivial thing to graph; it requires "downsampling" or "decimation".
            Here is a link to a c# method that will accomplish this:
            https://github.com/sveinn-steinarsson/flot-downsample/
            c# https://gist.github.com/DanielWJudge/63300889f27c7f50eeb7

            CREATE flat_table( trip_id integer, tstamp timestamptz, speed float, distance float, temperature float, ,...);
            */
            #endregion worksheet

            // Nothing is gathered at less than one minute frequency.
            if (!DateUtil.IsAfterDuration(_lastSnapshot, tsOneMinute))
                return;

            log.LogTrace("Starting metrics snapshot");

            /////////////////////////////////////////////
            // ONE MINUTE SNAPS
            //
            var now = DateTime.UtcNow;
            _process.Refresh();

            // CPU: percent of total machine CPU consumed by this process over the
            // elapsed wall-clock interval, normalized across all processor cores.
            var cpuElapsedTime = now.Subtract(_lastSnapshot).TotalMilliseconds;
            var newCPUTime = _process.TotalProcessorTime;
            var elapsedCPU = (newCPUTime - _oldCPUTime).TotalMilliseconds;
            var CPU = elapsedCPU * 100 / Environment.ProcessorCount / cpuElapsedTime; // double precision
            _oldCPUTime = newCPUTime;

            // MEMORY
            // The memory occupied by managed objects.
            var Allocated = GC.GetTotalMemory(false); // bigint

            // The working set includes both shared and private data. The shared data includes the pages
            // that contain all the instructions that the process executes, including instructions in the
            // process modules and the system libraries.
            var WorkingSet = _process.WorkingSet64; // bigint

            // The current size of memory used by the process, in bytes, that cannot be shared with
            // other processes.
            var PrivateBytes = _process.PrivateMemorySize64; // bigint

            // Cumulative GC collection counts per generation.
            var Gen0 = GC.CollectionCount(0); // integer
            var Gen1 = GC.CollectionCount(1); // integer
            var Gen2 = GC.CollectionCount(2); // integer

            System.Diagnostics.Debug.WriteLine($"MM Snapshot, cpu: {CPU}");

            using (AyContext ct = ServiceProviderProvider.DBContext)
            {
                // Write the minute-level sample to the database.
                MetricMM mm = new MetricMM(Allocated, WorkingSet, PrivateBytes, Gen0, Gen1, Gen2, CPU);
                ct.MetricMM.Add(mm);
                ct.SaveChanges();
                System.Diagnostics.Debug.WriteLine("MM SAVED");
            }

            /////////////////////////////////////////////
            // EVERY HOUR SNAPS
            //
            // Gated on _lastHourlySnapshot (NOT _lastSnapshot, which is reset every
            // minute and therefore could never be an hour old).
            if (DateUtil.IsAfterDuration(_lastHourlySnapshot, tsOneHour))
            {
                // RECORDS IN TABLE
                //
                // log.LogTrace("Counting table records");
                // //Get a count of important tables in db
                // List allTableNames = await DbUtil.GetAllTablenamesAsync();
                // //Skip some tables as they are internal and / or only ever have one record
                // List skipTableNames = new List();
                // skipTableNames.Add("alicense");
                // skipTableNames.Add("aschemaversion");
                // foreach (string table in allTableNames)
                // {
                //     if (!skipTableNames.Contains(table))
                //     {
                //         //var tags = new MetricTags("TableTagKey", table);
                //         // metrics.Measure.Gauge.SetValue(MetricsRegistry.DBRecordsGauge, tags, await DbUtil.CountOfRecordsAsync(table));
                //     }
                // }

                // JOB COUNTS (DEAD, RUNNING, COMPLETED, SLEEPING)
                // foreach (JobStatus stat in Enum.GetValues(typeof(JobStatus)))
                // {
                //     // var jobtag = new MetricTags("JobStatus", stat.ToString());
                //     // metrics.Measure.Gauge.SetValue(MetricsRegistry.JobsGauge, jobtag, await JobsBiz.GetCountForJobStatusAsync(ct, stat));
                // }

                _lastHourlySnapshot = now;
            }

            /////////////////////////////////////////////
            // ONCE A DAY SNAPS AND CLEANUP
            //
            if (DateUtil.IsAfterDuration(_lastDailySnapshot, ts24Hours))
            {
                // FILES ON DISK
                //
                // log.LogTrace("Files on disk information");
                // var UtilFilesInfo = FileUtil.GetUtilityFolderSizeInfo();
                // var UserFilesInfo = FileUtil.GetAttachmentFolderSizeInfo();
                // var mtag = new MetricTags("File type", "Business object files");
                // metrics.Measure.Gauge.SetValue(MetricsRegistry.FileCountGauge, mtag, UserFilesInfo.FileCountWithChildren);
                // metrics.Measure.Gauge.SetValue(MetricsRegistry.FileSizeGauge, mtag, UserFilesInfo.SizeWithChildren);
                // mtag = new MetricTags("File type", "OPS files");
                // metrics.Measure.Gauge.SetValue(MetricsRegistry.FileCountGauge, mtag, UtilFilesInfo.FileCountWithChildren);
                // metrics.Measure.Gauge.SetValue(MetricsRegistry.FileSizeGauge, mtag, UtilFilesInfo.SizeWithChildren);

                /////////////////////////////////
                // CLEAR OLD ENTRIES
                //
                // Purge metric rows older than the retention window. Interpolated EF
                // Core SQL is translated to a parameterized query (not string concat).
                DateTime ClearDate = DateTime.UtcNow - tsDataRetention;
                using (AyContext ct = ServiceProviderProvider.DBContext)
                {
                    ct.Database.ExecuteSqlInterpolated($"delete from ametricmm where t < {ClearDate}");
                }

                _lastDailySnapshot = now;
            }

            _lastSnapshot = now;
        }

        /////////////////////////////////////////////////////////////////////
    }//eoc
}//eons