Files
raven/server/AyaNova/util/ReportProcessManager.cs
2021-10-27 23:18:59 +00:00

221 lines
9.5 KiB
C#

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace AyaNova.Util
{
/// <summary>
/// Tracks the headless browser processes launched to render reports and force closes
/// the ones that have outlived their rendering timeout.
/// Needed due to bugs in PuppeteerSharp where it won't close the browser on timeout properly,
/// and zombie process issues on Linux etc.; this just ensures it's safe.
/// This is triggered when a report is rendered.
/// </summary>
internal static class ReportRenderManager
{
    /*
        Design notes

        A thread safe concurrent dictionary tracks up to AYANOVA_REPORT_RENDERING_MAX_INSTANCES renderer processes.
        (It is allowed to go slightly over; this is never going to be exactly right under heavy load,
        but it should always kill the old processes no matter what.)

        - The render route checks for an available slot first by calling RenderSlotAvailable.
        - If there is no free slot, RenderSlotAvailable attempts to shut down every tracked process that is past
          its expiry, then reports whether a slot is free now. False means "try again later".
        - If there *is* a free slot the render proceeds as usual.
        - The renderer reserves a slot when it launches the browser process by calling AddProcess(processId).
        - When the report has been generated the slot is released by calling RemoveProcess(processId), which makes
          sure the process is no longer running (killing it if necessary) and then stops tracking it.

        Expired processes are only removed by the act of trying to get a new slot, so very long reports
        (running overnight for example) are still supported as long as there is no contention.
        The alternative was a job that looks for expired processes, but that would expire all old renders
        all the time, so huge reports would never work.
    */

    //Keyed by OS process id so that one specific renderer can be removed.
    //(A ConcurrentBag cannot do this: its TryTake removes an arbitrary item, not the one asked for.)
    private static readonly ConcurrentDictionary<int, ReportRenderInstanceInfo> _instances =
        new ConcurrentDictionary<int, ReportRenderInstanceInfo>();

    //Upper bound on how long to wait for a killed process to actually exit.
    //Process.Kill is asynchronous so the exit has to be awaited, but never indefinitely.
    private const int KillWaitMilliseconds = 3000;

    /// <summary>
    /// A tracked renderer process and the moment after which it may be force closed.
    /// </summary>
    public class ReportRenderInstanceInfo
    {
        /// <summary>OS process id of the renderer (browser) process.</summary>
        public int ReporterProcessId { get; set; }

        /// <summary>UTC time after which the process is considered expired.</summary>
        public DateTime Expires { get; set; }

        /// <summary>
        /// Starts tracking <paramref name="processId"/>, expiring
        /// AYANOVA_REPORT_RENDERING_TIMEOUT milliseconds from now (UTC).
        /// </summary>
        /// <param name="processId">OS process id of the renderer process.</param>
        public ReportRenderInstanceInfo(int processId)
        {
            ReporterProcessId = processId;
            Expires = DateTime.UtcNow.AddMilliseconds(ServerBootConfig.AYANOVA_REPORT_RENDERING_TIMEOUT);
        }
    }

    /// <summary>
    /// Reports whether a new report render may start. When all slots are taken, every tracked
    /// process that is past its expiry is force closed first.
    /// </summary>
    /// <param name="log">Logger for trace / diagnostic output.</param>
    /// <returns>
    /// True if fewer than AYANOVA_REPORT_RENDERING_MAX_INSTANCES processes are tracked after any
    /// cleanup; false if the caller should try again later.
    /// </returns>
    public static bool RenderSlotAvailable(ILogger log)
    {
        log.LogTrace("RenderSlotAvailable check");
        var count = _instances.Count;
#if (DEBUG)
        log.LogInformation($"DBG: RenderSlotAvailable check, there are currently {count} instances in the bag");
#endif
        if (count >= ServerBootConfig.AYANOVA_REPORT_RENDERING_MAX_INSTANCES)
        {
            log.LogTrace($"RenderSlotAvailable there are no free report rendering slots available, current count is {count}, checking for expired slots to force closed");

            //Values is a point-in-time copy so entries can safely be removed while looping
            var dtNow = DateTime.UtcNow;
            foreach (ReportRenderInstanceInfo i in _instances.Values)
            {
                if (i.Expires < dtNow)
                {
#if (DEBUG)
                    log.LogInformation($"DBG: RenderSlotAvailable attempting kill of expired process {i.ReporterProcessId}");
#endif
                    ForceCloseProcess(i, log);
                }
            }
        }

        //allow to continue if there are now fewer than max instances being tracked
        return _instances.Count < ServerBootConfig.AYANOVA_REPORT_RENDERING_MAX_INSTANCES;
    }

    /// <summary>
    /// Makes sure the process behind <paramref name="instance"/> is not running, killing it if
    /// required, and stops tracking it.
    /// </summary>
    /// <param name="instance">The tracked renderer to close.</param>
    /// <param name="log">Logger for trace / diagnostic output.</param>
    /// <returns>
    /// True if the process is gone and its slot was released; false if it could not be stopped,
    /// in which case it stays tracked so that a later call can retry.
    /// </returns>
    private static bool ForceCloseProcess(ReportRenderInstanceInfo instance, ILogger log)
    {
        log.LogTrace($"ForceCloseProcess on instance id {instance.ReporterProcessId} expires {instance.Expires.ToString()} utc");
        try
        {
            using (var p = Process.GetProcessById(instance.ReporterProcessId))
            {
                if (!p.HasExited)
                {
                    //we have an existing, running process: try to kill it.
                    //Kill only *requests* termination, so wait (bounded) for the exit to happen
                    p.Kill();
                    if (!p.WaitForExit(KillWaitMilliseconds))
                    {
                        log.LogDebug($"ForceCloseProcess process {instance.ReporterProcessId} could not be stopped");
                        return false;//can't kill it so can't free up a slot
                    }
                }
            }
        }
        catch (ArgumentException)
        {
            //this is normal: the process could not be found, which means it's already gone
            //(it will normally just close on its own once the report has rendered)
        }
        catch (InvalidOperationException)
        {
            //the process exited between looking it up and trying to kill it; also gone
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            //the OS refused the kill (e.g. access denied, or the process is already terminating).
            //Keep tracking it; if it was on its way out the next attempt will find it gone
            log.LogDebug(ex, $"ForceCloseProcess process {instance.ReporterProcessId} could not be stopped");
            return false;
        }

        //it's either gone or killed at this point, release exactly this process's slot
        _instances.TryRemove(instance.ReporterProcessId, out _);
        return true;//process that was there is now not there so, while not a perfect system, we consider it free
    }

    /// <summary>
    /// Reserves a render slot for a newly launched renderer process.
    /// If the id is already tracked its entry is replaced, which restarts its expiry.
    /// </summary>
    /// <param name="processId">OS process id of the renderer process.</param>
    internal static void AddProcess(int processId)
    {
        _instances[processId] = new ReportRenderInstanceInfo(processId);
    }

    /// <summary>
    /// Releases the slot held by <paramref name="processId"/>, killing the process if it is
    /// still running. Does nothing if the id is not being tracked.
    /// </summary>
    /// <param name="processId">OS process id of the renderer process.</param>
    /// <param name="log">Logger for trace / diagnostic output.</param>
    internal static void RemoveProcess(int processId, ILogger log)
    {
        if (_instances.TryGetValue(processId, out var instance))
        {
            ForceCloseProcess(instance, log);
        }
    }
}//eoc
}//eons