157 lines
6.7 KiB
C#
157 lines
6.7 KiB
C#
using System;
|
|
using System.Linq;
|
|
using System.Threading.Tasks;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using Microsoft.Extensions.Logging;
|
|
using AyaNova.Models;
|
|
|
|
namespace AyaNova.Biz
|
|
{
|
|
|
|
|
|
/// <summary>
/// JobSweeper - called by Generator to clean out old jobs that are completed and their logs.
/// Also marks jobs that have sat in the running state for too long as failed, removes old
/// job log entries and checks the active user count against the license key.
/// </summary>
internal static class CoreJobSweeper
{
    private static readonly ILogger log = AyaNova.Util.ApplicationLogging.CreateLogger("CoreJobSweeper");

    //UTC time the last sweep ran to completion; MinValue means no sweep has completed yet
    private static DateTime lastSweep = DateTime.MinValue;

    //Minimum time between two sweeps (10 seconds)
    private static readonly TimeSpan SWEEP_EVERY_INTERVAL = new TimeSpan(0, 0, 10);
    private static readonly TimeSpan SUCCEEDED_JOBS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days
    private static readonly TimeSpan FAILED_JOBS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days (gives people time to notice and look into it)
    private static readonly TimeSpan INTERNAL_JOBS_LOGS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days

    private static readonly TimeSpan RUNNING_JOBS_BECOME_FAILED_AFTER_THIS_TIMESPAN = new TimeSpan(24, 0, 0);//24 hours (time running jobs are allowed to sit in "running" state before considered failed)

    ////////////////////////////////////////////////////////////////////////////////////////////////
    // DoWorkAsync
    //
    /// <summary>
    /// Runs one sweep pass unless the previous one completed less than
    /// SWEEP_EVERY_INTERVAL ago, in which case it returns without doing anything.
    /// A pass, in order: deletes old completed jobs, deletes old failed jobs, fails jobs
    /// stuck in the running state, deletes old job log entries and finally compares the
    /// active user count with the license key.
    /// </summary>
    /// <returns>A task that completes when the pass has finished or was skipped.</returns>
    public static async Task DoWorkAsync()
    {
        //This will get triggered roughly every minute, but we don't want to sweep that frequently
        if (DateTime.UtcNow - lastSweep < SWEEP_EVERY_INTERVAL)
        {
            return;
        }

        log.LogTrace("Sweep starting");
        using (AyContext ct = AyaNova.Util.ServiceProviderProvider.DBContext)
        {
            //SWEEP SUCCESSFUL JOBS
            //calculate cutoff to delete
            DateTime dtDeleteCutoff = DateTime.UtcNow - SUCCEEDED_JOBS_DELETE_AFTER_THIS_TIMESPAN;
            await sweepAsync(ct, dtDeleteCutoff, JobStatus.Completed);

            //SWEEP FAILED JOBS
            //calculate cutoff to delete
            dtDeleteCutoff = DateTime.UtcNow - FAILED_JOBS_DELETE_AFTER_THIS_TIMESPAN;
            await sweepAsync(ct, dtDeleteCutoff, JobStatus.Failed);

            //KILL STUCK JOBS
            //calculate the deadline before which a still running job is considered stuck
            DateTime dtRunningDeadline = DateTime.UtcNow - RUNNING_JOBS_BECOME_FAILED_AFTER_THIS_TIMESPAN;
            await killStuckJobsAsync(ct, dtRunningDeadline);

            //SWEEP INTERNAL JOB LOG
            //calculate cutoff to delete
            dtDeleteCutoff = DateTime.UtcNow - INTERNAL_JOBS_LOGS_DELETE_AFTER_THIS_TIMESPAN;
            await SweepInternalJobsLogsAsync(ct, dtDeleteCutoff);

            //Stealthy check of user count exceeded
            if (await UserBiz.ActiveCountAsync() > AyaNova.Core.License.ActiveKey.ActiveNumber)
            {
                var msg = "E1020 - Active count exceeded capacity";
                AyaNova.Util.ServiceProviderProvider.ServerState.SetSystemLock(msg);
                log.LogCritical(msg);
                //NOTE(review): returning here skips the lastSweep update below, so the next call
                //sweeps and re-checks again without waiting for the interval - confirm this is intended
                return;
            }
        }

        //Only reached when the whole pass completed without an exception or early return
        lastSweep = DateTime.UtcNow;
    }

    /// <summary>
    /// Removes every job (together with its logs, via JobsBiz.RemoveJobAndLogsAsync) that
    /// has the given status and was created before the cutoff, oldest first.
    /// </summary>
    /// <param name="ct">Database context used to query the jobs.</param>
    /// <param name="dtDeleteCutoff">Jobs created before this time are removed.</param>
    /// <param name="jobStatus">Only jobs in this status are removed.</param>
    /// <remarks>An exception from a removal is logged and rethrown, which ends the sweep.</remarks>
    private static async Task sweepAsync(AyContext ct, DateTime dtDeleteCutoff, JobStatus jobStatus)
    {
        //Get the ids of the deleteable jobs; only the id is needed to remove a job
        //so there is no reason to materialize the whole rows
        var jobIds = await ct.OpsJob
            .AsNoTracking()
            .Where(z => z.Created < dtDeleteCutoff && z.JobStatus == jobStatus)
            .OrderBy(z => z.Created)
            .Select(z => z.GId)
            .ToListAsync();

        log.LogTrace($"SweepAsync processing: cutoff={dtDeleteCutoff.ToString()}, for {jobIds.Count.ToString()} jobs of status {jobStatus.ToString()}");

        foreach (var jobId in jobIds)
        {
            try
            {
                await JobsBiz.RemoveJobAndLogsAsync(jobId);
            }
            catch (Exception ex)
            {
                log.LogError(ex, "sweepAsync exception calling JobsBiz.RemoveJobAndLogsAsync");
                //rethrow so the caller sees the failure and the sweep is not recorded as completed
                throw;
            }
        }
    }

    /// <summary>
    /// Kill jobs that have been stuck in "running" state for too long
    /// </summary>
    /// <param name="ct">Database context used to query the jobs.</param>
    /// <param name="dtRunningDeadline">Running jobs created before this time are set to failed.</param>
    private static async Task killStuckJobsAsync(AyContext ct, DateTime dtRunningDeadline)
    {
        //Get the jobs still in running status that were created before the deadline
        //(full rows are loaded because their details are written to the log below)
        var stuckJobs = await ct.OpsJob
            .AsNoTracking()
            .Where(z => z.Created < dtRunningDeadline && z.JobStatus == JobStatus.Running)
            .OrderBy(z => z.Created)
            .ToListAsync();

        log.LogTrace($"killStuckJobsAsync processing: cutoff={dtRunningDeadline.ToString()}, for {stuckJobs.Count.ToString()} jobs of status {JobStatus.Running.ToString()}");

        foreach (OpsJob j in stuckJobs)
        {
            //OPSMETRIC
            //add an entry to the job's own log, report it in the server log, then flag the job failed
            await JobsBiz.LogJobAsync(j.GId, "LT:JobFailed LT:TimedOut");
            log.LogError($"Job found job stuck in running status and set to failed: deadline={dtRunningDeadline.ToString()}, jobId={j.GId.ToString()}, jobname={j.Name}, jobtype={j.JobType.ToString()}, jobObjectType={j.ObjectType.ToString()}, jobObjectId={j.ObjectId.ToString()}");
            await JobsBiz.UpdateJobStatusAsync(j.GId, JobStatus.Failed);
        }
    }

    /// <summary>
    /// Deletes every job log entry created before the cutoff, oldest first, one delete
    /// statement per entry.
    /// </summary>
    /// <param name="ct">Database context used to query and delete the log entries.</param>
    /// <param name="dtDeleteCutoff">Log entries created before this time are deleted.</param>
    /// <remarks>An exception from a delete is logged and rethrown, which ends the sweep.</remarks>
    private static async Task SweepInternalJobsLogsAsync(AyContext ct, DateTime dtDeleteCutoff)
    {
        //Get the deleteable list (this is for reporting, could easily just do it in one go)
        //Only the ids are fetched; nothing else from the row is used
        var logIds = await ct.OpsJobLog
            .AsNoTracking()
            .Where(z => z.Created < dtDeleteCutoff)
            .OrderBy(z => z.Created)
            .Select(z => z.GId)
            .ToListAsync();

        log.LogTrace($"SweepInternalJobsLogsAsync processing: cutoff={dtDeleteCutoff.ToString()}, for {logIds.Count.ToString()} log entries");

        foreach (var logId in logIds)
        {
            try
            {
                //the interpolated value is sent as a database parameter, not concatenated into the sql
                await ct.Database.ExecuteSqlInterpolatedAsync($"delete from aopsjoblog where gid = {logId}");
            }
            catch (Exception ex)
            {
                log.LogError(ex, "SweepInternalJobsLogsAsync exception removed old log entries");
                throw;
            }
        }
    }

    /////////////////////////////////////////////////////////////////////

}//eoc
|
|
|
|
|
|
}//eons
|
|
|