using System;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using AyaNova.Models;

namespace AyaNova.Biz
{
    /// <summary>
    /// JobSweeper - called by Generator to clean out old jobs that are completed and their logs.
    /// Also marks jobs stuck in the running state as failed, trims old job log rows and
    /// checks the active user count against the license.
    /// </summary>
    internal static class CoreJobSweeper
    {
        private static readonly ILogger log = AyaNova.Util.ApplicationLogging.CreateLogger("CoreJobSweeper");

        //UTC time of the last sweep that ran to completion; MinValue means the first call always sweeps
        private static DateTime lastSweep = DateTime.MinValue;

        private static readonly TimeSpan SWEEP_EVERY_INTERVAL = new TimeSpan(0, 10, 10);//every ten minutes roughly
        private static readonly TimeSpan SUCCEEDED_JOBS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days
        private static readonly TimeSpan FAILED_JOBS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days (gives people time to notice and look into it)
        private static readonly TimeSpan INTERNAL_JOBS_LOGS_DELETE_AFTER_THIS_TIMESPAN = new TimeSpan(14, 0, 0, 0);//14 days
        private static readonly TimeSpan RUNNING_JOBS_BECOME_FAILED_AFTER_THIS_TIMESPAN = new TimeSpan(24, 0, 0);//24 hours (time running jobs are allowed to sit in "running" state before considered failed)

        ////////////////////////////////////////////////////////////////////////////////////////////////
        // DoWorkAsync
        //
        /// <summary>
        /// Runs one sweep pass if at least SWEEP_EVERY_INTERVAL has elapsed since the last completed one:
        /// removes old completed and failed jobs, fails jobs stuck in running status,
        /// removes old job log entries and finally checks the active user count against the license.
        /// </summary>
        /// <returns>A task that completes when the sweep pass has finished or was skipped.</returns>
        public static async Task DoWorkAsync()
        {
            //This will get triggered roughly every minute, but we don't want to sweep that frequently
            if (DateTime.UtcNow - lastSweep < SWEEP_EVERY_INTERVAL)
            {
                return;
            }

            log.LogDebug("Sweep starting");
            using (AyContext ct = AyaNova.Util.ServiceProviderProvider.DBContext)
            {
                //SWEEP SUCCESSFUL JOBS
                //calculate cutoff to delete
                DateTime dtDeleteCutoff = DateTime.UtcNow - SUCCEEDED_JOBS_DELETE_AFTER_THIS_TIMESPAN;
                await sweepAsync(ct, dtDeleteCutoff, JobStatus.Completed);

                //SWEEP FAILED JOBS
                //calculate cutoff to delete
                dtDeleteCutoff = DateTime.UtcNow - FAILED_JOBS_DELETE_AFTER_THIS_TIMESPAN;
                await sweepAsync(ct, dtDeleteCutoff, JobStatus.Failed);

                //KILL STUCK JOBS
                //calculate deadline: running jobs created before it are considered stuck
                DateTime dtRunningDeadline = DateTime.UtcNow - RUNNING_JOBS_BECOME_FAILED_AFTER_THIS_TIMESPAN;
                await killStuckJobsAsync(ct, dtRunningDeadline);

                //SWEEP INTERNAL JOB LOG
                //calculate cutoff to delete
                dtDeleteCutoff = DateTime.UtcNow - INTERNAL_JOBS_LOGS_DELETE_AFTER_THIS_TIMESPAN;
                await SweepInternalJobsLogsAsync(ct, dtDeleteCutoff);

                //Stealthy check of user count exceeded
                if (await UserBiz.ActiveCountAsync() > AyaNova.Core.License.ActiveKey.ActiveNumber)
                {
                    //WARNING: DO not change the text of this message without also
                    //updating the authcontroller ReturnUserCredsOnSuccessfulAuthentication licenselockout check code
                    //it must match
                    var msg = $"E1020 - Active count exceeded capacity";
                    AyaNova.Util.ServiceProviderProvider.ServerState.SetSystemLock(msg);
                    log.LogCritical(msg);

                    //NOTE(review): returning here skips the lastSweep update below, so the next call
                    //sweeps and re-checks again instead of waiting for the interval - confirm this is intended
                    return;
                }
            }

            lastSweep = DateTime.UtcNow;
        }

        /// <summary>
        /// Remove jobs (and their logs, via JobsBiz.RemoveJobAndLogsAsync) that have the given status
        /// and were created before the cutoff, oldest first.
        /// </summary>
        /// <param name="ct">Database context used to query the jobs</param>
        /// <param name="dtDeleteCutoff">Jobs created before this time are removed</param>
        /// <param name="jobStatus">Only jobs with this status are removed</param>
        private static async Task sweepAsync(AyContext ct, DateTime dtDeleteCutoff, JobStatus jobStatus)
        {
            //Get the ids of the deleteable jobs; only the id is needed so don't fetch the whole row
            var jobIds = await ct.OpsJob
                .AsNoTracking()
                .Where(z => z.Created < dtDeleteCutoff && z.JobStatus == jobStatus)
                .OrderBy(z => z.Created)
                .Select(z => z.GId)
                .ToListAsync();

            log.LogDebug($"SweepAsync processing: cutoff={dtDeleteCutoff.ToString()}, for {jobIds.Count.ToString()} jobs of status {jobStatus.ToString()}");

            foreach (var jobId in jobIds)
            {
                try
                {
                    await JobsBiz.RemoveJobAndLogsAsync(jobId);
                }
                catch (Exception ex)
                {
                    log.LogError(ex, "sweepAsync exception calling JobsBiz.RemoveJobAndLogsAsync");
                    //for now just throw it but this needs to be removed when logging added and better handling
                    throw;
                }
            }
        }

        /// <summary>
        /// Kill jobs that have been stuck in "running" state for too long
        /// </summary>
        /// <param name="ct">Database context used to query the jobs</param>
        /// <param name="dtRunningDeadline">Running jobs created before this time are set to failed</param>
        private static async Task killStuckJobsAsync(AyContext ct, DateTime dtRunningDeadline)
        {
            //Get the list of jobs still in running status that were created before the deadline
            //(whole rows are needed here because several fields are reported in the error log below)
            var jobs = await ct.OpsJob
                .AsNoTracking()
                .Where(z => z.Created < dtRunningDeadline && z.JobStatus == JobStatus.Running)
                .OrderBy(z => z.Created)
                .ToListAsync();

            log.LogDebug($"killStuckJobsAsync processing: cutoff={dtRunningDeadline.ToString()}, for {jobs.Count.ToString()} jobs of status {JobStatus.Running.ToString()}");

            foreach (OpsJob j in jobs)
            {
                //OPSMETRIC
                await JobsBiz.LogJobAsync(j.GId, "LT:JobFailed LT:TimedOut");
                log.LogError($"Job found job stuck in running status and set to failed: deadline={dtRunningDeadline.ToString()}, jobId={j.GId.ToString()}, jobname={j.Name}, jobtype={j.JobType.ToString()}, jobAType={j.AType.ToString()}, jobObjectId={j.ObjectId.ToString()}");
                await JobsBiz.UpdateJobStatusAsync(j.GId, JobStatus.Failed);
            }
        }

        /// <summary>
        /// Remove job log entries created before the cutoff, oldest first.
        /// </summary>
        /// <param name="ct">Database context used to query and delete the log entries</param>
        /// <param name="dtDeleteCutoff">Log entries created before this time are deleted</param>
        private static async Task SweepInternalJobsLogsAsync(AyContext ct, DateTime dtDeleteCutoff)
        {
            //Get the deleteable list (this is for reporting, could easily just do it in one go)
            //only the id is needed for the delete so don't fetch the whole row
            var logIds = await ct.OpsJobLog
                .AsNoTracking()
                .Where(z => z.Created < dtDeleteCutoff)
                .OrderBy(z => z.Created)
                .Select(z => z.GId)
                .ToListAsync();

            log.LogDebug($"SweepInternalJobsLogsAsync processing: cutoff={dtDeleteCutoff.ToString()}, for {logIds.Count.ToString()} log entries");

            foreach (var logId in logIds)
            {
                try
                {
                    //interpolated value is passed to the database as a parameter by ExecuteSqlInterpolatedAsync
                    await ct.Database.ExecuteSqlInterpolatedAsync($"delete from aopsjoblog where gid = {logId}");
                }
                catch (Exception ex)
                {
                    log.LogError(ex, "SweepInternalJobsLogsAsync exception removed old log entries");
                    throw;
                }
            }
        }

        /////////////////////////////////////////////////////////////////////

    }//eoc
}//eons