This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
using System;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Sockeye.Models;
|
||||
using Sockeye.Util;
|
||||
|
||||
namespace Sockeye.Biz
|
||||
{
|
||||
@@ -31,17 +33,81 @@ namespace Sockeye.Biz
|
||||
using (AyContext ct = Sockeye.Util.ServiceProviderProvider.DBContext)
|
||||
{
|
||||
|
||||
//get a list of all active server IDs
|
||||
var ActiveServerIdList = await ct.SubscriptionServer
|
||||
.AsNoTracking()
|
||||
.Where(z => z.ServerState != ServerState.DeActivated && z.ServerState != ServerState.DeActivated)
|
||||
.Where(z => z.ServerState != ServerState.DeActivated
|
||||
&& z.ServerState != ServerState.Destroyed
|
||||
&& z.ServerState != ServerState.Requested)
|
||||
.OrderBy(z => z.Id)
|
||||
.Select(z=>z.Id)
|
||||
.Select(z => z.Id)
|
||||
.ToListAsync();
|
||||
|
||||
foreach(long serverId in ActiveServerIdList){
|
||||
//get the health and triage accordingly
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
// var content = new StringContent(JsonConvert.SerializeObject(trialRequest), Encoding.UTF8, "application/json");
|
||||
foreach (long serverId in ActiveServerIdList)
|
||||
{
|
||||
//get the health and triage accordingly
|
||||
var client = ServiceProviderProvider.HttpClientFactory.CreateClient();
|
||||
var biz = SubscriptionServerBiz.GetBiz(ct);
|
||||
var srv = await biz.GetAsync(serverId, false);
|
||||
if (srv == null)
|
||||
{
|
||||
//this is a serious issue: raise an ops problem event and log it
|
||||
var err = $"SockBotSubscriptionServerHealthChecks error running job, subscription server record id {serverId} could not be fetched {biz.GetErrorsAsString}";
|
||||
await NotifyEventHelper.AddOpsProblemEvent(err);
|
||||
log.LogError(err);
|
||||
}
|
||||
else
|
||||
{
|
||||
//https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0
|
||||
//https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0#customize-the-http-status-code
|
||||
//Basically it returns only plain text
|
||||
//http status code = 200 for both Healthy and Degraded
|
||||
//http status code = 503 for Unhealthy
|
||||
//for our purposes anything other than 200 and "Healthy" is a problem
|
||||
var res = await client.GetAsync($"https://{srv.Name}/health");
|
||||
var responseText = await res.Content.ReadAsStringAsync();
|
||||
srv.LastHealthCheck = DateTime.UtcNow;
|
||||
srv.LastHealthStatus = responseText;
|
||||
if (res.IsSuccessStatusCode && responseText == "Healthy")
|
||||
{
|
||||
//a-ok
|
||||
srv.ServerState = ServerState.ActiveHealthy;
|
||||
}
|
||||
else
|
||||
{
|
||||
//PROBLEM!
|
||||
//progress through the ladder of severity
|
||||
//the 3rd consecutive failure triggers a notification
|
||||
switch (srv.ServerState)
|
||||
{
|
||||
case ServerState.ActiveHealthy:
|
||||
srv.ServerState = ServerState.FailFirstHealthCheck;
|
||||
break;
|
||||
case ServerState.FailFirstHealthCheck:
|
||||
srv.ServerState = ServerState.FailSecondHealthCheck;
|
||||
break;
|
||||
case ServerState.FailSecondHealthCheck:
|
||||
srv.ServerState = ServerState.FailedRequiresAttention;
|
||||
break;
|
||||
case ServerState.FailedRequiresAttention:
|
||||
//no change, it still requires attention
|
||||
break;
|
||||
}
|
||||
}
|
||||
//save changes
|
||||
await biz.PutAsync(srv);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
var err = "SockBotSubscriptionServerHealthChecks error running job";
|
||||
//serious issue requires immediate notification
|
||||
await NotifyEventHelper.AddOpsProblemEvent(err, ex);
|
||||
log.LogError(ex, err);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -51,10 +117,10 @@ namespace Sockeye.Biz
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
Reference in New Issue
Block a user