This commit is contained in:
2023-01-19 01:53:25 +00:00
parent 126ae54f66
commit a5993f4570
4 changed files with 79 additions and 20 deletions

View File

@@ -585,7 +585,6 @@ namespace Sockeye.Api.Controllers
{
TranslationKeysToFetch.Add("ServerStateRequested");
TranslationKeysToFetch.Add("ServerStateActiveHealthy");
TranslationKeysToFetch.Add("ServerStateActiveRequiresAttention");
TranslationKeysToFetch.Add("ServerStateFailFirstHealthCheck");
TranslationKeysToFetch.Add("ServerStateFailSecondHealthCheck");
TranslationKeysToFetch.Add("ServerStateFailedRequiresAttention");
@@ -594,7 +593,6 @@ namespace Sockeye.Api.Controllers
var LT = await TranslationBiz.GetSubsetStaticAsync(TranslationKeysToFetch, translationId);
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateRequested"], Id = (long)ServerState.Requested });
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateActiveHealthy"], Id = (long)ServerState.ActiveHealthy });
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateActiveRequiresAttention"], Id = (long)ServerState.ActiveRequiresAttention });
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailFirstHealthCheck"], Id = (long)ServerState.FailFirstHealthCheck });
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailSecondHealthCheck"], Id = (long)ServerState.FailSecondHealthCheck });
ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailedRequiresAttention"], Id = (long)ServerState.FailedRequiresAttention });

View File

@@ -5,11 +5,10 @@ namespace Sockeye.Biz
//Any of these changes trigger event log and potentially notification event
Requested = 0, //New server requested, not physically present yet but needs to be created and activated
ActiveHealthy = 1, //running normally no actions required
ActiveRequiresAttention=2,//running but something is up, maybe updates required to linux or it's responding slowly or low on disk space, event triggered, event log entry etc
FailFirstHealthCheck = 3, //first check failed
FailSecondHealthCheck = 4, //second check failed
FailedRequiresAttention = 5, //failed 3 health checks needs someone to physically intervene, triggers notify event
DeActivated = 6, //switched off, unavailable, about to be destroyed usually when customer stops paying given grace period
Destroyed = 7// historical, not an active server doesn't need to be tracked or dealt with
FailFirstHealthCheck = 2, //first check failed
FailSecondHealthCheck = 3, //second check failed
FailedRequiresAttention = 4, //failed 3 health checks needs someone to physically intervene, triggers notify event
DeActivated = 5, //switched off, unavailable but not destroyed, about to be destroyed usually when customer stops paying given grace period
Destroyed = 6// historical, not an active server doesn't need to be tracked or dealt with
}
}//eons

View File

@@ -1,9 +1,11 @@
using System;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Sockeye.Models;
using Sockeye.Util;
namespace Sockeye.Biz
{
@@ -31,17 +33,81 @@ namespace Sockeye.Biz
using (AyContext ct = Sockeye.Util.ServiceProviderProvider.DBContext)
{
//get a list of all active server ID's
var ActiveServerIdList = await ct.SubscriptionServer
.AsNoTracking()
.Where(z => z.ServerState != ServerState.DeActivated && z.ServerState != ServerState.DeActivated)
.Where(z => z.ServerState != ServerState.DeActivated
&& z.ServerState != ServerState.Destroyed
&& z.ServerState != ServerState.Requested)
.OrderBy(z => z.Id)
.Select(z=>z.Id)
.Select(z => z.Id)
.ToListAsync();
foreach(long serverId in ActiveServerIdList){
//get the health and triage accordingly
try
{
// var content = new StringContent(JsonConvert.SerializeObject(trialRequest), Encoding.UTF8, "application/json");
foreach (long serverId in ActiveServerIdList)
{
//get the health and triage accordingly
var client = ServiceProviderProvider.HttpClientFactory.CreateClient();
var biz = SubscriptionServerBiz.GetBiz(ct);
var srv = await biz.GetAsync(serverId, false);
if (srv == null)
{
//this is a serious issue log and server ops it
var err = $"SockBotSubscriptionServerHealthChecks error running job, subscription server record id {serverId} could not be fetched {biz.GetErrorsAsString}";
await NotifyEventHelper.AddOpsProblemEvent(err);
log.LogError(err);
}
else
{
//https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0
//https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0#customize-the-http-status-code
//Basically it returns only plain text
//http status code = 200 for both Healthy and Degraded
//http status code = 503 for UnHealthy
//for our purposes anything other than 200 and "Healthy" is a problem
var res = await client.GetAsync($"https://{srv.Name}/health");
var responseText = await res.Content.ReadAsStringAsync();
srv.LastHealthCheck = DateTime.UtcNow;
srv.LastHealthStatus = responseText;
if (res.IsSuccessStatusCode && responseText == "Healthy")
{
//a-ok
srv.ServerState = ServerState.ActiveHealthy;
}
else
{
//PROBLEM!
//progress through the ladder of severity
//3rd fail means triggers notification
switch (srv.ServerState)
{
case ServerState.ActiveHealthy:
srv.ServerState = ServerState.FailFirstHealthCheck;
break;
case ServerState.FailFirstHealthCheck:
srv.ServerState = ServerState.FailSecondHealthCheck;
break;
case ServerState.FailSecondHealthCheck:
srv.ServerState = ServerState.FailedRequiresAttention;
break;
case ServerState.FailedRequiresAttention:
//no change, it still requires attention
break;
}
}
//save changes
await biz.PutAsync(srv);
}
}
}
catch (Exception ex)
{
var err = "SockBotSubscriptionServerHealthChecks error running job";
//serious issue requires immediate notification
await NotifyEventHelper.AddOpsProblemEvent(err, ex);
log.LogError(ex, err);
}
}
@@ -51,10 +117,10 @@ namespace Sockeye.Biz
/////////////////////////////////////////////////////////////////////

View File

@@ -1059,7 +1059,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'en'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'en'");
@@ -1086,7 +1085,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'es'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'es'");
@@ -1113,7 +1111,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'fr'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'fr'");
@@ -1140,7 +1137,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'de'");
await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'de'");