diff --git a/server/Controllers/EnumListController.cs b/server/Controllers/EnumListController.cs index c64f310..5786ade 100644 --- a/server/Controllers/EnumListController.cs +++ b/server/Controllers/EnumListController.cs @@ -585,7 +585,6 @@ namespace Sockeye.Api.Controllers { TranslationKeysToFetch.Add("ServerStateRequested"); TranslationKeysToFetch.Add("ServerStateActiveHealthy"); - TranslationKeysToFetch.Add("ServerStateActiveRequiresAttention"); TranslationKeysToFetch.Add("ServerStateFailFirstHealthCheck"); TranslationKeysToFetch.Add("ServerStateFailSecondHealthCheck"); TranslationKeysToFetch.Add("ServerStateFailedRequiresAttention"); @@ -594,7 +593,6 @@ namespace Sockeye.Api.Controllers var LT = await TranslationBiz.GetSubsetStaticAsync(TranslationKeysToFetch, translationId); ReturnList.Add(new NameIdItem() { Name = LT["ServerStateRequested"], Id = (long)ServerState.Requested }); ReturnList.Add(new NameIdItem() { Name = LT["ServerStateActiveHealthy"], Id = (long)ServerState.ActiveHealthy }); - ReturnList.Add(new NameIdItem() { Name = LT["ServerStateActiveRequiresAttention"], Id = (long)ServerState.ActiveRequiresAttention }); ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailFirstHealthCheck"], Id = (long)ServerState.FailFirstHealthCheck }); ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailSecondHealthCheck"], Id = (long)ServerState.FailSecondHealthCheck }); ReturnList.Add(new NameIdItem() { Name = LT["ServerStateFailedRequiresAttention"], Id = (long)ServerState.FailedRequiresAttention }); diff --git a/server/biz/ServerState.cs b/server/biz/ServerState.cs index bf4e028..4e76c3b 100644 --- a/server/biz/ServerState.cs +++ b/server/biz/ServerState.cs @@ -5,11 +5,10 @@ namespace Sockeye.Biz //Any of these changes trigger event log and potentially notification event Requested = 0, //New server requested, not physically present yet but needs to be created and activated ActiveHealthy = 1, //running normally no actions required - 
ActiveRequiresAttention=2,//running but something is up, maybe updates required to linux or it's responding slowly or low on disk space, event triggered, event log entry etc - FailFirstHealthCheck = 3, //first check failed - FailSecondHealthCheck = 4, //second check failed - FailedRequiresAttention = 5, //failed 3 health checks needs someone to physically intervene, triggers notify event - DeActivated = 6, //swtiched off, unavailable, about to be destroyed usually when customer stops paying given grace period - Destroyed = 7// historical, not an active server doesn't need to be tracked or dealt with + FailFirstHealthCheck = 2, //first check failed + FailSecondHealthCheck = 3, //second check failed + FailedRequiresAttention = 4, //failed 3 health checks needs someone to physically intervene, triggers notify event + DeActivated = 5, //switched off, unavailable but not destroyed, about to be destroyed usually when customer stops paying given grace period + Destroyed = 6// historical, not an active server doesn't need to be tracked or dealt with } }//eons \ No newline at end of file diff --git a/server/generator/SockBotSubscriptionServerHealthChecks.cs b/server/generator/SockBotSubscriptionServerHealthChecks.cs index d11dee4..e7e7aed 100644 --- a/server/generator/SockBotSubscriptionServerHealthChecks.cs +++ b/server/generator/SockBotSubscriptionServerHealthChecks.cs @@ -1,9 +1,11 @@ using System; using System.Linq; +using System.Net.Http; using System.Threading.Tasks; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; using Sockeye.Models; +using Sockeye.Util; namespace Sockeye.Biz { @@ -31,17 +33,81 @@ namespace Sockeye.Biz using (AyContext ct = Sockeye.Util.ServiceProviderProvider.DBContext) { + //get a list of all active server ID's var ActiveServerIdList = await ct.SubscriptionServer .AsNoTracking() - .Where(z => z.ServerState != ServerState.DeActivated && z.ServerState != ServerState.DeActivated) + .Where(z => z.ServerState != 
ServerState.DeActivated + && z.ServerState != ServerState.Destroyed + && z.ServerState != ServerState.Requested) .OrderBy(z => z.Id) - .Select(z=>z.Id) + .Select(z => z.Id) .ToListAsync(); - foreach(long serverId in ActiveServerIdList){ - //get the health and triage accordingly - - + try + { + // var content = new StringContent(JsonConvert.SerializeObject(trialRequest), Encoding.UTF8, "application/json"); + foreach (long serverId in ActiveServerIdList) + { + //get the health and triage accordingly + var client = ServiceProviderProvider.HttpClientFactory.CreateClient(); + var biz = SubscriptionServerBiz.GetBiz(ct); + var srv = await biz.GetAsync(serverId, false); + if (srv == null) + { + //this is a serious issue log and server ops it + var err = $"SockBotSubscriptionServerHealthChecks error running job, subscription server record id {serverId} could not be fetched {biz.GetErrorsAsString}"; + await NotifyEventHelper.AddOpsProblemEvent(err); + log.LogError(err); + } + else + { + //https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0 + //https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?source=recommendations&view=aspnetcore-7.0#customize-the-http-status-code + //Basically it returns only plain text + //http status code = 200 for both Healthy and Degraded + //http status code = 503 for UnHealthy + //for our purposes anything other than 200 and "Healthy" is a problem + var res = await client.GetAsync($"https://{srv.Name}/health"); + var responseText = await res.Content.ReadAsStringAsync(); + srv.LastHealthCheck = DateTime.UtcNow; + srv.LastHealthStatus = responseText; + if (res.IsSuccessStatusCode && responseText == "Healthy") + { + //a-ok + srv.ServerState = ServerState.ActiveHealthy; + } + else + { + //PROBLEM! 
+ //progress through the ladder of severity + //3rd fail means triggers notification + switch (srv.ServerState) + { + case ServerState.ActiveHealthy: + srv.ServerState = ServerState.FailFirstHealthCheck; + break; + case ServerState.FailFirstHealthCheck: + srv.ServerState = ServerState.FailSecondHealthCheck; + break; + case ServerState.FailSecondHealthCheck: + srv.ServerState = ServerState.FailedRequiresAttention; + break; + case ServerState.FailedRequiresAttention: + //no change, it still requires attention + break; + } + } + //save changes + await biz.PutAsync(srv); + } + } + } + catch (Exception ex) + { + var err = "SockBotSubscriptionServerHealthChecks error running job"; + //serious issue requires immediate notification + await NotifyEventHelper.AddOpsProblemEvent(err, ex); + log.LogError(ex, err); } } @@ -51,10 +117,10 @@ namespace Sockeye.Biz - - + + ///////////////////////////////////////////////////////////////////// diff --git a/server/util/AySchema.cs b/server/util/AySchema.cs index ab75c1d..39f19e3 100644 --- a/server/util/AySchema.cs +++ b/server/util/AySchema.cs @@ -1059,7 +1059,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'en'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'en'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'en'"); - await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'en'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT 
t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'en'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'en'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'en'"); @@ -1086,7 +1085,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'es'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'es'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'es'"); - await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'es'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'es'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'es'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'es'"); @@ -1113,7 +1111,6 @@ $BODY$ 
LANGUAGE PLPGSQL STABLE"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'fr'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'fr'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where t.baselanguage = 'fr'"); - await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'fr'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'fr'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'fr'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'fr'"); @@ -1140,7 +1137,6 @@ $BODY$ LANGUAGE PLPGSQL STABLE"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'SubServerCustomerDomain', 'Customer subdomain' FROM atranslation t where t.baselanguage = 'de'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateRequested', 'Requested' FROM atranslation t where t.baselanguage = 'de'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveHealthy', 'Active healthy' FROM atranslation t where 
t.baselanguage = 'de'"); - await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateActiveRequiresAttention', 'Active requires attention' FROM atranslation t where t.baselanguage = 'de'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailFirstHealthCheck', 'Fail first health check' FROM atranslation t where t.baselanguage = 'de'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailSecondHealthCheck', 'Fail second health check' FROM atranslation t where t.baselanguage = 'de'"); await ExecQueryAsync("INSERT INTO atranslationitem(translationid,key,display) SELECT t.id, 'ServerStateFailedRequiresAttention', 'Failed requires attention' FROM atranslation t where t.baselanguage = 'de'");