From 48c5753628abc046deac92446eedc18f321e54c5 Mon Sep 17 00:00:00 2001 From: John Cardinal Date: Wed, 20 May 2020 18:40:18 +0000 Subject: [PATCH] Search indexing improvements --- server/AyaNova/biz/Search.cs | 246 ++++++++++++++++++++++++----------- 1 file changed, 170 insertions(+), 76 deletions(-) diff --git a/server/AyaNova/biz/Search.cs b/server/AyaNova/biz/Search.cs index 89416298..9b6f91a3 100644 --- a/server/AyaNova/biz/Search.cs +++ b/server/AyaNova/biz/Search.cs @@ -738,8 +738,6 @@ namespace AyaNova.Biz FROM asearchdictionary AS a WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 'suscipit', 'rerum', 'unbranded', 'soft', 'towels', '25', 'green', 'zone', 'red', 'sequi', 'aspernatur', 'animi', '85586490', '70907391547648') - - */ //Put the matching keyword ID's into the list @@ -748,15 +746,110 @@ WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 's MatchingKeywordIdList.Add(K.Key); } + + + #region OLD - NEW WORD ADDITION second attempt, do it word by word and accept clashes and handle them + + // //-------- START CRITICAL SECTION ----------- + // //------------------------------------------- + + // #if (DEBUG) + // var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###"); + // #endif + + + // foreach (string KeyWord in KeyWordList) + // { + // if (!ExistingKeywordMatches.ContainsValue(KeyWord)) + // { + + + + + + // //algorithm: Attempt to add it to the db and get the id, if it fails with the expected exception for a duplicate word insertion attempt, then immediately read back that word and handle it + + // //ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY + // SearchDictionary NewWord = new SearchDictionary(); + // NewWord.Word = KeyWord; + + // try + // { + + + // //ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST + // var CtAdd = ServiceProviderProvider.DBContext; + // await CtAdd.SearchDictionary.AddAsync(NewWord); + // await 
CtAdd.SaveChangesAsync(); + + + // //------- + // //Add to matching keywords + // MatchingKeywordIdList.Add(NewWord.Id); + // //------- + + // //It exists now + // ExistingKeywordMatches.Add(NewWord.Id, NewWord.Word); + // } + // catch (Microsoft.EntityFrameworkCore.DbUpdateException ex) + // { + // #region Exceptions from word already existing (added maybe in another thread) + // #if (DEBUG) + // log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead..."); + // #endif + // //FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED? + // if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx")) + // { + // #if (DEBUG) + // log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?"); + // #endif + // throw ex; + // } + + // //FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD + // var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync(); + // if (SearchDictionaryMatchFoundInDB != null) + // { + // MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id); + // //It exists now + // ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word); + // } + // else + // { + // #if (DEBUG) + // log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?"); + // #endif + // } + // #endregion + // } + // catch (Exception ex) + // { + // #if (DEBUG) + // log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?"); + // #endif + // throw ex; + // } + // } + // } + + + + // //-------- END CRITICAL SECTION ------------- + // //------------------------------------------- + #endregion second attempt + + + + + + #region NEW WORD ADDITION THIRD attempt, do it word by word use no conflict and bypass EF Core entirely + 
//-------- START CRITICAL SECTION ----------- //------------------------------------------- - #region NEW WORD ADDITION second attempt, do it word by word and accept clashes and handle them -#if (DEBUG) - var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###"); -#endif - #region PERFORMANCE NOTES / EXPERIMENTS /* + + This next block is where all the slowness exists. I've played with it and brought it down to half the original time it took, but could likely find more savings, however not a good use of time right now and really only affects bulk ops which is seeding right now, @@ -820,17 +913,6 @@ ON CONFLICT IDEA /////////////////////////////////////////////////////////////////////////////// - */ - #endregion performance notes experiments - - foreach (string KeyWord in KeyWordList) - { - if (!ExistingKeywordMatches.ContainsValue(KeyWord)) - { - - - -/* NEW IDEA todo: Search indexing performance improvement and exception avoidance (Search.cs 828) ON CONFLICT IDEA @@ -888,78 +970,90 @@ RUN 6 - SMALL 2 2020-05-20 10:56:57.4113|INFO|Seeder|Seeding SMALL number of user(s).... 
2020-05-20 10:56:57.5519|INFO|Seeder|25 Users seeded in 143 ms 2020-05-20 10:57:04.3323|INFO|Seeder|100 Widgets seeded in 6787 ms -*/ - //algorithm: Attempt to add it to the db and get the id, if it fails with the expected exception for a duplicate word insertion attempt, then immediately read back that word and handle it + */ + #endregion performance notes experiments - //ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY - SearchDictionary NewWord = new SearchDictionary(); - NewWord.Word = KeyWord; - - try - { - - - //ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST - var CtAdd = ServiceProviderProvider.DBContext; - await CtAdd.SearchDictionary.AddAsync(NewWord); - await CtAdd.SaveChangesAsync(); - - - //------- - //Add to matching keywords - MatchingKeywordIdList.Add(NewWord.Id); - //------- - - //It exists now - ExistingKeywordMatches.Add(NewWord.Id, NewWord.Word); - } - catch (Microsoft.EntityFrameworkCore.DbUpdateException ex) - { - #region Exceptions from word already existing (added maybe in another thread) #if (DEBUG) - log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead..."); + var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###"); #endif - //FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED? 
- if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx")) + + var CtAdd = ServiceProviderProvider.DBContext; + using (var command = CtAdd.Database.GetDbConnection().CreateCommand()) + { + + await CtAdd.Database.OpenConnectionAsync(); + + + foreach (string KeyWord in KeyWordList) + { + if (!ExistingKeywordMatches.ContainsValue(KeyWord)) + { + //ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY + try + { + //insert, if conflict then do what is essentially a no-op by updating word to its existing value + //always returning id, this is then in effect an upsert without an update since all we need is the id + command.CommandText = $"insert into asearchdictionary (word) values('{KeyWord}') on conflict (word) do update set word=excluded.word returning id "; + + using (var dr = await command.ExecuteReaderAsync()) + { + dr.Read(); + var wordId = dr.GetInt64(0); + MatchingKeywordIdList.Add(wordId); + ExistingKeywordMatches.Add(wordId, KeyWord); + } + } + catch (Microsoft.EntityFrameworkCore.DbUpdateException ex) + { + #region Exceptions from word already existing (added maybe in another thread) +#if (DEBUG) + log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead..."); +#endif + //FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED? 
+ if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx")) + { +#if (DEBUG) + log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?"); +#endif + throw ex; + } + + //FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD + var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync(); + if (SearchDictionaryMatchFoundInDB != null) + { + MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id); + //It exists now + ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word); + } + else + { +#if (DEBUG) + log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?"); +#endif + } + #endregion + } + catch (Exception ex) { #if (DEBUG) - log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?"); + log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?"); #endif throw ex; } - - //FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD - var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync(); - if (SearchDictionaryMatchFoundInDB != null) - { - MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id); - //It exists now - ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word); - } - else - { -#if (DEBUG) - log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?"); -#endif - } - #endregion - } - catch (Exception ex) - { -#if (DEBUG) - log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?"); -#endif - throw ex; } } - } + }//end of db using statement + - #endregion second 
attempt //-------- END CRITICAL SECTION ------------- //------------------------------------------- + #endregion third attempt + + @@ -995,7 +1089,7 @@ RUN 6 - SMALL 2 // } // } - // private static Dictionary translationWordBreakingDataCache = new Dictionary(); + // private static Dictionary translationWordBreakingDataCache = new Dictionary(); // //called at startup to populate cache //WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI: @@ -1085,7 +1179,7 @@ RUN 6 - SMALL 2 return await BreakCoreAsync(translationId, true, textStrings, true); } - + internal static async Task> BreakCoreAsync(long translationId, bool KeepWildCards, List textStrings, bool ignoreStopWords = false) {