Search indexing improvements

This commit is contained in:
2020-05-20 18:40:18 +00:00
parent 3cccb4b385
commit 48c5753628

View File

@@ -738,8 +738,6 @@ namespace AyaNova.Biz
FROM asearchdictionary AS a
WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 'suscipit', 'rerum', 'unbranded', 'soft', 'towels', '25', 'green', 'zone', 'red', 'sequi', 'aspernatur', 'animi', '85586490', '70907391547648')
*/
//Put the matching keyword IDs into the list
@@ -748,15 +746,110 @@ WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 's
MatchingKeywordIdList.Add(K.Key);
}
#region OLD - NEW WORD ADDITION second attempt, do it word by word and accept clashes and handle them
// //-------- START CRITICAL SECTION -----------
// //-------------------------------------------
// #if (DEBUG)
// var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###");
// #endif
// foreach (string KeyWord in KeyWordList)
// {
// if (!ExistingKeywordMatches.ContainsValue(KeyWord))
// {
// //algorithm: Attempt to add it to the db and get the id, if it fails with the expected exception for a duplicate word insertion attempt, then immediately read back that word and handle it
// //ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY
// SearchDictionary NewWord = new SearchDictionary();
// NewWord.Word = KeyWord;
// try
// {
// //ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST
// var CtAdd = ServiceProviderProvider.DBContext;
// await CtAdd.SearchDictionary.AddAsync(NewWord);
// await CtAdd.SaveChangesAsync();
// //-------
// //Add to matching keywords
// MatchingKeywordIdList.Add(NewWord.Id);
// //-------
// //It exists now
// ExistingKeywordMatches.Add(NewWord.Id, NewWord.Word);
// }
// catch (Microsoft.EntityFrameworkCore.DbUpdateException ex)
// {
// #region Exceptions from word already existing (added maybe in another thread)
// #if (DEBUG)
// log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead...");
// #endif
// //FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED?
// if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx"))
// {
// #if (DEBUG)
// log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?");
// #endif
// throw ex;
// }
// //FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD
// var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync();
// if (SearchDictionaryMatchFoundInDB != null)
// {
// MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id);
// //It exists now
// ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word);
// }
// else
// {
// #if (DEBUG)
// log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?");
// #endif
// }
// #endregion
// }
// catch (Exception ex)
// {
// #if (DEBUG)
// log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?");
// #endif
// throw ex;
// }
// }
// }
// //-------- END CRITICAL SECTION -------------
// //-------------------------------------------
#endregion second attempt
#region NEW WORD ADDITION THIRD attempt, do it word by word use no conflict and bypass EF Core entirely
//-------- START CRITICAL SECTION -----------
//-------------------------------------------
#region NEW WORD ADDITION second attempt, do it word by word and accept clashes and handle them
#if (DEBUG)
var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###");
#endif
#region PERFORMANCE NOTES / EXPERIMENTS
/*
This next block is where all the slowness exists.
I've played with it and brought it down to half the original time it took, but could likely find more savings,
however not a good use of time right now and really only affects bulk ops which is seeding right now,
@@ -820,17 +913,6 @@ ON CONFLICT IDEA
///////////////////////////////////////////////////////////////////////////////
*/
#endregion performance notes experiments
foreach (string KeyWord in KeyWordList)
{
if (!ExistingKeywordMatches.ContainsValue(KeyWord))
{
/*
NEW IDEA
todo: Search indexing performance improvement and exception avoidance (Search.cs 828)
ON CONFLICT IDEA
@@ -888,78 +970,90 @@ RUN 6 - SMALL 2
2020-05-20 10:56:57.4113|INFO|Seeder|Seeding SMALL number of user(s)....
2020-05-20 10:56:57.5519|INFO|Seeder|25 Users seeded in 143 ms
2020-05-20 10:57:04.3323|INFO|Seeder|100 Widgets seeded in 6787 ms
*/
//algorithm: Attempt to add it to the db and get the id, if it fails with the expected exception for a duplicate word insertion attempt, then immediately read back that word and handle it
*/
#endregion performance notes experiments
//ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY
SearchDictionary NewWord = new SearchDictionary();
NewWord.Word = KeyWord;
try
{
//ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST
var CtAdd = ServiceProviderProvider.DBContext;
await CtAdd.SearchDictionary.AddAsync(NewWord);
await CtAdd.SaveChangesAsync();
//-------
//Add to matching keywords
MatchingKeywordIdList.Add(NewWord.Id);
//-------
//It exists now
ExistingKeywordMatches.Add(NewWord.Id, NewWord.Word);
}
catch (Microsoft.EntityFrameworkCore.DbUpdateException ex)
{
#region Exceptions from word already existing (added maybe in another thread)
#if (DEBUG)
log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead...");
var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###");
#endif
//FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED?
if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx"))
var CtAdd = ServiceProviderProvider.DBContext;
using (var command = CtAdd.Database.GetDbConnection().CreateCommand())
{
await CtAdd.Database.OpenConnectionAsync();
foreach (string KeyWord in KeyWordList)
{
if (!ExistingKeywordMatches.ContainsValue(KeyWord))
{
//ATTEMPT TO ADD THE WORD TO THE SEARCHDICTIONARY
try
{
//insert; on conflict, do what is essentially a no-op by updating word to its existing value
//so that the id is always returned; in effect this is an upsert without a real update, since all we need is the id
command.CommandText = $"insert into asearchdictionary (word) values('{KeyWord}') on conflict (word) do update set word=excluded.word returning id ";
using (var dr = await command.ExecuteReaderAsync())
{
dr.Read();
var wordId = dr.GetInt64(0);
MatchingKeywordIdList.Add(wordId);
ExistingKeywordMatches.Add(wordId, KeyWord);
}
}
catch (Microsoft.EntityFrameworkCore.DbUpdateException ex)
{
#region Exceptions from word already existing (added maybe in another thread)
#if (DEBUG)
log.LogInformation($"###################### Exception caught attempting to add word: '{KeyWord}' fetching instead...");
#endif
//FAIL DUE TO OTHER CAUSE THAN WORD ALREADY ADDED?
if (ex.InnerException == null || !ex.InnerException.Message.Contains("asearchdictionary_word_idx"))
{
#if (DEBUG)
log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?");
#endif
throw ex;
}
//FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD
var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync();
if (SearchDictionaryMatchFoundInDB != null)
{
MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id);
//It exists now
ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word);
}
else
{
#if (DEBUG)
log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?");
#endif
}
#endregion
}
catch (Exception ex)
{
#if (DEBUG)
log.LogInformation($"###################### Unexpected inner exception on add word: '{KeyWord}'!?");
log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?");
#endif
throw ex;
}
//FETCH THE WORD ID, PLACE IN MATCHINGKEYWORDLIST AND MOVE ON TO THE NEXT WORD
var SearchDictionaryMatchFoundInDB = await ServiceProviderProvider.DBContext.SearchDictionary.AsNoTracking().Where(z => z.Word == KeyWord).FirstOrDefaultAsync();
if (SearchDictionaryMatchFoundInDB != null)
{
MatchingKeywordIdList.Add(SearchDictionaryMatchFoundInDB.Id);
//It exists now
ExistingKeywordMatches.Add(SearchDictionaryMatchFoundInDB.Id, SearchDictionaryMatchFoundInDB.Word);
}
else
{
#if (DEBUG)
log.LogInformation($"###################### NULL when expected to find word: '{KeyWord}'!?");
#endif
}
#endregion
}
catch (Exception ex)
{
#if (DEBUG)
log.LogInformation(ex, $"###################### Unexpected exception adding word: '{KeyWord}'!?");
#endif
throw ex;
}
}
}
}//end of db using statement
#endregion second attempt
//-------- END CRITICAL SECTION -------------
//-------------------------------------------
#endregion third attempt
@@ -995,7 +1089,7 @@ RUN 6 - SMALL 2
// }
// }
// private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
// private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
// //called at startup to populate cache
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
@@ -1085,7 +1179,7 @@ RUN 6 - SMALL 2
return await BreakCoreAsync(translationId, true, textStrings, true);
}
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
{