Search indexing improvements

This commit is contained in:
2020-05-18 22:41:39 +00:00
parent dd08861f34
commit fc95aedaac
3 changed files with 118 additions and 55 deletions

2
.vscode/launch.json vendored
View File

@@ -50,7 +50,7 @@
"AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles", "AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles",
"AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles", "AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
"AYANOVA_METRICS_USE_INFLUXDB": "false", "AYANOVA_METRICS_USE_INFLUXDB": "false",
"AYANOVA_SERVER_TEST_MODE":"true", "AYANOVA_SERVER_TEST_MODE":"false",
"AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small", "AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
"AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7" "AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7"

View File

@@ -910,19 +910,19 @@ cache or provide directly the translation to save time repeatedly fetching it wh
#region Breaker #region Breaker
//Class to hold relevant translation data for breaking text // //Class to hold relevant translation data for breaking text
public class TranslationWordBreakingData // public class TranslationWordBreakingData
{ // {
public bool CJKIndex { get; set; } // public bool CJKIndex { get; set; }
public List<string> StopWords { get; set; } // public List<string> StopWords { get; set; }
public TranslationWordBreakingData() // public TranslationWordBreakingData()
{ // {
CJKIndex = false; // CJKIndex = false;
StopWords = new List<string>(); // StopWords = new List<string>();
} // }
} // }
private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>(); // private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
// //called at startup to populate cache // //called at startup to populate cache
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI: //WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
@@ -940,36 +940,36 @@ cache or provide directly the translation to save time repeatedly fetching it wh
// }).ToListAsync(); // }).ToListAsync();
// TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId)); // TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId));
// } // }
internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null) // internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
{ // {
TranslationWordBreakingData LSD = new TranslationWordBreakingData(); // TranslationWordBreakingData LSD = new TranslationWordBreakingData();
if (ct == null) // if (ct == null)
ct = ServiceProviderProvider.DBContext; // ct = ServiceProviderProvider.DBContext;
//Get stopwords // //Get stopwords
//Validate translation id, if not right then use default instead // //Validate translation id, if not right then use default instead
var Param = new List<string>(); // var Param = new List<string>();
translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct); // translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
Param.Add("StopWords1"); // Param.Add("StopWords1");
Param.Add("StopWords2"); // Param.Add("StopWords2");
Param.Add("StopWords3"); // Param.Add("StopWords3");
Param.Add("StopWords4"); // Param.Add("StopWords4");
Param.Add("StopWords5"); // Param.Add("StopWords5");
Param.Add("StopWords6"); // Param.Add("StopWords6");
Param.Add("StopWords7"); // Param.Add("StopWords7");
var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId); // var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
foreach (KeyValuePair<string, string> kvp in Stops) // foreach (KeyValuePair<string, string> kvp in Stops)
{ // {
//Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark // //Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
if (kvp.Value != "?") // if (kvp.Value != "?")
{ // {
LSD.StopWords.AddRange(kvp.Value.Split(" ")); // LSD.StopWords.AddRange(kvp.Value.Split(" "));
} // }
} // }
LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct); // LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
return LSD; // return LSD;
} // }
public enum TokenTypes public enum TokenTypes
{ Nothing, Separator, CJK, Latin }; { Nothing, Separator, CJK, Latin };
@@ -1012,23 +1012,12 @@ cache or provide directly the translation to save time repeatedly fetching it wh
return await BreakCoreAsync(translationId, true, textStrings, true); return await BreakCoreAsync(translationId, true, textStrings, true);
} }
/// <summary>
/// Stop words list reset upon login or editing of Translation text
/// used for eliminating noise words from search dictionary
/// </summary>
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false) internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
{ {
//For stopwords and CJKIndex flag value //For stopwords and CJKIndex flag value
//if not provided (will be provided by seeder for performance but normally never) then fetch var translationWordBreakData = await SearchTranslationWordBreakDataCache.GetWordBreakData(translationId);
if (!translationWordBreakingDataCache.ContainsKey(translationId))
{
translationWordBreakingDataCache.Add(translationId, await GetTranslationSearchDataAsync(translationId));
}
var translationWordBreakData = translationWordBreakingDataCache[translationId];
int MAXWORDLENGTH = 255; int MAXWORDLENGTH = 255;
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it

View File

@@ -0,0 +1,74 @@
using System.Threading.Tasks;
using System.Threading;
using System.Collections.Generic;
using AyaNova.Util;
using AyaNova.Models;
namespace AyaNova.Biz
{
/// <summary>
/// Static cache of the data needed to break text into search words
/// (stop word list and CJK index flag), keyed by the translation id it was requested for.
/// </summary>
public class SearchTranslationWordBreakDataCache
{
    //Guards every read and write of theCache.
    //SemaphoreSlim is used rather than lock because the guarded section awaits a fetch.
    private static readonly SemaphoreSlim semaphoreSlim = new SemaphoreSlim(1, 1);
    private static readonly Dictionary<long, TranslationWordBreakingData> theCache = new Dictionary<long, TranslationWordBreakingData>();

    public SearchTranslationWordBreakDataCache() { }

    /// <summary>
    /// Get the word breaking data for a translation.
    /// The first request for a given id fetches the data and stores it; later requests for the same id return the stored instance.
    /// </summary>
    /// <param name="id">Translation id to look up</param>
    /// <returns>Stop words and CJK index flag for the translation</returns>
    public static async Task<TranslationWordBreakingData> GetWordBreakData(long id)
    {
        await semaphoreSlim.WaitAsync();
        try
        {
            //Look up the id actually requested (previously this tested the literal key 1, which
            //either refetched on every call or threw KeyNotFoundException for an uncached id)
            if (!theCache.TryGetValue(id, out TranslationWordBreakingData wordBreakData))
            {
                wordBreakData = await GetTranslationSearchDataAsync(id);
                theCache[id] = wordBreakData;
            }
            return wordBreakData;
        }
        finally
        {
            //Always release, even if the fetch throws, so later callers are not blocked forever
            semaphoreSlim.Release();
        }
    }

    /// <summary>
    /// Build the word breaking data for a translation: collects the words from the
    /// StopWords1..StopWords7 translation keys and reads the CJK index flag.
    /// </summary>
    /// <param name="translationId">Requested translation id; passed through TranslationBiz validation before use</param>
    /// <returns>A newly built <see cref="TranslationWordBreakingData"/></returns>
    internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId)
    {
        TranslationWordBreakingData LSD = new TranslationWordBreakingData();
        AyContext ct = ServiceProviderProvider.DBContext;

        //Validate translation id, if not right then use default instead
        translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);

        //Get stopwords
        var Param = new List<string>
        {
            "StopWords1",
            "StopWords2",
            "StopWords3",
            "StopWords4",
            "StopWords5",
            "StopWords6",
            "StopWords7"
        };
        var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);

        foreach (KeyValuePair<string, string> kvp in Stops)
        {
            //Each stopwords translation key is a space delimited list of words and in the case of an
            //empty local string (i.e. StopWords7) its value is a single question mark
            if (kvp.Value != "?")
            {
                LSD.StopWords.AddRange(kvp.Value.Split(" "));
            }
        }

        LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
        return LSD;
    }

    //Class to hold relevant translation data for breaking text
    public class TranslationWordBreakingData
    {
        //CJK index flag for the translation, as returned by TranslationBiz.GetCJKIndexAsync
        public bool CJKIndex { get; set; }

        //Words collected from the StopWords translation keys
        public List<string> StopWords { get; set; }

        public TranslationWordBreakingData()
        {
            CJKIndex = false;
            StopWords = new List<string>();
        }
    }
}//eoc
}//eons