Search indexing improvements

This commit is contained in:
2020-05-18 22:41:39 +00:00
parent dd08861f34
commit fc95aedaac
3 changed files with 118 additions and 55 deletions

2
.vscode/launch.json vendored
View File

@@ -50,7 +50,7 @@
"AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles",
"AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
"AYANOVA_METRICS_USE_INFLUXDB": "false",
"AYANOVA_SERVER_TEST_MODE":"true",
"AYANOVA_SERVER_TEST_MODE":"false",
"AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
"AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7"

View File

@@ -910,19 +910,19 @@ cache or provide directly the translation to save time repeatedly fetching it wh
#region Breaker
//Class to hold relevant translation data for breaking text
public class TranslationWordBreakingData
{
public bool CJKIndex { get; set; }
public List<string> StopWords { get; set; }
public TranslationWordBreakingData()
{
CJKIndex = false;
StopWords = new List<string>();
}
}
// //Class to hold relevant translation data for breaking text
// public class TranslationWordBreakingData
// {
// public bool CJKIndex { get; set; }
// public List<string> StopWords { get; set; }
// public TranslationWordBreakingData()
// {
// CJKIndex = false;
// StopWords = new List<string>();
// }
// }
private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
// private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
// //called at startup to populate cache
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
@@ -940,36 +940,36 @@ cache or provide directly the translation to save time repeatedly fetching it wh
// }).ToListAsync();
// TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId));
// }
internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
{
TranslationWordBreakingData LSD = new TranslationWordBreakingData();
if (ct == null)
ct = ServiceProviderProvider.DBContext;
//Get stopwords
//Validate translation id, if not right then use default instead
var Param = new List<string>();
translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
Param.Add("StopWords1");
Param.Add("StopWords2");
Param.Add("StopWords3");
Param.Add("StopWords4");
Param.Add("StopWords5");
Param.Add("StopWords6");
Param.Add("StopWords7");
var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
// internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
// {
// TranslationWordBreakingData LSD = new TranslationWordBreakingData();
// if (ct == null)
// ct = ServiceProviderProvider.DBContext;
// //Get stopwords
// //Validate translation id, if not right then use default instead
// var Param = new List<string>();
// translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
// Param.Add("StopWords1");
// Param.Add("StopWords2");
// Param.Add("StopWords3");
// Param.Add("StopWords4");
// Param.Add("StopWords5");
// Param.Add("StopWords6");
// Param.Add("StopWords7");
// var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
foreach (KeyValuePair<string, string> kvp in Stops)
{
//Each stopwords translation key is a space delimited list of words; in the case of an empty local string (e.g. StopWords7) its value is a single question mark
if (kvp.Value != "?")
{
LSD.StopWords.AddRange(kvp.Value.Split(" "));
}
}
// foreach (KeyValuePair<string, string> kvp in Stops)
// {
// //Each stopwords translation key is a space delimited list of words; in the case of an empty local string (e.g. StopWords7) its value is a single question mark
// if (kvp.Value != "?")
// {
// LSD.StopWords.AddRange(kvp.Value.Split(" "));
// }
// }
LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
return LSD;
}
// LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
// return LSD;
// }
public enum TokenTypes
{ Nothing, Separator, CJK, Latin };
@@ -1012,23 +1012,12 @@ cache or provide directly the translation to save time repeatedly fetching it wh
return await BreakCoreAsync(translationId, true, textStrings, true);
}
/// <summary>
/// Stop words list reset upon login or editing of Translation text
/// used for eliminating noise words from search dictionary
/// </summary>
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
{
//For stopwords and CJKIndex flag value
//if not provided (will be provided by seeder for performance but normally never) then fetch
if (!translationWordBreakingDataCache.ContainsKey(translationId))
{
translationWordBreakingDataCache.Add(translationId, await GetTranslationSearchDataAsync(translationId));
}
var translationWordBreakData = translationWordBreakingDataCache[translationId];
var translationWordBreakData = await SearchTranslationWordBreakDataCache.GetWordBreakData(translationId);
int MAXWORDLENGTH = 255;
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it

View File

@@ -0,0 +1,74 @@
using System.Threading.Tasks;
using System.Threading;
using System.Collections.Generic;
using AyaNova.Util;
using AyaNova.Models;
namespace AyaNova.Biz
{
/// <summary>
/// Static cache of the word breaking data (stop words and CJK index flag)
/// for each translation, keyed by translation id.
/// An entry is fetched the first time its id is requested and reused afterwards.
/// Every lookup and insert happens while holding a single-permit semaphore.
/// </summary>
public class SearchTranslationWordBreakDataCache
{
    //Single-permit semaphore guarding theCache; the fetch is awaited while the permit is held
    private static readonly SemaphoreSlim semaphoreSlim = new SemaphoreSlim(1, 1);
    private static readonly Dictionary<long, TranslationWordBreakingData> theCache = new Dictionary<long, TranslationWordBreakingData>();

    public SearchTranslationWordBreakDataCache() { }

    /// <summary>
    /// Get the word breaking data for a translation, fetching and caching it on first use.
    /// </summary>
    /// <param name="id">Translation id used as the cache key</param>
    /// <returns>The cached (or newly fetched) word breaking data for <paramref name="id"/></returns>
    public static async Task<TranslationWordBreakingData> GetWordBreakData(long id)
    {
        await semaphoreSlim.WaitAsync();
        try
        {
            //Check for the id that was actually requested (previously this tested the literal key 1,
            //which threw KeyNotFoundException for other ids once id 1 was cached and
            //re-fetched on every call until then)
            if (!theCache.TryGetValue(id, out TranslationWordBreakingData wordBreakData))
            {
                wordBreakData = await GetTranslationSearchDataAsync(id);
                theCache[id] = wordBreakData;
            }
            return wordBreakData;
        }
        finally
        {
            //Always give the permit back, even if the fetch throws
            semaphoreSlim.Release();
        }
    }

    /// <summary>
    /// Build the word breaking data for a translation: its stop words and its CJK index flag.
    /// </summary>
    /// <param name="translationId">Requested translation id; it is first passed through
    /// TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId and the returned id is the one queried</param>
    /// <returns>A newly created <see cref="TranslationWordBreakingData"/></returns>
    internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId)
    {
        TranslationWordBreakingData LSD = new TranslationWordBreakingData();
        AyContext ct = ServiceProviderProvider.DBContext;

        //Validate translation id, if not right then use default instead
        translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);

        //Get stopwords
        var Param = new List<string>
        {
            "StopWords1",
            "StopWords2",
            "StopWords3",
            "StopWords4",
            "StopWords5",
            "StopWords6",
            "StopWords7"
        };
        var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);

        foreach (KeyValuePair<string, string> kvp in Stops)
        {
            //Each stopwords translation key is a space delimited list of words; in the case of an
            //empty local string (e.g. StopWords7) its value is a single question mark
            if (kvp.Value != "?")
            {
                LSD.StopWords.AddRange(kvp.Value.Split(" "));
            }
        }

        LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
        return LSD;
    }

    /// <summary>
    /// Class to hold relevant translation data for breaking text
    /// </summary>
    public class TranslationWordBreakingData
    {
        /// <summary>CJK index flag for the translation, as returned by TranslationBiz.GetCJKIndexAsync</summary>
        public bool CJKIndex { get; set; }

        /// <summary>Stop words collected from the translation's StopWords1..StopWords7 keys</summary>
        public List<string> StopWords { get; set; }

        public TranslationWordBreakingData()
        {
            CJKIndex = false;
            StopWords = new List<string>();
        }
    }
}//eoc
}//eons