Search indexing improvements
This commit is contained in:
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -50,7 +50,7 @@
|
||||
"AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles",
|
||||
"AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
|
||||
"AYANOVA_METRICS_USE_INFLUXDB": "false",
|
||||
"AYANOVA_SERVER_TEST_MODE":"true",
|
||||
"AYANOVA_SERVER_TEST_MODE":"false",
|
||||
"AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
|
||||
"AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7"
|
||||
|
||||
|
||||
@@ -910,19 +910,19 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
||||
|
||||
#region Breaker
|
||||
|
||||
//Class to hold relevant translation data for breaking text
|
||||
public class TranslationWordBreakingData
|
||||
{
|
||||
public bool CJKIndex { get; set; }
|
||||
public List<string> StopWords { get; set; }
|
||||
public TranslationWordBreakingData()
|
||||
{
|
||||
CJKIndex = false;
|
||||
StopWords = new List<string>();
|
||||
}
|
||||
}
|
||||
// //Class to hold relevant translation data for breaking text
|
||||
// public class TranslationWordBreakingData
|
||||
// {
|
||||
// public bool CJKIndex { get; set; }
|
||||
// public List<string> StopWords { get; set; }
|
||||
// public TranslationWordBreakingData()
|
||||
// {
|
||||
// CJKIndex = false;
|
||||
// StopWords = new List<string>();
|
||||
// }
|
||||
// }
|
||||
|
||||
private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
|
||||
// private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
|
||||
|
||||
// //called at startup to populate cache
|
||||
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
|
||||
@@ -940,36 +940,36 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
||||
// }).ToListAsync();
|
||||
// TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId));
|
||||
// }
|
||||
internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
|
||||
{
|
||||
TranslationWordBreakingData LSD = new TranslationWordBreakingData();
|
||||
if (ct == null)
|
||||
ct = ServiceProviderProvider.DBContext;
|
||||
//Get stopwords
|
||||
//Validate translation id, if not right then use default instead
|
||||
var Param = new List<string>();
|
||||
translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
|
||||
Param.Add("StopWords1");
|
||||
Param.Add("StopWords2");
|
||||
Param.Add("StopWords3");
|
||||
Param.Add("StopWords4");
|
||||
Param.Add("StopWords5");
|
||||
Param.Add("StopWords6");
|
||||
Param.Add("StopWords7");
|
||||
var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
|
||||
// internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
|
||||
// {
|
||||
// TranslationWordBreakingData LSD = new TranslationWordBreakingData();
|
||||
// if (ct == null)
|
||||
// ct = ServiceProviderProvider.DBContext;
|
||||
// //Get stopwords
|
||||
// //Validate translation id, if not right then use default instead
|
||||
// var Param = new List<string>();
|
||||
// translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
|
||||
// Param.Add("StopWords1");
|
||||
// Param.Add("StopWords2");
|
||||
// Param.Add("StopWords3");
|
||||
// Param.Add("StopWords4");
|
||||
// Param.Add("StopWords5");
|
||||
// Param.Add("StopWords6");
|
||||
// Param.Add("StopWords7");
|
||||
// var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
|
||||
|
||||
foreach (KeyValuePair<string, string> kvp in Stops)
|
||||
{
|
||||
//Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
|
||||
if (kvp.Value != "?")
|
||||
{
|
||||
LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
||||
}
|
||||
}
|
||||
// foreach (KeyValuePair<string, string> kvp in Stops)
|
||||
// {
|
||||
// //Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
|
||||
// if (kvp.Value != "?")
|
||||
// {
|
||||
// LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
||||
// }
|
||||
// }
|
||||
|
||||
LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
|
||||
return LSD;
|
||||
}
|
||||
// LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
|
||||
// return LSD;
|
||||
// }
|
||||
|
||||
public enum TokenTypes
|
||||
{ Nothing, Separator, CJK, Latin };
|
||||
@@ -1012,23 +1012,12 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
||||
return await BreakCoreAsync(translationId, true, textStrings, true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Stop words list reset upon login or editing of Translation text
|
||||
/// used for eliminating noise words from search dictionary
|
||||
/// </summary>
|
||||
|
||||
|
||||
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
|
||||
{
|
||||
//For stopwords and CJKIndex flag value
|
||||
//if not provided (will be provided by seeder for performance but normally never) then fetch
|
||||
|
||||
if (!translationWordBreakingDataCache.ContainsKey(translationId))
|
||||
{
|
||||
translationWordBreakingDataCache.Add(translationId, await GetTranslationSearchDataAsync(translationId));
|
||||
}
|
||||
var translationWordBreakData = translationWordBreakingDataCache[translationId];
|
||||
|
||||
var translationWordBreakData = await SearchTranslationWordBreakDataCache.GetWordBreakData(translationId);
|
||||
|
||||
int MAXWORDLENGTH = 255;
|
||||
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it
|
||||
|
||||
74
server/AyaNova/biz/SearchTranslationWordBreakDataCache.cs
Normal file
74
server/AyaNova/biz/SearchTranslationWordBreakDataCache.cs
Normal file
@@ -0,0 +1,74 @@
|
||||
using System.Threading.Tasks;
|
||||
using System.Threading;
|
||||
using System.Collections.Generic;
|
||||
using AyaNova.Util;
|
||||
using AyaNova.Models;
|
||||
namespace AyaNova.Biz
|
||||
{
|
||||
|
||||
/// <summary>
/// Static cache of the word breaking data (stop words and CJK index flag) for each translation,
/// so the search text breaker does not have to fetch it again on every call.
/// </summary>
public class SearchTranslationWordBreakDataCache
{
    //Serializes all access to theCache; a SemaphoreSlim is used because the gate is held across an await
    private static readonly SemaphoreSlim semaphoreSlim = new SemaphoreSlim(1, 1);

    //Word breaking data keyed by the translation id that was requested
    private static readonly Dictionary<long, TranslationWordBreakingData> theCache = new Dictionary<long, TranslationWordBreakingData>();

    public SearchTranslationWordBreakDataCache() { }

    /// <summary>
    /// Get the word breaking data for a translation, fetching and caching it on first request
    /// </summary>
    /// <param name="id">Translation id to get the word breaking data for</param>
    /// <returns>Cached (or freshly fetched) word breaking data for the requested id</returns>
    public static async Task<TranslationWordBreakingData> GetWordBreakData(long id)
    {
        await semaphoreSlim.WaitAsync();
        try
        {
            //Must look up the *requested* id: checking a fixed key meant that any other id
            //was either re-fetched on every call or never fetched at all (KeyNotFoundException below)
            if (!theCache.TryGetValue(id, out TranslationWordBreakingData wordBreakData))
            {
                wordBreakData = await GetTranslationSearchDataAsync(id);
                theCache[id] = wordBreakData;
            }
            return wordBreakData;
        }
        finally
        {
            //Always release, even if the fetch throws, so later callers are not blocked forever
            semaphoreSlim.Release();
        }
    }

    /// <summary>
    /// Build the word breaking data for a translation from its stop word keys and CJK index setting
    /// </summary>
    /// <param name="translationId">Requested translation id; replaced by the id returned from
    /// ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId before anything is fetched</param>
    /// <returns>New TranslationWordBreakingData with the stop words list and CJKIndex flag set</returns>
    internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId)
    {
        TranslationWordBreakingData LSD = new TranslationWordBreakingData();
        AyContext ct = ServiceProviderProvider.DBContext;

        //Validate translation id, if not right then use default instead
        translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);

        //Get stopwords
        var Param = new List<string>
        {
            "StopWords1",
            "StopWords2",
            "StopWords3",
            "StopWords4",
            "StopWords5",
            "StopWords6",
            "StopWords7"
        };
        var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);

        foreach (KeyValuePair<string, string> kvp in Stops)
        {
            //Each stopwords translation key is a space delimited list of words and in the case of
            //an empty local string (i.e. StopWords7) its value is a single question mark
            if (kvp.Value != "?")
            {
                LSD.StopWords.AddRange(kvp.Value.Split(" "));
            }
        }

        LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
        return LSD;
    }

    //Class to hold relevant translation data for breaking text
    public class TranslationWordBreakingData
    {
        //Value of the translation's CJK index setting; false until set
        public bool CJKIndex { get; set; }

        //Stop words for the translation; starts out as an empty list
        public List<string> StopWords { get; set; }

        public TranslationWordBreakingData()
        {
            CJKIndex = false;
            StopWords = new List<string>();
        }
    }

}//eoc
|
||||
}//eons
|
||||
Reference in New Issue
Block a user