Search indexing improvements
This commit is contained in:
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -50,7 +50,7 @@
|
|||||||
"AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles",
|
"AYANOVA_FOLDER_USER_FILES": "c:\\temp\\RavenTestData\\userfiles",
|
||||||
"AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
|
"AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
|
||||||
"AYANOVA_METRICS_USE_INFLUXDB": "false",
|
"AYANOVA_METRICS_USE_INFLUXDB": "false",
|
||||||
"AYANOVA_SERVER_TEST_MODE":"true",
|
"AYANOVA_SERVER_TEST_MODE":"false",
|
||||||
"AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
|
"AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
|
||||||
"AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7"
|
"AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7"
|
||||||
|
|
||||||
|
|||||||
@@ -910,19 +910,19 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
|||||||
|
|
||||||
#region Breaker
|
#region Breaker
|
||||||
|
|
||||||
//Class to hold relevant translation data for breaking text
|
// //Class to hold relevant translation data for breaking text
|
||||||
public class TranslationWordBreakingData
|
// public class TranslationWordBreakingData
|
||||||
{
|
// {
|
||||||
public bool CJKIndex { get; set; }
|
// public bool CJKIndex { get; set; }
|
||||||
public List<string> StopWords { get; set; }
|
// public List<string> StopWords { get; set; }
|
||||||
public TranslationWordBreakingData()
|
// public TranslationWordBreakingData()
|
||||||
{
|
// {
|
||||||
CJKIndex = false;
|
// CJKIndex = false;
|
||||||
StopWords = new List<string>();
|
// StopWords = new List<string>();
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
|
// private static Dictionary<long, TranslationWordBreakingData> translationWordBreakingDataCache = new Dictionary<long, TranslationWordBreakingData>();
|
||||||
|
|
||||||
// //called at startup to populate cache
|
// //called at startup to populate cache
|
||||||
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
|
//WAS GOING TO ADD THIS IN RESPONSE TO AN ISSUE WITH EXCEPTION ATTEMPTING TO ADD ALREADY EXISTING DICTIONARY ID 1, BUT IT NEVER HAPPENED AGAIN, SO :SHRUGEMOJI:
|
||||||
@@ -940,36 +940,36 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
|||||||
// }).ToListAsync();
|
// }).ToListAsync();
|
||||||
// TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId));
|
// TranslationWordBreakingDataCache.Add(TranslationId, await GetTranslationSearchDataAsync(TranslationId));
|
||||||
// }
|
// }
|
||||||
internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
|
// internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId, AyContext ct = null)
|
||||||
{
|
// {
|
||||||
TranslationWordBreakingData LSD = new TranslationWordBreakingData();
|
// TranslationWordBreakingData LSD = new TranslationWordBreakingData();
|
||||||
if (ct == null)
|
// if (ct == null)
|
||||||
ct = ServiceProviderProvider.DBContext;
|
// ct = ServiceProviderProvider.DBContext;
|
||||||
//Get stopwords
|
// //Get stopwords
|
||||||
//Validate translation id, if not right then use default instead
|
// //Validate translation id, if not right then use default instead
|
||||||
var Param = new List<string>();
|
// var Param = new List<string>();
|
||||||
translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
|
// translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);
|
||||||
Param.Add("StopWords1");
|
// Param.Add("StopWords1");
|
||||||
Param.Add("StopWords2");
|
// Param.Add("StopWords2");
|
||||||
Param.Add("StopWords3");
|
// Param.Add("StopWords3");
|
||||||
Param.Add("StopWords4");
|
// Param.Add("StopWords4");
|
||||||
Param.Add("StopWords5");
|
// Param.Add("StopWords5");
|
||||||
Param.Add("StopWords6");
|
// Param.Add("StopWords6");
|
||||||
Param.Add("StopWords7");
|
// Param.Add("StopWords7");
|
||||||
var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
|
// var Stops = await TranslationBiz.GetSubsetStaticAsync(Param, translationId);
|
||||||
|
|
||||||
foreach (KeyValuePair<string, string> kvp in Stops)
|
// foreach (KeyValuePair<string, string> kvp in Stops)
|
||||||
{
|
// {
|
||||||
//Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
|
// //Each stopwords translation key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
|
||||||
if (kvp.Value != "?")
|
// if (kvp.Value != "?")
|
||||||
{
|
// {
|
||||||
LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
// LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
|
// LSD.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
|
||||||
return LSD;
|
// return LSD;
|
||||||
}
|
// }
|
||||||
|
|
||||||
public enum TokenTypes
|
public enum TokenTypes
|
||||||
{ Nothing, Separator, CJK, Latin };
|
{ Nothing, Separator, CJK, Latin };
|
||||||
@@ -1012,23 +1012,12 @@ cache or provide directly the translation to save time repeatedly fetching it wh
|
|||||||
return await BreakCoreAsync(translationId, true, textStrings, true);
|
return await BreakCoreAsync(translationId, true, textStrings, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Stop words list reset upon login or editing of Translation text
|
|
||||||
/// used for eliminating noise words from search dictionary
|
|
||||||
/// </summary>
|
|
||||||
|
|
||||||
|
|
||||||
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
|
internal static async Task<List<string>> BreakCoreAsync(long translationId, bool KeepWildCards, List<string> textStrings, bool ignoreStopWords = false)
|
||||||
{
|
{
|
||||||
//For stopwords and CJKIndex flag value
|
//For stopwords and CJKIndex flag value
|
||||||
//if not provided (will be provided by seeder for performance but normally never) then fetch
|
var translationWordBreakData = await SearchTranslationWordBreakDataCache.GetWordBreakData(translationId);
|
||||||
|
|
||||||
if (!translationWordBreakingDataCache.ContainsKey(translationId))
|
|
||||||
{
|
|
||||||
translationWordBreakingDataCache.Add(translationId, await GetTranslationSearchDataAsync(translationId));
|
|
||||||
}
|
|
||||||
var translationWordBreakData = translationWordBreakingDataCache[translationId];
|
|
||||||
|
|
||||||
|
|
||||||
int MAXWORDLENGTH = 255;
|
int MAXWORDLENGTH = 255;
|
||||||
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it
|
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it
|
||||||
|
|||||||
74
server/AyaNova/biz/SearchTranslationWordBreakDataCache.cs
Normal file
74
server/AyaNova/biz/SearchTranslationWordBreakDataCache.cs
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using AyaNova.Util;
|
||||||
|
using AyaNova.Models;
|
||||||
|
namespace AyaNova.Biz
|
||||||
|
{
|
||||||
|
|
||||||
|
public class SearchTranslationWordBreakDataCache
|
||||||
|
{
|
||||||
|
static SemaphoreSlim semaphoreSlim = new SemaphoreSlim(1, 1);
|
||||||
|
private static Dictionary<long, TranslationWordBreakingData> theCache = new Dictionary<long, TranslationWordBreakingData>();
|
||||||
|
//Parameterless constructor with an empty body; the fields and methods declared in this class are all static
public SearchTranslationWordBreakDataCache() { }
|
||||||
|
/// <summary>
/// Returns the word breaking data (stop words and CJK index flag) for a translation,
/// fetching it via <see cref="GetTranslationSearchDataAsync"/> on the first request for
/// that id and serving it from the static cache afterwards.
/// </summary>
/// <param name="id">Translation id used as the cache key</param>
/// <returns>The cached or freshly fetched word breaking data for <paramref name="id"/></returns>
public static async Task<TranslationWordBreakingData> GetWordBreakData(long id)
{
    //Dictionary is not safe for concurrent writers, so the lookup and the populate step
    //both run while holding the semaphore
    await semaphoreSlim.WaitAsync();
    try
    {
        //Look up the *requested* id; the previous check tested the hard coded key 1 which
        //caused a refetch on every call until id 1 was cached and a KeyNotFoundException
        //for any other uncached id afterwards
        TranslationWordBreakingData wordBreakData;
        if (!theCache.TryGetValue(id, out wordBreakData))
        {
            wordBreakData = await GetTranslationSearchDataAsync(id);
            theCache[id] = wordBreakData;
        }
        return wordBreakData;
    }
    finally
    {
        //Always release, even if the fetch throws, so later callers are not blocked
        semaphoreSlim.Release();
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the word breaking data for a translation: the combined stop word list taken from
/// the StopWords1..StopWords7 translation keys plus the CJK index flag.
/// </summary>
/// <param name="translationId">
/// Requested translation id; it is first passed through
/// TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId and the id that
/// call returns is the one used for the lookups
/// </param>
/// <returns>A newly created <see cref="TranslationWordBreakingData"/></returns>
internal static async Task<TranslationWordBreakingData> GetTranslationSearchDataAsync(long translationId)
{
    AyContext ct = ServiceProviderProvider.DBContext;

    //Validate the translation id, the returned id replaces the requested one
    translationId = await TranslationBiz.ReturnSpecifiedTranslationIdIfExistsOrDefaultTranslationId(translationId, ct);

    //Translation keys that hold the stop words
    var stopWordKeys = new List<string>
    {
        "StopWords1",
        "StopWords2",
        "StopWords3",
        "StopWords4",
        "StopWords5",
        "StopWords6",
        "StopWords7"
    };
    var stopWordEntries = await TranslationBiz.GetSubsetStaticAsync(stopWordKeys, translationId);

    var wordBreakData = new TranslationWordBreakingData();
    foreach (KeyValuePair<string, string> entry in stopWordEntries)
    {
        //A value of a single question mark marks an empty stop words key, nothing to add
        if (entry.Value == "?")
        {
            continue;
        }

        //Otherwise the value is a space delimited list of words
        wordBreakData.StopWords.AddRange(entry.Value.Split(" "));
    }

    wordBreakData.CJKIndex = await TranslationBiz.GetCJKIndexAsync(translationId, ct);
    return wordBreakData;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Holds the translation specific data needed when breaking text into words:
/// the CJK index flag and the list of stop words.
/// </summary>
public class TranslationWordBreakingData
{
    /// <summary>CJK index flag for the translation; false on a new instance</summary>
    public bool CJKIndex { get; set; } = false;

    /// <summary>Stop words for the translation; an empty list on a new instance</summary>
    public List<string> StopWords { get; set; } = new List<string>();

    /// <summary>Creates an instance with CJKIndex false and an empty StopWords list</summary>
    public TranslationWordBreakingData()
    {
    }
}
|
||||||
|
|
||||||
|
}//eoc
|
||||||
|
}//eons
|
||||||
Reference in New Issue
Block a user