This commit is contained in:
2018-09-18 23:29:27 +00:00
parent 02d4084826
commit 3824467c8c

View File

@@ -1,4 +1,6 @@
using System;
using System.Globalization;
using System.Text;
using System.Collections.Generic;
using System.IO;
using Newtonsoft.Json.Linq;
@@ -24,7 +26,7 @@ namespace AyaNova.Biz
/// </summary>
public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name)
{
var LocaleSearchData = GetLocaleSearchData(ct, localeId);
//Get CJK index bool flag.
//TODO: should this be a property of the locale or a global setting as before??
@@ -59,9 +61,11 @@ namespace AyaNova.Biz
//Get the current stopwords for the user's locale
private static LocaleSearchData GetLocaleSearchData(AyContext ct, long localeId)
private static LocaleSearchData GetLocaleSearchData(long localeId, AyContext ct = null)
{
LocaleSearchData LSD = new LocaleSearchData();
if (ct == null)
ct = ServiceProviderProvider.DBContext;
//Get stopwords
//Validate locale id, if not right then use default instead
var Param = new Api.Controllers.LocaleController.LocaleSubsetParam();
@@ -107,23 +111,24 @@ namespace AyaNova.Biz
/// (languages with no easily identifiable word boundaries, unlike English)
/// </summary>
///
/// <param name="localeId"></param>
/// <param name="text">Zero or more strings of text</param>
/// <returns></returns>
internal static string Break(params string[] text)
internal static string Break(long localeId, params string[] text)
{
return BreakCore(false, text);
return BreakCore(localeId, false, text);
}
/// <summary>
/// Used to process a user's search phrase and preserve any
/// wildcards entered
/// </summary>
/// <param name="localeId"></param>
/// <param name="text"></param>
/// <returns></returns>
internal static string BreakSearchPhrase(params string[] text)
internal static string BreakSearchPhrase(long localeId, params string[] text)
{
return BreakCore(true, text);
return BreakCore(localeId, true, text);
}
/// <summary>
@@ -132,36 +137,21 @@ namespace AyaNova.Biz
/// </summary>
public static System.Collections.Generic.List<string> StopList = null;
internal static string BreakCore(bool KeepWildCards, params string[] text)
internal static string BreakCore(long localeId, bool KeepWildCards, params string[] text)
{
//Get stopwords and CJKIndex flag value
LocaleSearchData LSD = GetLocaleSearchData(localeId);
////case 1039 //log.Debug("Break");
#region stopwords
if (StopList == null)
{
StopList = new List<string>();
for (int stopkeys = 1; stopkeys < 8; stopkeys++)
{
MatchCollection mc = rxAllWords.Matches(LocalizedTextTable.GetLocalizedTextDirect("StopWords" + stopkeys.ToString()));
foreach (Match m in mc)
{
if (!string.IsNullOrEmpty(m.Value) && m.Value != "?" && !StopList.Contains(m.Value))
StopList.Add(m.Value);
}
}
}
#endregion
bool CJK = GlobalSettings.CJKIndex;
//bool CJK = GlobalSettings.CJKIndex;
int MAXWORDLENGTH = 255;
StringBuilder sbResults = new StringBuilder();
//Hashtable to temporarily hold parsed words
//List to temporarily hold parsed words
//used to easily ensure unique words only
Hashtable ht = new Hashtable();
List<string> tempParsedWords = new List<string>();
//Stuff required for creating xml fragment on the fly in memory (string)
StringBuilder sb = new StringBuilder();
@@ -194,7 +184,7 @@ namespace AyaNova.Biz
//get it as a character
char c = t.GetTextElement()[0];
if (!CJK)
if (!LSD.CJKIndex)
{
#region regular tokenizer
@@ -214,10 +204,9 @@ namespace AyaNova.Biz
if (sbWord.Length >= MAXWORDLENGTH)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
sbWord.Append(c);
@@ -240,15 +229,12 @@ namespace AyaNova.Biz
if (sbWord.Length > 0)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
continue;
}
#endregion
@@ -286,10 +272,9 @@ namespace AyaNova.Biz
if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
sbWord.Append(c);
@@ -312,10 +297,9 @@ namespace AyaNova.Biz
if (sbWord.Length > 0)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
@@ -341,10 +325,9 @@ namespace AyaNova.Biz
if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
sbWord.Append(c);
@@ -360,10 +343,10 @@ namespace AyaNova.Biz
//and flush the resultant 2 character n-gram
sbWord.Append(c);
System.Diagnostics.Debug.Assert(sbWord.Length == 2);
if (!ht.ContainsKey(sbWord.ToString()))
//flush away...
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
sbWord.Append(c);
@@ -388,28 +371,19 @@ namespace AyaNova.Biz
if (sbWord.Length > 0)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
//sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
continue;
}
#endregion
}
}
#endregion
}
}
@@ -418,10 +392,9 @@ namespace AyaNova.Biz
if (sbWord.Length > 0)
{
//flush away...
if (!ht.ContainsKey(sbWord.ToString()))
if (!tempParsedWords.Contains(sbWord.ToString()))
{
ht[sbWord.ToString()] = 1;
////sbTest.Append(sbWord.ToString()+"\r\n");
tempParsedWords.Add(sbWord.ToString());
}
sbWord.Length = 0;
}
@@ -429,50 +402,25 @@ namespace AyaNova.Biz
//bail early if there is nothing indexed
if (ht.Count == 0) return "";
if (tempParsedWords.Count == 0) return "";
if (AsXML)
{
//Make a return xml fragment
//from the word list
foreach (DictionaryEntry d in ht)
{
//Add only non stopwords
if (!StopList.Contains(d.Key.ToString()))
{
w.WriteStartElement("i");
w.WriteAttributeString("w", d.Key.ToString());
w.WriteEndElement();
}
}
w.WriteEndElement();
sr.Close();
return sr.ToString();
}
else
{
//Make a return string array
//from the word list
foreach (DictionaryEntry d in ht)
foreach (string s in tempParsedWords)
{
//Add only non stopwords
if (!StopList.Contains(d.Key.ToString()))
if (!StopList.Contains(s))
{
sbResults.Append(d.Key.ToString());
sbResults.Append(s);
sbResults.Append(",");
}
}
//sometimes all the results are stop words so you end up
//here with nothing in sbResults. Removed some code that was
//causing a crash here
//here with nothing in sbResults.
return sbResults.ToString().TrimEnd(',');
}
//return sbTest.ToString();
}
#endregion
@@ -482,7 +430,8 @@ namespace AyaNova.Biz
{
public bool CJKIndex { get; set; }
public List<string> StopWords { get; set; }
public LocaleSearchData(){
public LocaleSearchData()
{
CJKIndex = false;
StopWords = new List<string>();
}