This commit is contained in:
2018-09-18 23:29:27 +00:00
parent 02d4084826
commit 3824467c8c

View File

@@ -1,4 +1,6 @@
using System; using System;
using System.Globalization;
using System.Text;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using Newtonsoft.Json.Linq; using Newtonsoft.Json.Linq;
@@ -24,7 +26,7 @@ namespace AyaNova.Biz
/// </summary> /// </summary>
public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name) public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name)
{ {
var LocaleSearchData = GetLocaleSearchData(ct, localeId);
//Get CJK index bool flag. //Get CJK index bool flag.
//TODO: should this be a property of the locale or a global setting as before?? //TODO: should this be a property of the locale or a global setting as before??
@@ -59,9 +61,11 @@ namespace AyaNova.Biz
//Get the current stopwords for the user's locale //Get the current stopwords for the user's locale
private static LocaleSearchData GetLocaleSearchData(AyContext ct, long localeId) private static LocaleSearchData GetLocaleSearchData(long localeId, AyContext ct = null)
{ {
LocaleSearchData LSD=new LocaleSearchData(); LocaleSearchData LSD = new LocaleSearchData();
if (ct == null)
ct = ServiceProviderProvider.DBContext;
//Get stopwords //Get stopwords
//Validate locale id, if not right then use default instead //Validate locale id, if not right then use default instead
var Param = new Api.Controllers.LocaleController.LocaleSubsetParam(); var Param = new Api.Controllers.LocaleController.LocaleSubsetParam();
@@ -80,11 +84,11 @@ namespace AyaNova.Biz
//Each stopwords locale key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark //Each stopwords locale key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark
if (kvp.Value != "?") if (kvp.Value != "?")
{ {
LSD.StopWords.AddRange(kvp.Value.Split(" ")); LSD.StopWords.AddRange(kvp.Value.Split(" "));
} }
} }
LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId,ct).Result; LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId, ct).Result;
return LSD; return LSD;
} }
@@ -107,23 +111,24 @@ namespace AyaNova.Biz
/// (languages with no easily identifiable word boundaries as in english) /// (languages with no easily identifiable word boundaries as in english)
/// </summary> /// </summary>
/// ///
/// <param name="localeId"></param>
/// <param name="text">An array of 0 to * strings of text</param> /// <param name="text">An array of 0 to * strings of text</param>
/// <returns></returns> /// <returns></returns>
internal static string Break(params string[] text) internal static string Break(long localeId, params string[] text)
{ {
return BreakCore(false, text); return BreakCore(localeId, false, text);
} }
/// <summary> /// <summary>
/// Used to Process users search phrase and preserve wild /// Used to Process users search phrase and preserve wild
/// cards entered /// cards entered
/// </summary> /// </summary>
/// <param name="localeId"></param>
/// <param name="text"></param> /// <param name="text"></param>
/// <returns></returns> /// <returns></returns>
internal static string BreakSearchPhrase(params string[] text) internal static string BreakSearchPhrase(long localeId, params string[] text)
{ {
return BreakCore(true, text); return BreakCore(localeId, true, text);
} }
/// <summary> /// <summary>
@@ -132,36 +137,21 @@ namespace AyaNova.Biz
/// </summary> /// </summary>
public static System.Collections.Generic.List<string> StopList = null; public static System.Collections.Generic.List<string> StopList = null;
internal static string BreakCore(bool KeepWildCards, params string[] text) internal static string BreakCore(long localeId, bool KeepWildCards, params string[] text)
{ {
//Get stopwords and CJKIndex flag value
LocaleSearchData LSD = GetLocaleSearchData(localeId);
////case 1039 //log.Debug("Break");
#region stopwords
if (StopList == null)
{
StopList = new List<string>();
for (int stopkeys = 1; stopkeys < 8; stopkeys++)
{
MatchCollection mc = rxAllWords.Matches(LocalizedTextTable.GetLocalizedTextDirect("StopWords" + stopkeys.ToString()));
foreach (Match m in mc)
{
if (!string.IsNullOrEmpty(m.Value) && m.Value != "?" && !StopList.Contains(m.Value))
StopList.Add(m.Value);
} //bool CJK = GlobalSettings.CJKIndex;
}
}
#endregion
bool CJK = GlobalSettings.CJKIndex;
int MAXWORDLENGTH = 255; int MAXWORDLENGTH = 255;
StringBuilder sbResults = new StringBuilder(); StringBuilder sbResults = new StringBuilder();
//Hashtable to temporarily hold parsed words //List to temporarily hold parsed words
//used to easily ensure unique words only //used to easily ensure unique words only
Hashtable ht = new Hashtable(); List<string> tempParsedWords = new List<string>();
//Stuff required for creating xml fragment on the fly in memory (string) //Stuff required for creating xml fragment on the fly in memory (string)
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@@ -194,7 +184,7 @@ namespace AyaNova.Biz
//get it as a character //get it as a character
char c = t.GetTextElement()[0]; char c = t.GetTextElement()[0];
if (!CJK) if (!LSD.CJKIndex)
{ {
#region regular tokenizer #region regular tokenizer
@@ -214,10 +204,9 @@ namespace AyaNova.Biz
if (sbWord.Length >= MAXWORDLENGTH) if (sbWord.Length >= MAXWORDLENGTH)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
sbWord.Append(c); sbWord.Append(c);
@@ -240,15 +229,12 @@ namespace AyaNova.Biz
if (sbWord.Length > 0) if (sbWord.Length > 0)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
continue; continue;
} }
#endregion #endregion
@@ -286,10 +272,9 @@ namespace AyaNova.Biz
if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH) if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
sbWord.Append(c); sbWord.Append(c);
@@ -312,10 +297,9 @@ namespace AyaNova.Biz
if (sbWord.Length > 0) if (sbWord.Length > 0)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
@@ -341,10 +325,9 @@ namespace AyaNova.Biz
if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH) if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
sbWord.Append(c); sbWord.Append(c);
@@ -360,10 +343,10 @@ namespace AyaNova.Biz
//and flush the resultant 2 character n-gram //and flush the resultant 2 character n-gram
sbWord.Append(c); sbWord.Append(c);
System.Diagnostics.Debug.Assert(sbWord.Length == 2); System.Diagnostics.Debug.Assert(sbWord.Length == 2);
if (!ht.ContainsKey(sbWord.ToString())) //flush away...
if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
sbWord.Append(c); sbWord.Append(c);
@@ -388,28 +371,19 @@ namespace AyaNova.Biz
if (sbWord.Length > 0) if (sbWord.Length > 0)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
//sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
continue; continue;
} }
#endregion #endregion
} }
} }
#endregion #endregion
} }
} }
@@ -418,10 +392,9 @@ namespace AyaNova.Biz
if (sbWord.Length > 0) if (sbWord.Length > 0)
{ {
//flush away... //flush away...
if (!ht.ContainsKey(sbWord.ToString())) if (!tempParsedWords.Contains(sbWord.ToString()))
{ {
ht[sbWord.ToString()] = 1; tempParsedWords.Add(sbWord.ToString());
////sbTest.Append(sbWord.ToString()+"\r\n");
} }
sbWord.Length = 0; sbWord.Length = 0;
} }
@@ -429,50 +402,25 @@ namespace AyaNova.Biz
//bail early if there is nothing indexed //bail early if there is nothing indexed
if (ht.Count == 0) return ""; if (tempParsedWords.Count == 0) return "";
if (AsXML)
{
//Make a return xml fragment
//from the word list
foreach (DictionaryEntry d in ht)
{
//Add only non stopwords
if (!StopList.Contains(d.Key.ToString()))
{
w.WriteStartElement("i");
w.WriteAttributeString("w", d.Key.ToString());
w.WriteEndElement();
}
}
w.WriteEndElement();
sr.Close();
return sr.ToString();
}
else
{
//Make a return string array //Make a return string array
//from the word list //from the word list
foreach (DictionaryEntry d in ht) foreach (string s in tempParsedWords)
{ {
//Add only non stopwords //Add only non stopwords
if (!StopList.Contains(d.Key.ToString())) if (!StopList.Contains(s))
{ {
sbResults.Append(d.Key.ToString()); sbResults.Append(s);
sbResults.Append(","); sbResults.Append(",");
} }
} }
//sometimes all the results are stop words so you end up //sometimes all the results are stop words so you end up
//here with nothing in sbResults. Removed some code that was //here with nothing in sbResults.
//causing a crash here
return sbResults.ToString().TrimEnd(','); return sbResults.ToString().TrimEnd(',');
}
//return sbTest.ToString();
} }
#endregion #endregion
@@ -482,9 +430,10 @@ namespace AyaNova.Biz
{ {
public bool CJKIndex { get; set; } public bool CJKIndex { get; set; }
public List<string> StopWords { get; set; } public List<string> StopWords { get; set; }
public LocaleSearchData(){ public LocaleSearchData()
CJKIndex=false; {
StopWords=new List<string>(); CJKIndex = false;
StopWords = new List<string>();
} }
} }