From 3824467c8c83af776ceec2e2df9cd112c522a983 Mon Sep 17 00:00:00 2001 From: John Cardinal Date: Tue, 18 Sep 2018 23:29:27 +0000 Subject: [PATCH] --- server/AyaNova/biz/Search.cs | 153 ++++++++++++----------------------- 1 file changed, 51 insertions(+), 102 deletions(-) diff --git a/server/AyaNova/biz/Search.cs b/server/AyaNova/biz/Search.cs index 34c601e1..4c34056a 100644 --- a/server/AyaNova/biz/Search.cs +++ b/server/AyaNova/biz/Search.cs @@ -1,4 +1,6 @@ using System; +using System.Globalization; +using System.Text; using System.Collections.Generic; using System.IO; using Newtonsoft.Json.Linq; @@ -24,7 +26,7 @@ namespace AyaNova.Biz /// public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name) { - var LocaleSearchData = GetLocaleSearchData(ct, localeId); + //Get CJK index bool flag. //TODO: should this be a property of the locale or a global setting as before?? @@ -59,9 +61,11 @@ namespace AyaNova.Biz //Get the current stopwords for the user's locale - private static LocaleSearchData GetLocaleSearchData(AyContext ct, long localeId) + private static LocaleSearchData GetLocaleSearchData(long localeId, AyContext ct = null) { - LocaleSearchData LSD=new LocaleSearchData(); + LocaleSearchData LSD = new LocaleSearchData(); + if (ct == null) + ct = ServiceProviderProvider.DBContext; //Get stopwords //Validate locale id, if not right then use default instead var Param = new Api.Controllers.LocaleController.LocaleSubsetParam(); @@ -74,17 +78,17 @@ namespace AyaNova.Biz Param.Keys.Add("StopWords6"); Param.Keys.Add("StopWords7"); var Stops = LocaleBiz.GetSubsetStatic(Param).Result; - + foreach (KeyValuePair kvp in Stops) { //Each stopwords locale key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) it's value is a single question mark if (kvp.Value != "?") { - LSD.StopWords.AddRange(kvp.Value.Split(" ")); + LSD.StopWords.AddRange(kvp.Value.Split(" ")); } } - LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId,ct).Result; + LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId, ct).Result; return LSD; } @@ -107,23 +111,24 @@ namespace AyaNova.Biz /// (languages with no easily identifiable word boundaries as in english) /// /// - + /// /// An array of 0 to * strings of text /// - internal static string Break(params string[] text) + internal static string Break(long localeId, params string[] text) { - return BreakCore(false, text); + return BreakCore(localeId, false, text); } /// /// Used to Process users search phrase and preserve wild /// cards entered /// + /// /// /// - internal static string BreakSearchPhrase(params string[] text) + internal static string BreakSearchPhrase(long localeId, params string[] text) { - return BreakCore(true, text); + return BreakCore(localeId, true, text); } /// @@ -132,36 +137,21 @@ namespace AyaNova.Biz /// public static System.Collections.Generic.List StopList = null; - internal static string BreakCore(bool KeepWildCards, params string[] text) + internal static string BreakCore(long localeId, bool KeepWildCards, params string[] text) { + //Get stopwords and CJKIndex flag value + LocaleSearchData LSD = GetLocaleSearchData(localeId); - ////case 1039 //log.Debug("Break"); - #region stopwords - if (StopList == null) - { - StopList = new List(); - for (int stopkeys = 1; stopkeys < 8; stopkeys++) - { - MatchCollection mc = rxAllWords.Matches(LocalizedTextTable.GetLocalizedTextDirect("StopWords" + stopkeys.ToString())); - foreach (Match m in mc) - { - if (!string.IsNullOrEmpty(m.Value) && m.Value != "?" && !StopList.Contains(m.Value)) - StopList.Add(m.Value); - } - } - } - #endregion - - bool CJK = GlobalSettings.CJKIndex; + //bool CJK = GlobalSettings.CJKIndex; int MAXWORDLENGTH = 255; StringBuilder sbResults = new StringBuilder(); - //Hashtable to temporarily hold parsed words + //List to temporarily hold parsed words //used to easily ensure unique words only - Hashtable ht = new Hashtable(); + List tempParsedWords = new List(); //Stuff required for creating xml fragment on the fly in memory (string) StringBuilder sb = new StringBuilder(); @@ -194,7 +184,7 @@ namespace AyaNova.Biz //get it as a character char c = t.GetTextElement()[0]; - if (!CJK) + if (!LSD.CJKIndex) { #region regular tokenizer @@ -214,10 +204,9 @@ namespace AyaNova.Biz if (sbWord.Length >= MAXWORDLENGTH) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; sbWord.Append(c); @@ -240,15 +229,12 @@ namespace AyaNova.Biz if (sbWord.Length > 0) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; - continue; - } #endregion @@ -286,10 +272,9 @@ namespace AyaNova.Biz if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; sbWord.Append(c); @@ -312,10 +297,9 @@ namespace AyaNova.Biz if (sbWord.Length > 0) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; @@ -341,10 +325,9 @@ namespace AyaNova.Biz if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; sbWord.Append(c); @@ -360,10 +343,10 @@ namespace AyaNova.Biz //and flush the resultant 2 character n-gram sbWord.Append(c); System.Diagnostics.Debug.Assert(sbWord.Length == 2); - if (!ht.ContainsKey(sbWord.ToString())) + //flush away... + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; sbWord.Append(c); @@ -388,28 +371,19 @@ namespace AyaNova.Biz if (sbWord.Length > 0) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - //sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; - continue; - } #endregion } - } - - - - - #endregion } } @@ -418,10 +392,9 @@ namespace AyaNova.Biz if (sbWord.Length > 0) { //flush away... - if (!ht.ContainsKey(sbWord.ToString())) + if (!tempParsedWords.Contains(sbWord.ToString())) { - ht[sbWord.ToString()] = 1; - ////sbTest.Append(sbWord.ToString()+"\r\n"); + tempParsedWords.Add(sbWord.ToString()); } sbWord.Length = 0; } @@ -429,49 +402,24 @@ namespace AyaNova.Biz //bail early if there is nothing indexed - if (ht.Count == 0) return ""; + if (tempParsedWords.Count == 0) return ""; - if (AsXML) - { - //Make a return xml fragment - //from the word list - foreach (DictionaryEntry d in ht) - { - //Add only non stopwords - if (!StopList.Contains(d.Key.ToString())) - { - w.WriteStartElement("i"); - w.WriteAttributeString("w", d.Key.ToString()); - w.WriteEndElement(); - } - } - - w.WriteEndElement(); - sr.Close(); - return sr.ToString(); - } - else - { + //Make a return string array //from the word list - foreach (DictionaryEntry d in ht) + foreach (string s in tempParsedWords) { //Add only non stopwords - if (!StopList.Contains(d.Key.ToString())) + if (!StopList.Contains(s)) { - sbResults.Append(d.Key.ToString()); + sbResults.Append(s); sbResults.Append(","); - } } //sometimes all the results are stop words so you end up - //here with nothing in sbResults. Removed some code that was - //causing a crash here - return sbResults.ToString().TrimEnd(','); - - } - //return sbTest.ToString(); + //here with nothing in sbResults. + return sbResults.ToString().TrimEnd(','); } @@ -482,9 +430,10 @@ namespace AyaNova.Biz { public bool CJKIndex { get; set; } public List StopWords { get; set; } - public LocaleSearchData(){ - CJKIndex=false; - StopWords=new List(); + public LocaleSearchData() + { + CJKIndex = false; + StopWords = new List(); } }