This commit is contained in:
@@ -1,4 +1,6 @@
|
|||||||
using System;
|
using System;
|
||||||
|
using System.Globalization;
|
||||||
|
using System.Text;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using Newtonsoft.Json.Linq;
|
using Newtonsoft.Json.Linq;
|
||||||
@@ -24,7 +26,7 @@ namespace AyaNova.Biz
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name)
|
public static void ProcessKeywords(AyContext ct, long localeId, long objectID, AyaType objectType, bool newRecord, string keyWords, string name)
|
||||||
{
|
{
|
||||||
var LocaleSearchData = GetLocaleSearchData(ct, localeId);
|
|
||||||
|
|
||||||
//Get CJK index bool flag.
|
//Get CJK index bool flag.
|
||||||
//TODO: should this be a property of the locale or a global setting as before??
|
//TODO: should this be a property of the locale or a global setting as before??
|
||||||
@@ -59,9 +61,11 @@ namespace AyaNova.Biz
|
|||||||
|
|
||||||
|
|
||||||
//Get the current stopwords for the user's locale
|
//Get the current stopwords for the user's locale
|
||||||
private static LocaleSearchData GetLocaleSearchData(AyContext ct, long localeId)
|
private static LocaleSearchData GetLocaleSearchData(long localeId, AyContext ct = null)
|
||||||
{
|
{
|
||||||
LocaleSearchData LSD=new LocaleSearchData();
|
LocaleSearchData LSD = new LocaleSearchData();
|
||||||
|
if (ct == null)
|
||||||
|
ct = ServiceProviderProvider.DBContext;
|
||||||
//Get stopwords
|
//Get stopwords
|
||||||
//Validate locale id, if not right then use default instead
|
//Validate locale id, if not right then use default instead
|
||||||
var Param = new Api.Controllers.LocaleController.LocaleSubsetParam();
|
var Param = new Api.Controllers.LocaleController.LocaleSubsetParam();
|
||||||
@@ -80,11 +84,11 @@ namespace AyaNova.Biz
|
|||||||
//Each stopwords locale key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) its value is a single question mark
|
//Each stopwords locale key is a space delimited list of words and in the case of an empty local string (i.e. StopWords7) its value is a single question mark
|
||||||
if (kvp.Value != "?")
|
if (kvp.Value != "?")
|
||||||
{
|
{
|
||||||
LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
LSD.StopWords.AddRange(kvp.Value.Split(" "));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId,ct).Result;
|
LSD.CJKIndex = LocaleBiz.GetCJKIndex(localeId, ct).Result;
|
||||||
return LSD;
|
return LSD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,23 +111,24 @@ namespace AyaNova.Biz
|
|||||||
/// (languages with no easily identifiable word boundaries as in english)
|
/// (languages with no easily identifiable word boundaries as in english)
|
||||||
/// </summary>
|
/// </summary>
|
||||||
///
|
///
|
||||||
|
/// <param name="localeId"></param>
|
||||||
/// <param name="text">An array of 0 to * strings of text</param>
|
/// <param name="text">An array of 0 to * strings of text</param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
internal static string Break(params string[] text)
|
internal static string Break(long localeId, params string[] text)
|
||||||
{
|
{
|
||||||
return BreakCore(false, text);
|
return BreakCore(localeId, false, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Used to process the user's search phrase and preserve wild
|
/// Used to process the user's search phrase and preserve wild
|
||||||
/// cards entered
|
/// cards entered
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
/// <param name="localeId"></param>
|
||||||
/// <param name="text"></param>
|
/// <param name="text"></param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
internal static string BreakSearchPhrase(params string[] text)
|
internal static string BreakSearchPhrase(long localeId, params string[] text)
|
||||||
{
|
{
|
||||||
return BreakCore(true, text);
|
return BreakCore(localeId, true, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -132,36 +137,21 @@ namespace AyaNova.Biz
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public static System.Collections.Generic.List<string> StopList = null;
|
public static System.Collections.Generic.List<string> StopList = null;
|
||||||
|
|
||||||
internal static string BreakCore(bool KeepWildCards, params string[] text)
|
internal static string BreakCore(long localeId, bool KeepWildCards, params string[] text)
|
||||||
{
|
{
|
||||||
|
//Get stopwords and CJKIndex flag value
|
||||||
|
LocaleSearchData LSD = GetLocaleSearchData(localeId);
|
||||||
|
|
||||||
////case 1039 //log.Debug("Break");
|
|
||||||
|
|
||||||
#region stopwords
|
|
||||||
if (StopList == null)
|
|
||||||
{
|
|
||||||
StopList = new List<string>();
|
|
||||||
for (int stopkeys = 1; stopkeys < 8; stopkeys++)
|
|
||||||
{
|
|
||||||
MatchCollection mc = rxAllWords.Matches(LocalizedTextTable.GetLocalizedTextDirect("StopWords" + stopkeys.ToString()));
|
|
||||||
foreach (Match m in mc)
|
|
||||||
{
|
|
||||||
if (!string.IsNullOrEmpty(m.Value) && m.Value != "?" && !StopList.Contains(m.Value))
|
|
||||||
StopList.Add(m.Value);
|
|
||||||
|
|
||||||
}
|
//bool CJK = GlobalSettings.CJKIndex;
|
||||||
}
|
|
||||||
}
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
bool CJK = GlobalSettings.CJKIndex;
|
|
||||||
int MAXWORDLENGTH = 255;
|
int MAXWORDLENGTH = 255;
|
||||||
|
|
||||||
StringBuilder sbResults = new StringBuilder();
|
StringBuilder sbResults = new StringBuilder();
|
||||||
|
|
||||||
//Hashtable to temporarily hold parsed words
|
//List to temporarily hold parsed words
|
||||||
//used to easily ensure unique words only
|
//used to easily ensure unique words only
|
||||||
Hashtable ht = new Hashtable();
|
List<string> tempParsedWords = new List<string>();
|
||||||
|
|
||||||
//Stuff required for creating xml fragment on the fly in memory (string)
|
//Stuff required for creating xml fragment on the fly in memory (string)
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
@@ -194,7 +184,7 @@ namespace AyaNova.Biz
|
|||||||
//get it as a character
|
//get it as a character
|
||||||
char c = t.GetTextElement()[0];
|
char c = t.GetTextElement()[0];
|
||||||
|
|
||||||
if (!CJK)
|
if (!LSD.CJKIndex)
|
||||||
{
|
{
|
||||||
#region regular tokenizer
|
#region regular tokenizer
|
||||||
|
|
||||||
@@ -214,10 +204,9 @@ namespace AyaNova.Biz
|
|||||||
if (sbWord.Length >= MAXWORDLENGTH)
|
if (sbWord.Length >= MAXWORDLENGTH)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
sbWord.Append(c);
|
sbWord.Append(c);
|
||||||
@@ -240,15 +229,12 @@ namespace AyaNova.Biz
|
|||||||
if (sbWord.Length > 0)
|
if (sbWord.Length > 0)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
@@ -286,10 +272,9 @@ namespace AyaNova.Biz
|
|||||||
if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH)
|
if (LastToken == TokenTypes.CJK || sbWord.Length >= MAXWORDLENGTH)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
sbWord.Append(c);
|
sbWord.Append(c);
|
||||||
@@ -312,10 +297,9 @@ namespace AyaNova.Biz
|
|||||||
if (sbWord.Length > 0)
|
if (sbWord.Length > 0)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
|
|
||||||
@@ -341,10 +325,9 @@ namespace AyaNova.Biz
|
|||||||
if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH)
|
if (LastToken == TokenTypes.Latin || sbWord.Length >= MAXWORDLENGTH)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
sbWord.Append(c);
|
sbWord.Append(c);
|
||||||
@@ -360,10 +343,10 @@ namespace AyaNova.Biz
|
|||||||
//and flush the resultant 2 character n-gram
|
//and flush the resultant 2 character n-gram
|
||||||
sbWord.Append(c);
|
sbWord.Append(c);
|
||||||
System.Diagnostics.Debug.Assert(sbWord.Length == 2);
|
System.Diagnostics.Debug.Assert(sbWord.Length == 2);
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
//flush away...
|
||||||
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
sbWord.Append(c);
|
sbWord.Append(c);
|
||||||
@@ -388,28 +371,19 @@ namespace AyaNova.Biz
|
|||||||
if (sbWord.Length > 0)
|
if (sbWord.Length > 0)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
//sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -418,10 +392,9 @@ namespace AyaNova.Biz
|
|||||||
if (sbWord.Length > 0)
|
if (sbWord.Length > 0)
|
||||||
{
|
{
|
||||||
//flush away...
|
//flush away...
|
||||||
if (!ht.ContainsKey(sbWord.ToString()))
|
if (!tempParsedWords.Contains(sbWord.ToString()))
|
||||||
{
|
{
|
||||||
ht[sbWord.ToString()] = 1;
|
tempParsedWords.Add(sbWord.ToString());
|
||||||
////sbTest.Append(sbWord.ToString()+"\r\n");
|
|
||||||
}
|
}
|
||||||
sbWord.Length = 0;
|
sbWord.Length = 0;
|
||||||
}
|
}
|
||||||
@@ -429,50 +402,25 @@ namespace AyaNova.Biz
|
|||||||
|
|
||||||
|
|
||||||
//bail early if there is nothing indexed
|
//bail early if there is nothing indexed
|
||||||
if (ht.Count == 0) return "";
|
if (tempParsedWords.Count == 0) return "";
|
||||||
|
|
||||||
if (AsXML)
|
|
||||||
{
|
|
||||||
//Make a return xml fragment
|
|
||||||
//from the word list
|
|
||||||
foreach (DictionaryEntry d in ht)
|
|
||||||
{
|
|
||||||
//Add only non stopwords
|
|
||||||
if (!StopList.Contains(d.Key.ToString()))
|
|
||||||
{
|
|
||||||
w.WriteStartElement("i");
|
|
||||||
w.WriteAttributeString("w", d.Key.ToString());
|
|
||||||
w.WriteEndElement();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
w.WriteEndElement();
|
|
||||||
sr.Close();
|
|
||||||
return sr.ToString();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//Make a return string array
|
//Make a return string array
|
||||||
//from the word list
|
//from the word list
|
||||||
foreach (DictionaryEntry d in ht)
|
foreach (string s in tempParsedWords)
|
||||||
{
|
{
|
||||||
//Add only non stopwords
|
//Add only non stopwords
|
||||||
if (!StopList.Contains(d.Key.ToString()))
|
if (!StopList.Contains(s))
|
||||||
{
|
{
|
||||||
sbResults.Append(d.Key.ToString());
|
sbResults.Append(s);
|
||||||
sbResults.Append(",");
|
sbResults.Append(",");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//sometimes all the results are stop words so you end up
|
//sometimes all the results are stop words so you end up
|
||||||
//here with nothing in sbResults. Removed some code that was
|
//here with nothing in sbResults.
|
||||||
//causing a crash here
|
|
||||||
return sbResults.ToString().TrimEnd(',');
|
return sbResults.ToString().TrimEnd(',');
|
||||||
|
|
||||||
}
|
|
||||||
//return sbTest.ToString();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
@@ -482,9 +430,10 @@ namespace AyaNova.Biz
|
|||||||
{
|
{
|
||||||
public bool CJKIndex { get; set; }
|
public bool CJKIndex { get; set; }
|
||||||
public List<string> StopWords { get; set; }
|
public List<string> StopWords { get; set; }
|
||||||
public LocaleSearchData(){
|
public LocaleSearchData()
|
||||||
CJKIndex=false;
|
{
|
||||||
StopWords=new List<string>();
|
CJKIndex = false;
|
||||||
|
StopWords = new List<string>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user