This commit is contained in:
2018-09-28 18:58:22 +00:00
parent 1e3ccfcc14
commit fcb61e7ad5
4 changed files with 53 additions and 11 deletions

View File

@@ -35,6 +35,9 @@ REQUIREMENTS
- USE-CASE: Central text search for any match. Can include tags. Can specify a type of object result desired.
- MINIMUM word length: only words of at least 2 characters are searchable
- MAXIMUM word length: only the first 255 characters of a word are indexed (longer words are cut off at 255)
- ? USE-CASE: In-object text search for the typeandid that user is in, e.g. when in Client info form can search on that client.
- This is problematic because it seems to require searching for related objects, but that has been deprioritized to priority 2
- Perhaps a better approach is a scoped search, something like "search all workorders FOR THIS CLIENT" or "search all units FOR THIS CLIENT"
@@ -68,7 +71,6 @@ REQUIREMENTS
- Coding: break this into separate discrete classes; the old v7 code is very monolithic and inelegant
//TODO:
- SAMPLE DATA: Need a huge amount of sample data indexed to load test it
- INDEXES: play with it and see what works best

View File

@@ -29,11 +29,8 @@ IMMEDIATE ITEMS:
================
- Search and search text indexing
- Create a test for search that searches the widgets LOREM text
- Test with huge dataset for performance testing
- Need to be able to specify max results; this still has to be coded
- bugbug: why is the single letter "a" being indexed? Is the minimum word length filter missing, or is "a" not in the English stopwords?
- Search and search text indexing
- Can it be sped up with huge data test??
- Update all the other routes to include search indexing (attachments, tags etc, anything with text in it)

View File

@@ -568,6 +568,7 @@ namespace AyaNova.Biz
//Get stopwords and CJKIndex flag value
LocaleWordBreakingData LocaleSearchData = GetLocaleSearchData(localeId);
int MAXWORDLENGTH = 255;
int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it
StringBuilder sbResults = new StringBuilder();
//List to temporarily hold parsed words
//used to easily ensure unique words only
@@ -824,10 +825,15 @@ namespace AyaNova.Biz
//from the word list
foreach (string s in tempParsedWords)
{
//Add only non stopwords
if (!LocaleSearchData.StopWords.Contains(s))
//Filter out short words if we are breaking for indexing
//but keep them if they are part of a wildcard search phrase
if (s.Length > MINWORDLENGTH || (KeepWildCards && s.Contains('%')))
{
ReturnList.Add(s);
//Add only non stopwords
if (!LocaleSearchData.StopWords.Contains(s))
{
ReturnList.Add(s);
}
}
}

View File

@@ -451,7 +451,7 @@ namespace raven_integration
[Fact]
public async void BigDataSearchShouldHonourMaxResultsAndBeRelativelyFast()
public async void ConstrainedBigDataSearchShouldHonourMaxResultsAndBeRelativelyFast()
{
//THIS test is a bit different in that it relies partly on the big dataset for testing
@@ -481,7 +481,44 @@ namespace raven_integration
ResultCount.Should().BeLessOrEqualTo(1000);
//1755ms is the longest I've seen in initial testing with all 1000 results so setting slightly above
TimeToSearch.Should().BeLessThan(1760, "Big data search should not be too slow");
TimeToSearch.Should().BeLessThan(1760, "Constrained big data search should not be too slow");
}//eot
/// <summary>
/// Posts an unconstrained (maxResults = 0) wildcard phrase search to the "Search" route,
/// times the round trip, and fails if it takes longer than the ceiling observed with
/// the big data set seeded.
/// </summary>
/// <returns>A task the test runner awaits so that failures are observed.</returns>
[Fact]
public async System.Threading.Tasks.Task UnboundBigDataSearchShouldBeRelativelyFast()
{
    //THIS test is a bit different in that it relies partly on the big dataset for testing
    //NOTE(review): no separate big data / small data paths are visible in this test;
    //the timing ceiling below was measured against the big data set.

    //Wildcard phrase search
    const string TEST_SEARCH_PHRASE = "et*";

    //Build the search request body
    dynamic SearchParameters = new JObject();
    SearchParameters.phrase = TEST_SEARCH_PHRASE;
    SearchParameters.nameOnly = false;
    SearchParameters.typeOnly = 0;//no type
    SearchParameters.maxResults = 0;//0=return all results

    //Time the whole request; token retrieval happens inside the timed region
    var watch = System.Diagnostics.Stopwatch.StartNew();
    ApiResponse a = await Util.PostAsync("Search", await Util.GetTokenAsync("manager", "l3tm3in"), SearchParameters.ToString());
    watch.Stop();
    var TimeToSearch = watch.ElapsedMilliseconds;

    Util.ValidateDataReturnResponseOk(a);

    //Read the result list; the count is not asserted because it depends on the seeded data
    //(presumably this also confirms "result" is an array - verify against the API response type)
    var ResultCount = ((JArray)a.ObjectResponse["result"]).Count;
    //Disabled check that the result set exceeds the constrained limit:
    // ResultCount.Should().BeGreaterThan(1000);

    //24072 ms is the longest seen in initial testing with all bigData seeded results
    //and is used as the ceiling; fastest seen is 17227 ms with 14143 results
    TimeToSearch.Should().BeLessThan(24072, "Unconstrained big data search should not be too slow");
}//eot