diff --git a/devdocs/specs/core-search.txt b/devdocs/specs/core-search.txt index 3f97eb55..16f4e98d 100644 --- a/devdocs/specs/core-search.txt +++ b/devdocs/specs/core-search.txt @@ -35,6 +35,9 @@ REQUIREMENTS - USE-CASE: Central text search for any match. Can include tags. Can specify a type of object result desired. +- MINIMUM 2 character words only are searchable +- MAXIMUM 255 character words only are indexed (cut off at 255 if longer) + - ? USE-CASE: In-object text search for the typeandid that user is in, e.g. when in Client info form can search on that client. - This is problematic because it seems to require searching for related objects but that's deprecated to priority 2 - Maybe more ideally is a search that can be something like "search all workorders FOR THIS CLIENT" or search all units "FOR THIS CLIENT" @@ -68,7 +71,6 @@ REQUIREMENTS - Coding: break this into separate discrete classes, the old v7 code is very monolithic and in-elegant //TODO: -- SAMPLE DATA: Need a huge amount of sample data indexed to load test it - INDEXES: play with it and see what works best diff --git a/devdocs/todo.txt b/devdocs/todo.txt index dbfed66b..74786d61 100644 --- a/devdocs/todo.txt +++ b/devdocs/todo.txt @@ -29,11 +29,8 @@ IMMEDIATE ITEMS: ================ - - Search and search text indexing - - Create a test for search that searches the widgets LOREM text - - Test with huge dataset for performance testing - - Need to be able to specify max results, code that shit! - - bugbug: why is the single letter a being indexed? Missing shortness filter, A not in stopwords for english?? + - Search and search text indexing + - Can it be sped up with huge data test?? 
- Update all the other routes to include search indexing (attachments, tags etc, anything with text in it) diff --git a/server/AyaNova/biz/Search.cs b/server/AyaNova/biz/Search.cs index bee6c82e..7bc8141e 100644 --- a/server/AyaNova/biz/Search.cs +++ b/server/AyaNova/biz/Search.cs @@ -568,6 +568,7 @@ namespace AyaNova.Biz //Get stopwords and CJKIndex flag value LocaleWordBreakingData LocaleSearchData = GetLocaleSearchData(localeId); int MAXWORDLENGTH = 255; + int MINWORDLENGTH = 2;//A word isn't a word unless it's got at least two characters in it StringBuilder sbResults = new StringBuilder(); //List to temporarily hold parsed words //used to easily ensure unique words only @@ -824,10 +825,15 @@ namespace AyaNova.Biz //from the word list foreach (string s in tempParsedWords) { - //Add only non stopwords - if (!LocaleSearchData.StopWords.Contains(s)) + //Filter out words shorter than MINWORDLENGTH (inclusive minimum) if we are breaking for indexing + //but keep them if they are part of a wildcard search phrase + if (s.Length >= MINWORDLENGTH || (KeepWildCards && s.Contains('%'))) { - ReturnList.Add(s); + //Add only non stopwords + if (!LocaleSearchData.StopWords.Contains(s)) + { + ReturnList.Add(s); + } } } diff --git a/test/raven-integration/Search/SearchOps.cs b/test/raven-integration/Search/SearchOps.cs index 02a5e067..77ccfc01 100644 --- a/test/raven-integration/Search/SearchOps.cs +++ b/test/raven-integration/Search/SearchOps.cs @@ -451,7 +451,7 @@ namespace raven_integration [Fact] - public async void BigDataSearchShouldHonourMaxResultsAndBeRelativelyFast() + public async void ConstrainedBigDataSearchShouldHonourMaxResultsAndBeRelativelyFast() { //THIS test is a bit different in that it relies partly on the big dataset for testing @@ -481,7 +481,44 @@ namespace raven_integration ResultCount.Should().BeLessOrEqualTo(1000); //1755ms is the longest I've seen in initial testing with all 1000 results so setting slightly above - TimeToSearch.Should().BeLessThan(1760, "Big data search should not be too 
slow"); + TimeToSearch.Should().BeLessThan(1760, "Constrained big data search should not be too slow"); + + }//eot + + + [Fact] + public async void UnboundBigDataSearchShouldBeRelativelyFast() + { + + //THIS test is a bit different in that it relies partly on the big dataset for testing + //it requests every match (maxResults = 0) and only asserts on the elapsed search time + const string TEST_SEARCH_PHRASE = "et*"; + + //Now see if can find those objects with a phrase search + dynamic SearchParameters = new JObject(); + + SearchParameters.phrase = TEST_SEARCH_PHRASE; + SearchParameters.nameOnly = false; + SearchParameters.typeOnly = 0;//no type + SearchParameters.maxResults = 0;//0=return all results + + var watch = new System.Diagnostics.Stopwatch(); + watch.Start(); + ApiResponse a = await Util.PostAsync("Search", await Util.GetTokenAsync("manager", "l3tm3in"), SearchParameters.ToString()); + watch.Stop(); + + var TimeToSearch = watch.ElapsedMilliseconds; + + Util.ValidateDataReturnResponseOk(a); + + //Now validate the return list + var ResultCount = ((JArray)a.ObjectResponse["result"]).Count; + //disabled: would assert the result count exceeds the 1000 cap checked by the constrained test - TODO confirm why it is off + // ResultCount.Should().BeGreaterThan(1000); + + //24072 ms is the longest I've seen in initial testing with all bigData seeded results so setting slightly above + TimeToSearch.Should().BeLessThan(24100, "Unconstrained big data search should not be too slow"); + //Fastest is 17227 ms with 14143 results }//eot