From 2ba9ded8e9764da28598d82cb5278034702bce1f Mon Sep 17 00:00:00 2001
From: John Cardinal
Date: Thu, 9 Apr 2020 14:15:48 +0000
Subject: [PATCH]

---
 .../AyaNova/Controllers/SearchController.cs   |  25 ++
 server/AyaNova/biz/Search.cs                  | 315 +++++++++++++++++-
 server/AyaNova/models/dto/TypeAndIdInfo.cs    |  13 -
 3 files changed, 334 insertions(+), 19 deletions(-)
 delete mode 100644 server/AyaNova/models/dto/TypeAndIdInfo.cs

diff --git a/server/AyaNova/Controllers/SearchController.cs b/server/AyaNova/Controllers/SearchController.cs
index 0c7e738c..39f84c4f 100644
--- a/server/AyaNova/Controllers/SearchController.cs
+++ b/server/AyaNova/Controllers/SearchController.cs
@@ -69,7 +69,32 @@ namespace AyaNova.Api.Controllers
             return Ok(ApiOkResponse.Response(SearchResults, true));
         }
 
+        /// <summary>
+        /// Get search result summary
+        /// </summary>
+        /// <param name="ayaType">Type of the object the excerpt is for</param>
+        /// <param name="id">Id of the object the excerpt is for</param>
+        /// <param name="phrase">Search phrase passed on to Search.GetInfoAsync</param>
+        /// <returns>A search result excerpt of object</returns>
+        [HttpGet("Info/{ayaType}/{id}")]
+        public async Task<IActionResult> GetInfo([FromRoute] AyaType ayaType, [FromRoute] long id, [FromQuery] string phrase)
+        {
+            if (serverState.IsClosed)
+                return StatusCode(503, new ApiErrorResponse(serverState.ApiErrorCode, null, serverState.Reason));
+
+            if (!Authorized.HasReadFullRole(HttpContext.Items, ayaType))
+                return StatusCode(403, new ApiNotAuthorizedResponse());
 
+            if (!ModelState.IsValid)
+                return BadRequest(new ApiErrorResponse(ModelState));
+            if(id==0){
+                return NotFound();
+            }
+
+            var res = await Search.GetInfoAsync(ct, UserTranslationIdFromContext.Id(HttpContext.Items), UserRolesFromContext.Roles(HttpContext.Items), phrase, ayaType, id);
+
+            return Ok(ApiOkResponse.Response(res, true));
+        }
 
 
         //------------
diff --git a/server/AyaNova/biz/Search.cs b/server/AyaNova/biz/Search.cs
index 185c5422..c9321a6a 100644
--- a/server/AyaNova/biz/Search.cs
+++ b/server/AyaNova/biz/Search.cs
@@ -121,12 +121,6 @@ namespace AyaNova.Biz
                 return ReturnObject;
             }
 
-            // todo: SEARCH UI
-            // - all searches without wildcards or quotes are "contains" searches by default and multiple phrases space delimited are accomodated
-            // - if user want's an exact search then they put it in quotes like google for MUST have in that exact form and case (if case insensitive mode)
-
-
-
             //IF PHRASE SPECIFIED <---wtf? why wouldn't it be?
             //escape literal percentage signs first just in case they are searching for 50% off or something
             //https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE
@@ -277,6 +271,315 @@ namespace AyaNova.Biz
 
         #endregion dosearch
 
+        #region Get info (excerpt)
+        /// <summary>
+        /// Prepare a search phrase for producing an excerpt of a single object (work in progress)
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): unfinished - the broken-out terms are computed but nothing is returned yet,
+        /// while SearchController.GetInfo awaits a result from this method; confirm the intended
+        /// return type and wire the terms into ExtractAndRank.
+        /// NOTE(review): phrase comes from an optional query string value and is dereferenced
+        /// unguarded, so a missing phrase would throw here; confirm how callers validate it.
+        /// </remarks>
+        public static async Task GetInfoAsync(AyContext ct, long translationId,
+            AuthorizationRoles currentUserRoles, string phrase, AyaType ayaType, long id)
+        {
+            //escape literal percentage signs first just in case they are searching for 50% off or something
+            //https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE
+            //need to get around breaking possibly losing the symbol so make it text
+            phrase = phrase.Replace("%", "pctsym");
+
+            //Modify Phrase to replace wildcard * with % as breakcore expects sql style wildcards
+            phrase = phrase.Replace("*", "%");
+
+            //BREAK SEARCH PHRASE INTO SEPARATE TERMS
+            var PhraseItems = await BreakSearchPhraseAsync(translationId, phrase);
+
+            PhraseItems.ToArray();
+
+
+        }
+
+
+
+        #region Search rank and extract
+        /// <summary>
+        /// Rank and extract best excerpt of specified text and search terms
+        /// </summary>
+        public sealed class ExtractAndRank
+        {
+
+            #region Fields
+            private string[] searchTerms;//normalized by Process: lower case, wildcards stripped, no empty entries
+            private string rawtext;
+            private string extract = "";
+            private bool flattenExtract = true;
+            private float ranking;
+            private int extractionThresholdRank = 10;
+            private int maximumCharactersToExtract = 80;
+            #endregion
+
+            #region Properties
+
+            /// <summary>
+            /// This is the ranking of the source text as it pertains to the
+            /// search terms
+            ///
+            /// A rank of zero means there was no match. A rank below
+            /// ExtractionThresholdRank is still reported here, but no excerpt extraction is done.
+            ///
+            /// It is a percentage value on a scale of 0 to 100
+            /// and is weighted:
+            ///
+            /// 75% of the score is the percentage of all search terms found in the text
+            /// 25% of the score is the percentage of all characters in the text that are search term characters
+            /// </summary>
+            public float Ranking
+            {
+                get
+                {
+                    return ranking;
+                }
+            }
+
+            /// <summary>
+            /// Maximum characters to appear in an extraction
+            /// default is 80
+            /// Minimum is 10
+            /// </summary>
+            public int MaximumCharactersToExtract
+            {
+                get
+                {
+                    return maximumCharactersToExtract;
+                }
+                set
+                {
+                    maximumCharactersToExtract = System.Math.Max(10, value);
+                }
+            }
+
+            /// <summary>
+            /// ExtractionThresholdRank
+            /// Extraction will only take place if the rank is
+            /// this value or higher
+            ///
+            /// default is 10, maximum is 100 minimum is 0
+            /// </summary>
+            public int ExtractionThresholdRank
+            {
+                get
+                {
+                    return extractionThresholdRank;
+                }
+                set
+                {
+                    extractionThresholdRank = System.Math.Min(100, System.Math.Max(0, value));
+                }
+            }
+
+            /// <summary>
+            /// If true, carriage returns, line feeds and tabs will be removed from extract
+            /// </summary>
+            public bool FlattenExtract
+            {
+                get
+                {
+                    return this.flattenExtract;
+                }
+                set
+                {
+                    this.flattenExtract = value;
+                }
+            }
+
+            /// <summary>
+            /// Extracted text excerpt that best reflects search terms
+            /// </summary>
+            public string Extract
+            {
+                get
+                {
+                    return extract;
+                }
+            }
+
+            #endregion
+
+            #region public methods
+            /// <summary>
+            /// Do the extraction and ranking
+            /// </summary>
+            /// <param name="rawText">Text to rank and take the excerpt from; null or empty leaves Ranking 0 and Extract empty</param>
+            /// <param name="searchTerms">Terms to look for; the % wildcard is ignored and null or empty terms are skipped</param>
+            public void Process(string rawText, string[] searchTerms)
+            {
+                ranking = 0;
+                extract = "";
+                if (string.IsNullOrEmpty(rawText)) return;
+                this.rawtext = rawText;
+
+                if (searchTerms == null || searchTerms.Length == 0) return;
+
+                //Normalize once up front instead of per scored window; a null term or one that is
+                //only wildcards can never match, and an empty term would stall the window scan
+                var usableTerms = new System.Collections.Generic.List<string>(searchTerms.Length);
+                foreach (string term in searchTerms)
+                {
+                    if (term == null) continue;
+                    string cleaned = term.ToLower(System.Globalization.CultureInfo.CurrentCulture).Replace("%", "");
+                    if (cleaned.Length > 0) usableTerms.Add(cleaned);
+                }
+                if (usableTerms.Count == 0) return;
+                this.searchTerms = usableTerms.ToArray();
+
+                ranking = score(0, this.rawtext.Length);
+                //"this value or higher" per ExtractionThresholdRank; a rank of zero is never extracted
+                if (ranking > 0 && ranking >= extractionThresholdRank)
+                    DoExtract();
+            }
+            #endregion
+
+            #region Calculate score
+            /// <summary>
+            /// Give a percentage score for a given window of
+            /// text in the raw text string
+            /// 75% of the score is the percentage of all search terms found in the window
+            /// 25% of the score is the percentage of all characters in the search window that are search term characters
+            /// </summary>
+            /// <param name="nStartPos">Inclusive start of the window, clamped to the text</param>
+            /// <param name="nEndPos">Exclusive end of the window, clamped to the text</param>
+            /// <returns>Float value of zero to one hundred</returns>
+            private float score(int nStartPos, int nEndPos)
+            {
+                System.Diagnostics.Debug.Assert(nStartPos < nEndPos);
+                if (nStartPos < 0) nStartPos = 0;
+                if (nEndPos > this.rawtext.Length) nEndPos = this.rawtext.Length;
+                //An empty window cannot match (and would divide by zero below)
+                if (nStartPos >= nEndPos) return 0;
+
+                string window = this.rawtext.Substring(nStartPos, nEndPos - nStartPos).ToLower(System.Globalization.CultureInfo.CurrentCulture);
+
+                int nMatches = 0;//how many of the search terms appear in the window
+                int nTermCharsInWindow = 0;//how many of the characters in the window are matching term characters
+
+                foreach (string term in searchTerms)
+                {
+                    int nLocation = window.IndexOf(term, System.StringComparison.Ordinal);
+                    if (nLocation == -1) continue;
+
+                    nMatches++;
+                    while (nLocation != -1)
+                    {
+                        nTermCharsInWindow += term.Length;
+                        //resume after this occurrence so overlapping hits are not counted twice
+                        nLocation = window.IndexOf(term, nLocation + term.Length, System.StringComparison.Ordinal);
+                    }
+                }
+
+                //If no matches then rank is automatically zero
+                if (nMatches == 0) return 0;
+
+                //Rank is calculated on a weighted scale
+                //75% for matching all search terms
+                //25% for the quantity of search terms versus other text found
+                float fTermsFoundPct = 75 * ((float)nMatches / (float)searchTerms.Length);
+                //different terms can still overlap each other, so cap the share at the whole window
+                float fTermsVsTextPct = 25 * System.Math.Min(1f, (float)nTermCharsInWindow / (float)window.Length);
+
+                return fTermsFoundPct + fTermsVsTextPct;
+            }
+            #endregion
+
+            #region Extract best excerpt
+            /// <summary>
+            /// Extract the best scoring excerpt fragments of
+            /// raw text
+            /// </summary>
+            private void DoExtract()
+            {
+                int windowSize = this.maximumCharactersToExtract;
+
+                //If the whole thing fits in a single window
+                //just save time and return the whole thing
+                if (this.rawtext.Length <= windowSize)
+                {
+                    this.extract = this.rawtext;
+                    return;
+                }
+
+                //slide a window over the text and check its score, the highest scoring window wins
+                //move the length of the shortest search term so as to ensure we won't
+                //miss it, but faster than moving one character at a time
+                int step = int.MaxValue;
+                foreach (string s in this.searchTerms)
+                {
+                    if (s.Length < step) step = s.Length;
+                }
+                if (step < 1) step = 1;
+
+                int lastStart = this.rawtext.Length - windowSize;
+                int bestStart = -1;
+                float bestScore = 0;
+                //where the window should be to put the terms in its center, -1 when not applicable
+                int centerStart = -1;
+
+                int x = 0;
+                while (true)
+                {
+                    float thisScore = score(x, x + windowSize);
+
+                    if (thisScore > bestScore)
+                    {
+                        bestScore = thisScore;
+                        bestStart = x;
+                        centerStart = x + (windowSize / 2);
+                    }
+                    else if (centerStart != -1)
+                    {
+                        if (thisScore < bestScore)
+                            centerStart = -1;//the run of equally good windows ended before the center
+                        else if (x >= centerStart)
+                        {
+                            //same score with the terms nearer the middle of the window
+                            bestStart = x;
+                            centerStart = -1;
+                        }
+                    }
+
+                    if (x == lastStart) break;
+                    //always finish on the final window so text at the very end is scored too
+                    x = System.Math.Min(x + step, lastStart);
+                }
+
+                if (bestStart == -1)
+                {
+                    //No window holds a complete term (every matched term is longer than the window)
+                    //so begin the excerpt where the earliest term starts
+                    string lowered = this.rawtext.ToLower(System.Globalization.CultureInfo.CurrentCulture);
+                    bestStart = lastStart;
+                    foreach (string s in this.searchTerms)
+                    {
+                        int at = lowered.IndexOf(s, System.StringComparison.Ordinal);
+                        if (at != -1 && at < bestStart) bestStart = at;
+                    }
+                }
+
+                string bestWindow = this.rawtext.Substring(bestStart, windowSize).Trim();
+                if (this.flattenExtract)
+                    bestWindow = bestWindow.Replace("\r", "").Replace("\n", "").Replace("\t", "");//case 1593 added tab character removal
+
+                this.extract = "..." + bestWindow + "...";
+            }
+
+            #endregion
+
+        }
+        #endregion Xtract
+
+
+        #endregion
 
         #region ProcessKeywords into Database
         //Class to hold process input parameters
diff --git a/server/AyaNova/models/dto/TypeAndIdInfo.cs b/server/AyaNova/models/dto/TypeAndIdInfo.cs
deleted file mode 100644
index ec00e096..00000000
--- a/server/AyaNova/models/dto/TypeAndIdInfo.cs
+++ /dev/null
@@ -1,13 +0,0 @@
-using AyaNova.Biz;
-
-namespace AyaNova.Models
-{
-
-    public class TypeAndIdInfo
-    {
-        public long ObjectId { get; set; }
-        public AyaType ObjectType { get; set; }
-    }
-
-
-}