This commit is contained in:
@@ -69,7 +69,32 @@ namespace AyaNova.Api.Controllers
|
|||||||
return Ok(ApiOkResponse.Response(SearchResults, true));
|
return Ok(ApiOkResponse.Response(SearchResults, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Get search result summary
/// </summary>
/// <param name="ayaType">Type of the object to summarize (route value); also used for the read-role check</param>
/// <param name="id">Id of the object to summarize (route value); an id of 0 yields a 404</param>
/// <param name="phrase">Search phrase (query string value), forwarded unchanged to Search.GetInfoAsync</param>
/// <returns>A search result excerpt of object</returns>
[HttpGet("Info/{ayaType}/{id}")]
public async Task<IActionResult> GetInfo([FromRoute] AyaType ayaType, [FromRoute] long id, [FromQuery] string phrase)
{
    // Checks run in a fixed order: server state, authorization, model validity, then id
    if (serverState.IsClosed)
    {
        return StatusCode(503, new ApiErrorResponse(serverState.ApiErrorCode, null, serverState.Reason));
    }

    if (!Authorized.HasReadFullRole(HttpContext.Items, ayaType))
    {
        return StatusCode(403, new ApiNotAuthorizedResponse());
    }

    if (!ModelState.IsValid)
    {
        return BadRequest(new ApiErrorResponse(ModelState));
    }

    if (id == 0)
    {
        return NotFound();
    }

    // Resolve the caller's translation and roles from the request context, then fetch the excerpt
    var callerTranslationId = UserTranslationIdFromContext.Id(HttpContext.Items);
    var callerRoles = UserRolesFromContext.Roles(HttpContext.Items);
    var excerpt = await Search.GetInfoAsync(ct, callerTranslationId, callerRoles, phrase, ayaType, id);

    return Ok(ApiOkResponse.Response(excerpt, true));
}
|
||||||
|
|
||||||
//------------
|
//------------
|
||||||
|
|
||||||
|
|||||||
@@ -121,12 +121,6 @@ namespace AyaNova.Biz
|
|||||||
return ReturnObject;
|
return ReturnObject;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: SEARCH UI
|
|
||||||
// - all searches without wildcards or quotes are "contains" searches by default and multiple phrases space delimited are accommodated
|
|
||||||
// - if user wants an exact search then they put it in quotes like google for MUST have in that exact form and case (if case insensitive mode)
|
|
||||||
|
|
||||||
|
|
||||||
//IF PHRASE SPECIFIED <---wtf? why wouldn't it be?
|
|
||||||
|
|
||||||
//escape literal percentage signs first just in case they are searching for 50% off or something
|
//escape literal percentage signs first just in case they are searching for 50% off or something
|
||||||
//https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE
|
//https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE
|
||||||
@@ -277,6 +271,313 @@ namespace AyaNova.Biz
|
|||||||
|
|
||||||
#endregion dosearch
|
#endregion dosearch
|
||||||
|
|
||||||
|
#region Get info (excerpt)
|
||||||
|
/// <summary>
/// Get a search result excerpt for a single object.
/// NOTE(review): this method is unfinished. It prepares the search terms but the
/// lookup of the object's text and the excerpt extraction are not written yet,
/// so it currently throws for any non-empty phrase.
/// </summary>
/// <param name="ct">Database context (not used yet)</param>
/// <param name="translationId">Translation id passed to BreakSearchPhraseAsync</param>
/// <param name="currentUserRoles">Roles of the requesting user (not used yet)</param>
/// <param name="phrase">Search phrase; '*' is accepted as a wildcard</param>
/// <param name="ayaType">Type of the object to excerpt (not used yet)</param>
/// <param name="id">Id of the object to excerpt (not used yet)</param>
/// <returns>An empty string when no phrase is supplied</returns>
/// <exception cref="System.NotImplementedException">Always, for a non-empty phrase, until the excerpt lookup is implemented</exception>
public static async Task<string> GetInfoAsync(AyContext ct, long translationId,
    AuthorizationRoles currentUserRoles, string phrase, AyaType ayaType, long id)
{
    // The phrase comes from an optional query string value; without one there is
    // nothing to rank against, so there is no excerpt (previously a null phrase
    // caused a NullReferenceException on the Replace call below)
    if (string.IsNullOrWhiteSpace(phrase))
    {
        return string.Empty;
    }

    //escape literal percentage signs first just in case they are searching for 50% off or something
    //https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE
    //need to get around breaking possibly losing the symbol so make it text
    phrase = phrase.Replace("%", "pctsym");

    //Modify Phrase to replace wildcard * with % as breakcore expects sql style wildcards
    phrase = phrase.Replace("*", "%");

    //BREAK SEARCH PHRASE INTO SEPARATE TERMS
    var PhraseItems = await BreakSearchPhraseAsync(translationId, phrase);

    // Keep the terms as an array (the original call discarded this result);
    // presumably intended as the searchTerms argument of ExtractAndRank.Process - TODO confirm
    var searchTerms = PhraseItems.ToArray();

    // TODO: fetch the searchable text for (ayaType, id), run it through ExtractAndRank
    // with searchTerms and return the extract. The original body ended here without a
    // return statement, which does not compile for a Task<string> method.
    throw new System.NotImplementedException("Search.GetInfoAsync: excerpt extraction is not implemented yet");
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#region Search rank and extract
|
||||||
|
/// <summary>
/// Rank and extract best excerpt of specified text and search terms
/// </summary>
public sealed class ExtractAndRank
{

    #region Fields
    // Usable search terms: lower-cased, '%' wildcard removed, null/empty terms dropped
    private string[] searchTerms = new string[0];
    private string rawtext = "";
    private string extract = "";
    private bool flattenExtract = true;
    private float ranking;
    private int extractionThresholdRank = 10;
    private int maximumCharactersToExtract = 80;
    #endregion

    #region Properties

    /// <summary>
    /// This is the ranking of the source text as it pertains to the
    /// search terms
    ///
    /// A rank of zero means there was no match (or nothing to search),
    /// in which case no excerpt extraction is done.
    ///
    /// It is a percentage value on a scale of 0 to 100
    /// and is weighted:
    ///
    /// 75% of the score is the percentage of all search terms found in the text
    /// 25% of the score is the percentage of all characters in the text that are search term characters
    /// </summary>
    public float Ranking
    {
        get
        {
            return ranking;
        }
    }

    /// <summary>
    /// Maximum characters to appear in an extraction
    /// default is 80
    /// Minimum is 10
    /// </summary>
    public int MaximumCharactersToExtract
    {
        get
        {
            return maximumCharactersToExtract;
        }
        set
        {
            // Anything below the minimum is raised to 10
            maximumCharactersToExtract = value > 10 ? value : 10;
        }
    }

    /// <summary>
    /// ExtractionThresholdRank
    /// Extraction will only take place if the rank is
    /// this value or higher
    ///
    /// default is 10, maximum is 100 minimum is 0
    /// </summary>
    public int ExtractionThresholdRank
    {
        get
        {
            return extractionThresholdRank;
        }
        set
        {
            // Clamp into the 0..100 range
            if (value > 100)
            {
                extractionThresholdRank = 100;
            }
            else if (value < 0)
            {
                extractionThresholdRank = 0;
            }
            else
            {
                extractionThresholdRank = value;
            }
        }
    }

    /// <summary>
    /// If true, carriage returns, line feeds and tabs will be removed from an
    /// extracted window. Text short enough to be returned whole is returned unmodified.
    /// </summary>
    public bool FlattenExtract
    {
        get
        {
            return this.flattenExtract;
        }
        set
        {
            this.flattenExtract = value;
        }
    }

    /// <summary>
    /// Extracted text excerpt that best reflects search terms
    /// </summary>
    public string Extract
    {
        get
        {
            return extract;
        }
    }

    #endregion

    #region public methods
    /// <summary>
    /// Do the extraction and ranking.
    /// Resets <see cref="Ranking"/> and <see cref="Extract"/> first; both stay at
    /// zero / empty when there is no text or no usable search term.
    /// </summary>
    /// <param name="rawText">Text to rank and take the excerpt from</param>
    /// <param name="searchTerms">Terms to look for; matching is case insensitive and '%' wildcards are ignored</param>
    public void Process(string rawText, string[] searchTerms)
    {
        ranking = 0;
        extract = "";
        this.rawtext = "";
        this.searchTerms = new string[0];

        if (string.IsNullOrEmpty(rawText))
        {
            return;
        }
        this.rawtext = rawText;

        if (searchTerms == null || searchTerms.Length == 0)
        {
            return;
        }

        // Normalize once here rather than for every window that gets scored
        this.searchTerms = NormalizeTerms(searchTerms);
        if (this.searchTerms.Length == 0)
        {
            return;
        }

        ranking = score(0, this.rawtext.Length);

        // "this value or higher"; a zero rank never extracts, even with a zero threshold
        if (ranking > 0 && ranking >= extractionThresholdRank)
        {
            DoExtract();
        }
    }
    #endregion

    #region Calculate score
    /// <summary>
    /// Lower-case the terms and strip the '%' wildcard, dropping terms that are
    /// null or empty afterwards (an empty term can never be matched meaningfully
    /// and previously made the scoring loop throw).
    /// </summary>
    private static string[] NormalizeTerms(string[] terms)
    {
        var usable = new System.Collections.Generic.List<string>(terms.Length);
        foreach (string term in terms)
        {
            if (term == null)
            {
                continue;
            }

            string normalized = term.ToLower(System.Globalization.CultureInfo.CurrentCulture).Replace("%", "");
            if (normalized.Length > 0)
            {
                usable.Add(normalized);
            }
        }
        return usable.ToArray();
    }

    /// <summary>
    /// Give a percentage score for a given window of
    /// text in the raw text string
    /// 75% of the score is the percentage of all search terms found in the window
    /// 25% of the score is the percentage of all characters in the search window that are search term characters
    /// </summary>
    /// <param name="nStartPos">Inclusive start of the window; clamped to 0</param>
    /// <param name="nEndPos">Exclusive end of the window; clamped to the text length</param>
    /// <returns>Float value of zero to one hundred</returns>
    private float score(int nStartPos, int nEndPos)
    {
        if (nStartPos < 0) nStartPos = 0;
        if (nEndPos > this.rawtext.Length) nEndPos = this.rawtext.Length;

        // Empty window or nothing to look for
        if (nStartPos >= nEndPos || searchTerms.Length == 0)
        {
            return 0;
        }

        string window = this.rawtext.Substring(nStartPos, nEndPos - nStartPos).ToLower(System.Globalization.CultureInfo.CurrentCulture);

        int termCharsInWindow = 0;//how many of the characters in the window are matching term characters
        int termsFound = 0;

        foreach (string term in searchTerms)
        {
            // Ordinal: both sides are already lower-cased, and culture-sensitive
            // IndexOf can report matches at unexpected positions
            int location = window.IndexOf(term, System.StringComparison.Ordinal);
            if (location == -1)
            {
                continue;
            }

            termsFound++;
            while (location != -1)
            {
                termCharsInWindow += term.Length;
                // Step past the whole match so overlapping hits are not counted twice
                location = window.IndexOf(term, location + term.Length, System.StringComparison.Ordinal);
            }
        }

        //If no matches then rank is automatically zero
        if (termsFound == 0)
        {
            return 0;
        }

        //Rank is calculated on a weighted scale
        //75% for matching all search terms
        //25% for the quantity of search terms versus other text found
        float termsFoundPct = 75 * ((float)termsFound / (float)searchTerms.Length);
        float termsVsTextPct = 25 * ((float)termCharsInWindow / (float)window.Length);

        // Different terms can cover the same characters; keep this share within its 25 points
        if (termsVsTextPct > 25)
        {
            termsVsTextPct = 25;
        }

        return termsFoundPct + termsVsTextPct;
    }
    #endregion

    #region Extract best excerpt
    /// <summary>
    /// Extract the best scoring excerpt fragments of
    /// raw text
    /// </summary>
    private void DoExtract()
    {
        int windowLength = this.maximumCharactersToExtract;

        //If the whole thing fits in the max to extract
        //just save time and return the whole thing
        if (this.rawtext.Length <= windowLength)
        {
            this.extract = this.rawtext;
            return;
        }

        //Get the shortest search term length so
        //we can save time iterating over the window below
        int step = int.MaxValue;
        foreach (string term in this.searchTerms)
        {
            if (term.Length < step)
            {
                step = term.Length;
            }
        }
        // Never step by zero (endless loop) or by the "no terms" sentinel
        if (step < 1 || step == int.MaxValue)
        {
            step = 1;
        }

        string bestWindow = "";
        float bestScore = 0;
        int centeredStart = 0;
        int lastStart = this.rawtext.Length - windowLength;

        //slide a window over the text and check it's score, the highest scoring window wins
        //move the length of the shortest search term so as to ensure we won't
        //miss it, but faster than moving one character at a time
        int x = 0;
        while (true)
        {
            float thisScore = score(x, x + windowLength);

            if (thisScore > bestScore)
            {
                bestScore = thisScore;
                bestWindow = this.rawtext.Substring(x, windowLength);
                //Best window to get if the future score is equal
                //I.E. put the terms in the center of the window if
                //the score is equal
                centeredStart = x + (windowLength / 2);
            }
            else if (thisScore > 0 && thisScore == bestScore && x == centeredStart)
            {
                //Equal to the best and positioned over the best spot, capture that
                bestWindow = this.rawtext.Substring(x, windowLength);
            }

            if (x >= lastStart)
            {
                break;
            }

            // Always finish on the final window so matches at the very end of the text are scored
            x = (lastStart - x > step) ? x + step : lastStart;
        }

        // No window holds a whole term (e.g. a term longer than the window):
        // fall back to the start of the text rather than an empty excerpt
        if (bestScore == 0)
        {
            bestWindow = this.rawtext.Substring(0, windowLength);
        }

        string excerpt = bestWindow.Trim();
        if (this.flattenExtract)
        {
            //case 1593 added tab character removal
            excerpt = excerpt.Replace("\r", "").Replace("\n", "").Replace("\t", "");
        }

        this.extract = "..." + excerpt + "...";
    }

    //========================================================================

    #endregion

}
|
||||||
|
#endregion Xtract
|
||||||
|
|
||||||
|
|
||||||
|
#endregion
|
||||||
#region ProcessKeywords into Database
|
#region ProcessKeywords into Database
|
||||||
|
|
||||||
//Class to hold process input parameters
|
//Class to hold process input parameters
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
using AyaNova.Biz;
|
|
||||||
|
|
||||||
namespace AyaNova.Models
|
|
||||||
{
|
|
||||||
|
|
||||||
/// <summary>
/// Simple holder pairing an object id with its AyaType.
/// </summary>
public class TypeAndIdInfo
|
|
||||||
{
|
|
||||||
/// <summary>Id of the object</summary>
public long ObjectId { get; set; }
|
|
||||||
/// <summary>AyaType of the object</summary>
public AyaType ObjectType { get; set; }
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user