This commit is contained in:
2020-01-24 21:12:07 +00:00
parent 0f17207d20
commit bacf09eefa
2 changed files with 57 additions and 80 deletions

View File

@@ -5,55 +5,6 @@ eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOiIxNTcxODU5OTU0IiwiZXhwIjoiMTU3MjQ
## IMMEDIATE ITEMS
///////////////////////////////////////////////////////////////////////////////
TODO: Search indexing is painfully slow, it accounts for 16 of 22 seconds when creating 500 widgets with full paragraphs of text
- Try to see if it's just one part of the operation by timing it
- Re-code it not using EF but directly interacting with the DB
- Maybe it's a case for stored procedures or something?
SEARCH INDEXING PERFORMANCE WORK
Baseline from before doing anything seeding a medium level with full text
2020-01-21 16:49:17.4662|INFO|Seeder|75 Users seeded in 2279 ms
2020-01-21 16:49:39.4481|INFO|Seeder|500 Widgets seeded in 21968 ms
After round one of improvements (less text in seed data notes, not calling savechanges or add async)
//About 2 seconds came from the async DB changes; the rest came from using less text (so less indexing), which isn't really a permanent solution, just a workaround
2020-01-23 16:57:57.0422|INFO|Seeder|75 Users seeded in 2398 ms
2020-01-23 16:58:11.9983|INFO|Seeder|500 Widgets seeded in 14958 ms
TODO: Find out if this is linear time for more widgets or exponential to see if it exposes part of the issue
X widgets, ms per widget:
100=32
500=29 (27 in non debug mode)
5000=29
Stripped out all text to index except single letter a in notes and c2
500=20
Now going to try the opposite, a *lot* of text 10 paragraphs in both c2 and notes
500=59ms
So the quantity of text directly affects the performance, so it's not just some overhead from the query being run, it's the amount of work it needs to do in the queries
THINGS TO TRY:
Completely alternate methods:
- https://stackoverflow.com/a/15089664/8939 Store a Digest of each record with that record then can just search the digests (would mean a search has to traverse all records of every table possibly)
DB INDEX TUNING?
- Play with the indexes and see if an unnecessary index is causing a slowdown
Async the keyword processing
- Fire off the indexing and return immediately; the index would need a bit of time to come into compliance, and it may cause more clashes?
Removing use of EF entirely in search indexing processing in favor of direct sql queries
cache or provide directly the locale to save time repeatedly fetching it when doing bulk ops!!!
- After doing this, 500=21. That's as fast as when I stripped out all the text; what a huge overhead saving right there!:
2020-01-24 12:00:41.2547|INFO|Seeder|Seeding 500 Widgets....
2020-01-24 12:00:51.9138|INFO|Seeder|500 Widgets seeded in 10649 ms
///////////////////////////////////////////////////////////////////////////////

View File

@@ -524,7 +524,63 @@ WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 's
var log = AyaNova.Util.ApplicationLogging.CreateLogger("### Search::ProcessKeywords ###");
#endif
//TODO: this foreach block needs to add all the words at once in a single range query rather than one word at a time
#region PERFORMANCE NOTES / EXPERIMENTS
/*
This next block is where all the slowness exists.
I've played with it and brought it down to half the original time it took, and could likely find more savings;
however, that's not a good use of time right now and it really only affects bulk ops (currently just seeding),
so I'm keeping my notes here just in case I take another whack at it.
///////////////////////////////////////////////////////////////////////////////
TODO: Search indexing is painfully slow, it accounts for 16 of 22 seconds when creating 500 widgets with full paragraphs of text
- Try to see if it's just one part of the operation by timing it
- Re-code it not using EF but directly interacting with the DB
- Maybe it's a case for stored procedures or something?
SEARCH INDEXING PERFORMANCE WORK
Baseline from before doing anything seeding a medium level with full text
2020-01-21 16:49:17.4662|INFO|Seeder|75 Users seeded in 2279 ms
2020-01-21 16:49:39.4481|INFO|Seeder|500 Widgets seeded in 21968 ms
After round one of improvements (less text in seed data notes, not calling savechanges or add async)
//About 2 seconds came from the async DB changes; the rest came from using less text (so less indexing), which isn't really a permanent solution, just a workaround
2020-01-23 16:57:57.0422|INFO|Seeder|75 Users seeded in 2398 ms
2020-01-23 16:58:11.9983|INFO|Seeder|500 Widgets seeded in 14958 ms
TODO: Find out if this is linear time for more widgets or exponential to see if it exposes part of the issue
X widgets, ms per widget:
100=32
500=29 (27 in non debug mode)
5000=29
Stripped out all text to index except single letter a in notes and c2
500=20
Now going to try the opposite, a *lot* of text 10 paragraphs in both c2 and notes
500=59ms
So the quantity of text directly affects the performance, so it's not just some overhead from the query being run, it's the amount of work it needs to do in the queries
THINGS TO TRY:
Completely alternate methods:
- https://stackoverflow.com/a/15089664/8939 Store a Digest of each record with that record then can just search the digests (would mean a search has to traverse all records of every table possibly)
DB INDEX TUNING?
- Play with the indexes and see if an unnecessary index is causing a slowdown
Async the keyword processing
- Fire off the indexing and return immediately; the index would need a bit of time to come into compliance, and it may cause more clashes?
Removing use of EF entirely in search indexing processing in favor of direct sql queries
cache or provide directly the locale to save time repeatedly fetching it when doing bulk ops!!!
- After doing this, 500=21. That's as fast as when I stripped out all the text; what a huge overhead saving right there!:
2020-01-24 12:00:41.2547|INFO|Seeder|Seeding 500 Widgets....
2020-01-24 12:00:51.9138|INFO|Seeder|500 Widgets seeded in 10649 ms
///////////////////////////////////////////////////////////////////////////////
*/
#endregion performance notes experiments
foreach (string KeyWord in KeyWordList)
{
if (!ExistingKeywordMatches.ContainsValue(KeyWord))
@@ -537,12 +593,6 @@ WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 's
try
{
//ORIGINAL
// //ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST
// var CtAdd = ServiceProviderProvider.DBContext;
// CtAdd.SearchDictionary.Add(NewWord);
// CtAdd.SaveChanges();
//ADD WORD TO DICTIONARY, SAVE THE ID INTO THE MATCHINGKEYWORDIDLIST
@@ -550,36 +600,12 @@ WHERE a.word IN ('eos', 'quia', 'voluptate', 'delectus', 'sapiente', 'omnis', 's
CtAdd.SearchDictionary.Add(NewWord);
CtAdd.SaveChanges();
/*
LOG of above operation:
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Command|Executing DbCommand [Parameters=[@p0='quia' (Nullable = false)], CommandType='Text', CommandTimeout='30']
INSERT INTO asearchdictionary (word)
VALUES (@p0)
RETURNING id, xmin;
2020-01-24 10:08:40.5768|INFO|Microsoft.EntityFrameworkCore.Database.Command|Executed DbCommand (2ms) [Parameters=[@p0='quia' (Nullable = false)], CommandType='Text', CommandTimeout='30']
INSERT INTO asearchdictionary (word)
VALUES (@p0)
RETURNING id, xmin;
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.ChangeTracking|Foreign key property 'SearchDictionary.Id' detected as changed from '-9223372036854774587' to '221' for entity with key '{Id: 221}'.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Command|A data reader was disposed.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Transaction|Committing transaction.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Transaction|Committing transaction.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Connection|Closing connection to database 'AyaNova' on server 'tcp://localhost:5432'.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Connection|Closed connection to database 'AyaNova' on server 'tcp://localhost:5432'.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Database.Transaction|Disposing transaction.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.ChangeTracking|The 'SearchDictionary' entity with key '{Id: 221}' tracked by 'AyContext' changed from 'Added' to 'Unchanged'.
2020-01-24 10:08:40.5768|DEBUG|Microsoft.EntityFrameworkCore.Update|SaveChanges completed for 'AyContext' with 1 entities written to the database.
*/
//-------
//Add to matching keywords
MatchingKeywordIdList.Add(new MatchingDictionaryEntry() { DictionaryId = NewWord.Id, InName = NameKeyWordList.Contains(KeyWord) });
//-------
//It exists now
ExistingKeywordMatches.Add(NewWord.Id, NewWord.Word);
}