2018-10-03 00:39:36 +00:00
parent 6c272662ed
commit 896521fbed
7 changed files with 245 additions and 80 deletions
--- a/devdocs/specs/core-performance.txt
+++ b/devdocs/specs/core-performance.txt
@@ -0,0 +1,94 @@
+PERFORMANCE SPECS AND USEFUL INFO
+
+Useful queries to indicate how indexes are being used in postgresql
+
+This is a test query I used with widget and name fetching performance analysis:
+explain analyze SELECT m.name
+FROM awidget AS m
+WHERE m.id = 12989
+LIMIT 1
+
+//All index data collected by postgresql
+
+select * from pg_stat_user_indexes
+
+
+Reveals Unused indices
+=-=-=-=-=-=-=-=-=-=-=-
+
+SELECT 
+    relid::regclass AS table, 
+    indexrelid::regclass AS index, 
+    pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS index_size, 
+    idx_tup_read, 
+    idx_tup_fetch, 
+    idx_scan
+FROM 
+    pg_stat_user_indexes 
+    JOIN pg_index USING (indexrelid) 
+WHERE 
+    idx_scan = 0
+    AND indisunique IS FALSE
+
+
+Shows info on all indices
+=-=-=-=-=-=-=-=-=-=-=-=-=-
+SELECT
+    t.tablename,
+    indexname,
+    c.reltuples AS num_rows,
+    pg_size_pretty(pg_relation_size(quote_ident(t.tablename)::text)) AS table_size,
+    pg_size_pretty(pg_relation_size(quote_ident(indexrelname)::text)) AS index_size,
+    CASE WHEN indisunique THEN 'Y'
+       ELSE 'N'
+    END AS UNIQUE,
+    idx_scan AS number_of_scans,
+    idx_tup_read AS tuples_read,
+    idx_tup_fetch AS tuples_fetched
+FROM pg_tables t
+LEFT OUTER JOIN pg_class c ON t.tablename=c.relname
+LEFT OUTER JOIN
+    ( SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns, idx_scan, idx_tup_read, idx_tup_fetch, indexrelname, indisunique FROM pg_index x
+           JOIN pg_class c ON c.oid = x.indrelid
+           JOIN pg_class ipg ON ipg.oid = x.indexrelid
+           JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid )
+    AS foo
+    ON t.tablename = foo.ctablename
+WHERE t.schemaname='public'
+ORDER BY 7,1,2;
+
+
+Show performance of indices that are being used
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+SELECT indexrelname,cast(idx_tup_read AS numeric) / idx_scan AS avg_tuples,idx_scan,idx_tup_read FROM pg_stat_user_indexes WHERE idx_scan > 0;
+
+
+
+
+WORK IN PROGRESS:
+
+Search result list:
+
+//Before attempt to optimize name fetcher (unknown number of results)
+//22548, 21187, 20462, 22336, 20094 - AVG = 21325
+
+14244 results with index scan: 24141, 29549, 23366, 24085, 23335 AVG: 24895 = 1.7ms per result
+
+Removed index but kept data:
+14244 results without index scan: 23391, 22623, 21428, 22607, 23106  ANOMALOUS, disregarding
+
+### 14244 results without index scan (after a restart of server): 24124, 21157, 21178, 21187, 21932 AVG: 21915 = 1.53ms per result #####
+
+14244 results without index scan (after a restart of server and using a fresh aycontext for each query): 32336, 31794...clearly much slower, abandoning this avenue
+14244 results without index scan (after a restart of server and using asnotracking for each query): 24625, 21387, 21905, 22190 ... not a dramatic difference, keeping the notracking code in as it makes sense but need to look elsewhere
+14244 results without index scan (after a restart of server and bypassing EF entirely with a direct query INITIAL NAIVE ATTEMPT): 13955, 13365, 13421, 13445, 13271
+
+
+### 14244 results without index scan (after a restart of server and bypassing EF entirely with a direct query OPTIMIZED TO REUSE CONNECTION): 12707, 12341, 12733, 12487, 12452 AVG: 12,544 = .88ms per result  ####
+
+Now I'm going to try it with the index put back in and data regenerated
+### 14244 results with index in place (after a restart of server and bypassing EF entirely with a direct query OPTIMIZED TO REUSE CONNECTION): 11229, 15480, 13763, 13051, 13178 AVG: 13,340 = .936ms per result
+
+Now fresh test but without index being created
+### 14244 results without index in place (after a restart of server, and bypassing EF entirely with a direct query OPTIMIZED TO REUSE CONNECTION): 14270 results - 13176, 12688, 13179, 12994, 12272 AVG: 12,861 = .90ms per result
+
--- a/devdocs/todo.txt
+++ b/devdocs/todo.txt
@@ -27,7 +27,14 @@ Once that is done then can steam ahead on the biz objects but until I have the c

 IMMEDIATE ITEMS:
 ================
-
+Current huge seeding takes around 15 minutes
+Current unbounded search for "et*" in huge dataset takes avg 21 seconds to process 
+{
+  "phrase": "et*",
+  "nameOnly": false,
+  "typeOnly": 0,
+  "maxResults": 0
+}

 	- Search and search text indexing		
 	- https://gist.github.com/ruckus/5718112			
@@ -35,22 +42,12 @@ IMMEDIATE ITEMS:
 		- Can it be sped up with the huge dataset test
 		(The actual slowness is directly related to namefetch so that's where I am concentrating effort)
 		
-			- First up is to test generate data again and see if the name fetcher query uses the compound name/id index I added when data is generated freshly
-				- First add to ayschema these two indexes (2 in case one preferred over other or order issues)
-	
-	CREATE INDEX widget_idx_test_name_id2
-    ON public.awidget USING btree
-    (name COLLATE pg_catalog."default", id)
-    TABLESPACE pg_default;
-	
-				CREATE UNIQUE INDEX widget_idx_name_id
-    ON public.awidget USING btree
-    (id, name COLLATE pg_catalog."default")
-    TABLESPACE pg_default;
-
-
-			
-		
+		- One final test *WITH* the index again just to confirm the earlier results, if it's confirmed slower then remove the index 
+		 but keep the code in place in case it becomes faster again with it later
+		- Re-run the integration tests for search performance and set a new benchmark minimum for them, as search should be way faster now
+		- Change return object from a search to include a count of results plus the restricted list of results	
+			- this way the Client can know there are many more results that just weren't shown, so they can narrow their terms
+		- see if any other callers to name fetcher are in tight loops and could benefit from using the new Direct version
 		- Update all the other routes to include search indexing (attachments, tags etc, anything with text in it)