finished this round of schema improvements

2020-05-21 19:42:17 +00:00
parent d62d7d3e8d
commit a8111b27d1
4 changed files with 95 additions and 65 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -51,7 +51,7 @@
        "AYANOVA_FOLDER_BACKUP_FILES": "c:\\temp\\RavenTestData\\backupfiles",
        "AYANOVA_METRICS_USE_INFLUXDB": "false",
        "AYANOVA_SERVER_TEST_MODE":"true",
-        "AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"huge",
+        "AYANOVA_SERVER_TEST_MODE_SEEDLEVEL":"small",
        "AYANOVA_SERVER_TEST_MODE_TZ_OFFSET":"-7",
        "AYANOVA_BACKUP_PG_DUMP_PATH":"C:\\data\\code\\PostgreSQLPortable_12.0\\App\\PgSQL\\bin\\"

--- a/devdocs/specs/core-performance.txt
+++ b/devdocs/specs/core-performance.txt
@@ -8,6 +8,10 @@ http://okigiveup.net/what-postgresql-tells-you-about-its-performance/
 Useful queries to indicate how indexes are being used in postgresql
 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

+2020-05-21 THIS SUPERSEDES BELOW  
+HOW TO FIND SHITTY INDEXES: https://gist.github.com/jberkus/6b1bcaf7724dfc2a54f3
+
+==-=-=-
 OK: this is the best query to use to find non used indexes
 Run it and look for any indexes that are *not* primary keys (name ends in pkey, want ones that end in idx which are mine)
that have zero in idx_scan, that means they are completely unused (if a lot of full test runs that exercise all routes and features have happened prior to checking)
--- a/devdocs/todo.txt
+++ b/devdocs/todo.txt
@@ -1,65 +1,6 @@
 {"login": "manager","password": "l3tm3in"}


-todo: Search indexing performance improvement and exception avoidance (Search.cs 828)
-	ON CONFLICT IDEA
-    https://www.postgresql.org/docs/current/sql-insert.html#SQL-ON-CONFLICT
-    Idea: do the insert manually with the clause "on conflict do nothing"
-    if detect it hasn't inserted (conflict) trigger a fetch instead
-    like what is being done now but won't have the exception to deal with!!
- var  CtAdd.SearchDictionary.FromSqlRaw("insert into asearchdictionary (word) values('{0}') on conflict (word) do update set word=excluded.word returning id",KeyWord ).FirstOrDefaultAsync();
-
-stored procedure?
-https://www.postgresqltutorial.com/plpgsql-loop-statements/
-------
-CREATE OR REPLACE PROCEDURE public.aydosearchindex(wordlist text[], ayobjectid bigint, ayobjecttype integer, cleanfirst boolean)
-    LANGUAGE 'plpgsql'
-    
-AS $BODY$DECLARE
- s text;
- wordid bigint;
-BEGIN
-  IF ayobjectid=0 THEN
-	RAISE EXCEPTION 'Bad object id --> %', ayobjectid;
-  END IF;
-  
-    IF ayobjecttype=0 THEN
-	RAISE EXCEPTION 'Bad object type --> %', ayobjecttype;
-  END IF;
-
-	  
-  IF cleanfirst=true THEN
-	delete from asearchkey where objectid=ayobjectid and objecttype=ayobjecttype;
-  END IF;
-
-	FOREACH s IN ARRAY wordlist
-	LOOP 
-		raise info 'processing word %',s;
-		SELECT id INTO wordid FROM asearchdictionary WHERE word = s;
-		raise info 'initial select found that word id is %', wordid;
-		IF wordid = NULL THEN
-			raise info 'since wordid was null inserting %...', wordid;
-			insert into asearchdictionary (word) values(s) on conflict (word) do update set word=excluded.word returning id into wordid;
-			raise info 'After insert new word returned word id %, inserting into searchkey', wordid;
-			insert into asearchkey (wordid,objectid,objecttype) values(wordid,ayobjectid,ayobjecttype);
-		ELSE
-			raise info 'since we have initial word id from select inserting into search key the Word id %', wordid;
-			insert into asearchkey (wordid,objectid,objecttype) values(wordid,ayobjectid,ayobjecttype);
-		END IF;
-   END LOOP;
-END;
-$BODY$;
-
------
-call aydosearchindex(ARRAY['sun','mon','tue','wed','thu','fri','sat'],3,3)
-------
-
-
-
-todo: Search confirm indexes are actually being used
-
-
-
 todo: OPS notification created for failed jobs
 	also maybe direct immediate email bypassing generator?
 	Add backup fail to this will stub out for now
--- a/server/AyaNova/util/AySchema.cs
+++ b/server/AyaNova/util/AySchema.cs
@@ -39,15 +39,98 @@ namespace AyaNova.Util
        AyaNova does a lot of name fetching so any tables that contain a lot of columns in addition to the name will benefit from a compound index on (id,name)

        Other indexes should be created with care and after a huge load and integration test periodically look for unused indexes and see how they are performing
+         HOW TO FIND SHITTY INDEXES: https://gist.github.com/jberkus/6b1bcaf7724dfc2a54f3
        see core-performance.txt for the relevant queries to view this info

        *****************************  WARNING: Be careful here, if a standard field is hideable and also its DB SCHEMA is set to NON NULLABLE then the CLIENT end needs to set a default
        *****************************  Otherwise the hidden field can't be set and the object can't be saved EVER

-
-        HOW TO FIND UNUSED INDEXES: https://gist.github.com/jberkus/6b1bcaf7724dfc2a54f3
+       

         */
+         #region unused index query
+/*
+COPY taken 2020-05-21 from https://gist.github.com/jberkus/6b1bcaf7724dfc2a54f3 (author: jberkus)
+
+WITH table_scans as (
+    SELECT relid,
+        tables.idx_scan + tables.seq_scan as all_scans,
+        ( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes,
+                pg_relation_size(relid) as table_size
+        FROM pg_stat_user_tables as tables
+),
+all_writes as (
+    SELECT sum(writes) as total_writes
+    FROM table_scans
+),
+indexes as (
+    SELECT idx_stat.relid, idx_stat.indexrelid,
+        idx_stat.schemaname, idx_stat.relname as tablename,
+        idx_stat.indexrelname as indexname,
+        idx_stat.idx_scan,
+        pg_relation_size(idx_stat.indexrelid) as index_bytes,
+        indexdef ~* 'USING btree' AS idx_is_btree
+    FROM pg_stat_user_indexes as idx_stat
+        JOIN pg_index
+            USING (indexrelid)
+        JOIN pg_indexes as indexes
+            ON idx_stat.schemaname = indexes.schemaname
+                AND idx_stat.relname = indexes.tablename
+                AND idx_stat.indexrelname = indexes.indexname
+    WHERE pg_index.indisunique = FALSE
+),
+index_ratios AS (
+SELECT schemaname, tablename, indexname,
+    idx_scan, all_scans,
+    round(( CASE WHEN all_scans = 0 THEN 0.0::NUMERIC
+        ELSE idx_scan::NUMERIC/all_scans * 100 END),2) as index_scan_pct,
+    writes,
+    round((CASE WHEN writes = 0 THEN idx_scan::NUMERIC ELSE idx_scan::NUMERIC/writes END),2)
+        as scans_per_write,
+    pg_size_pretty(index_bytes) as index_size,
+    pg_size_pretty(table_size) as table_size,
+    idx_is_btree, index_bytes
+    FROM indexes
+    JOIN table_scans
+    USING (relid)
+),
+index_groups AS (
+SELECT 'Never Used Indexes' as reason, *, 1 as grp
+FROM index_ratios
+WHERE
+    idx_scan = 0
+    and idx_is_btree
+UNION ALL
+SELECT 'Low Scans, High Writes' as reason, *, 2 as grp
+FROM index_ratios
+WHERE
+    scans_per_write <= 1
+    and index_scan_pct < 10
+    and idx_scan > 0
+    and writes > 100
+    and idx_is_btree
+UNION ALL
+SELECT 'Seldom Used Large Indexes' as reason, *, 3 as grp
+FROM index_ratios
+WHERE
+    index_scan_pct < 5
+    and scans_per_write > 1
+    and idx_scan > 0
+    and idx_is_btree
+    and index_bytes > 100000000
+UNION ALL
+SELECT 'High-Write Large Non-Btree' as reason, index_ratios.*, 4 as grp 
+FROM index_ratios, all_writes
+WHERE
+    ( writes::NUMERIC / ( total_writes + 1 ) ) > 0.02
+    AND NOT idx_is_btree
+    AND index_bytes > 100000000
+ORDER BY grp, index_bytes DESC )
+SELECT reason, schemaname, tablename, indexname,
+    index_scan_pct, scans_per_write, index_size, table_size
+FROM index_groups;
+*/
+         #endregion

        static int startingSchema = -1;
        public static int currentSchema = -1;
@@ -154,6 +237,8 @@ namespace AyaNova.Util
                "ayid bigint not null, ayatype integer not null, ayevent integer not null, textra varchar(255))");
                //INDEX: Most selective first as there are more unique IDs than unique types
                await ExecQueryAsync("CREATE INDEX aevent_typeid_idx ON aevent (ayid, ayatype);");
+                
+                //TODO: this may be a rarely used index, revisit it down the road
                await ExecQueryAsync("CREATE INDEX aevent_userid_idx ON aevent (userid);");


@@ -216,10 +301,10 @@ $BODY$;

                //create translation text tables
                await ExecQueryAsync("CREATE TABLE atranslation (id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, name varchar(255) not null, stock bool, cjkindex bool default false)");
-                //LOOKAT: I don't think this is doing anything:
-                //exec("CREATE UNIQUE INDEX atranslation_name_idx ON atranslation (name)");
+              
                await ExecQueryAsync("CREATE TABLE atranslationitem (id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, translationid bigint not null REFERENCES atranslation (id), key text not null, display text not null)");
-                await ExecQueryAsync("CREATE INDEX atranslationitem_translationid_key_display_idx ON atranslationitem (translationid,key, display)");
+               //This is not a well used index, not sure what its point is
+               // await ExecQueryAsync("CREATE INDEX atranslationitem_translationid_key_display_idx ON atranslationitem (translationid,key, display)");

                //Load the default TRANSLATIONS
                await AyaNova.Biz.PrimeData.PrimeTranslations();