50ee68bf6b4f9c585f5ac772826fe59a7c890878
[online_analyze.git] / online_analyze.c
1 /*
2  * Copyright (c) 2011 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *        notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *        notice, this list of conditions and the following disclaimer in the
12  *        documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *        may be used to endorse or promote products derived from this software
15  *        without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include "postgres.h"
31
32 #include "pgstat.h"
33 #include "catalog/namespace.h"
34 #include "commands/vacuum.h"
35 #include "executor/executor.h"
36 #include "nodes/nodes.h"
37 #include "nodes/parsenodes.h"
38 #include "storage/bufmgr.h"
39 #include "utils/builtins.h"
40 #include "utils/lsyscache.h"
41 #include "utils/guc.h"
42 #if PG_VERSION_NUM >= 90200
43 #include "catalog/pg_class.h"
44 #include "nodes/primnodes.h"
45 #include "tcop/utility.h"
46 #include "utils/rel.h"
47 #include "utils/relcache.h"
48 #include "utils/timestamp.h"
49 #endif
50
51 #ifdef PG_MODULE_MAGIC
52 PG_MODULE_MAGIC;
53 #endif
54
/* Master switch: when false, onlineAnalyzeHooker never triggers an analyze. */
static bool online_analyze_enable = true;
/* When true, ANALYZE is run in VERBOSE mode and its duration is reported at INFO. */
static bool online_analyze_verbose = true;
/* Fraction of the table's (live + dead) tuples that must have changed. */
static double online_analyze_scale_factor = 0.1;
/* Fixed number of changed rows added to the scaled part of the trigger. */
static int online_analyze_threshold = 50;
/*
 * Minimum interval, in milliseconds, since the last manual and the last
 * autovacuum analyze of a table before makeAnalyze analyzes it again.
 */
static double online_analyze_min_interval = 10000;

/* Hooks that were installed before ours; saved in _PG_init, restored in _PG_fini. */
static ExecutorEnd_hook_type oldExecutorEndHook = NULL;
#if PG_VERSION_NUM >= 90200
static ProcessUtility_hook_type oldProcessUtilityHook = NULL;
#endif
65
/*
 * Classes of tables eligible for on-line analyze.  The values form a bit
 * mask: makeAnalyze tests (online_analyze_table_type & reltype), so
 * OATT_ALL must stay equal to OATT_PERSISTENT | OATT_TEMPORARY.
 */
typedef enum
{
	OATT_NONE		= 0x00,
	OATT_PERSISTENT	= 0x01,
	OATT_TEMPORARY	= 0x02,
	OATT_ALL		= 0x03
} OnlineAnalyzeTableType;
73
/*
 * Accepted spellings of the online_analyze.table_type setting and the
 * OnlineAnalyzeTableType value each one maps to.  The all-NULL entry
 * terminates the list.
 */
static const struct config_enum_entry online_analyze_table_type_options[] = 
{
        {"all", OATT_ALL, false},
        {"persistent", OATT_PERSISTENT, false},
        {"temporary", OATT_TEMPORARY, false},
        {"none", OATT_NONE, false},
        {NULL, 0, false},
};

/* Backing variable of online_analyze.table_type; holds an OnlineAnalyzeTableType. */
static int online_analyze_table_type = (int)OATT_ALL;
84
/*
 * A set of table OIDs built from a comma-separated list setting.
 */
typedef struct TableList {
        int             nTables;	/* number of valid entries in tables[] */
        Oid             *tables;	/* malloc'd array, sorted with oid_cmp so matchOid can bisect it */
        char    *tableStr;		/* string value slot handed to DefineCustomStringVariable */
} TableList;

/* Tables never analyzed on-line, unless they are also listed in includeTables. */
static TableList excludeTables = {0, NULL, NULL};
/* Tables always analyzed on-line; takes precedence over excludeTables and table_type. */
static TableList includeTables = {0, NULL, NULL};
93
94 static int
95 oid_cmp(const void *a, const void *b)
96 {
97         if (*(Oid*)a == *(Oid*)b)
98                 return 0;
99         return (*(Oid*)a > *(Oid*)b) ? 1 : -1;
100 }
101
102 static const char *
103 tableListAssign(const char * newval, bool doit, TableList *tbl)
104 {
105         char       *rawname;
106         List       *namelist;
107         ListCell   *l;
108         Oid         *newOids = NULL;
109         int         nOids = 0,
110                                 i = 0;
111
112         rawname = pstrdup(newval);
113
114         if (!SplitIdentifierString(rawname, ',', &namelist))
115                 goto cleanup;
116
117         if (doit)
118         {
119                 nOids = list_length(namelist);
120                 newOids = malloc(sizeof(Oid) * (nOids+1));
121                 if (!newOids)
122                         elog(ERROR,"could not allocate %d bytes", (int)(sizeof(Oid) * (nOids+1)));
123         }
124
125         foreach(l, namelist)
126         {
127                 char        *curname = (char *) lfirst(l);
128                 Oid         relOid = RangeVarGetRelid(makeRangeVarFromNameList(stringToQualifiedNameList(curname)), 
129 #if PG_VERSION_NUM >= 90200
130                                                                                                 NoLock,
131 #endif
132                                                                                                 true);
133
134                 if (relOid == InvalidOid)
135                 {
136 #if PG_VERSION_NUM >= 90100
137                         if (doit == false)
138 #endif
139                         elog(WARNING,"'%s' does not exist", curname);
140                         continue;
141                 }
142                 else if ( get_rel_relkind(relOid) != RELKIND_RELATION )
143                 {
144 #if PG_VERSION_NUM >= 90100
145                         if (doit == false)
146 #endif
147                                 elog(WARNING,"'%s' is not an table", curname);
148                         continue;
149                 }
150                 else if (doit)
151                 {
152                         newOids[i++] = relOid;
153                 }
154         }
155
156         if (doit)
157         {
158                 tbl->nTables = i;
159                 if (tbl->tables)
160                         free(tbl->tables);
161                 tbl->tables = newOids;
162                 if (tbl->nTables > 1)
163                         qsort(tbl->tables, tbl->nTables, sizeof(tbl->tables[0]), oid_cmp);
164         }
165
166         pfree(rawname);
167         list_free(namelist);
168
169         return newval;
170
171 cleanup:
172         if (newOids)
173                 free(newOids);
174         pfree(rawname);
175         list_free(namelist);
176         return NULL;
177 }
178
179 #if PG_VERSION_NUM >= 90100
180 static bool
181 excludeTablesCheck(char **newval, void **extra, GucSource source)
182 {
183         char *val;
184
185         val = (char*)tableListAssign(*newval, false, &excludeTables);
186
187         if (val)
188         {
189                 *newval = val;
190                 return true;
191         }
192
193         return false;
194 }
195
196 static void
197 excludeTablesAssign(const char *newval, void *extra)
198 {
199         tableListAssign(newval, true, &excludeTables);
200 }
201
202 static bool
203 includeTablesCheck(char **newval, void **extra, GucSource source)
204 {
205         char *val;
206
207         val = (char*)tableListAssign(*newval, false, &includeTables);
208
209         if (val)
210         {
211                 *newval = val;
212                 return true;
213         }
214
215         return false;
216 }
217
218 static void
219 includeTablesAssign(const char *newval, void *extra)
220 {
221         tableListAssign(newval, true, &excludeTables);
222 }
223
224 #else /* PG_VERSION_NUM < 90100 */ 
225
226 static const char *
227 excludeTablesAssign(const char * newval, bool doit, GucSource source)
228 {
229         return tableListAssign(newval, doit, &excludeTables);
230 }
231
232 static const char *
233 includeTablesAssign(const char * newval, bool doit, GucSource source)
234 {
235         return tableListAssign(newval, doit, &includeTables);
236 }
237
238 #endif
239
240 static const char*
241 tableListShow(TableList *tbl)
242 {
243         char    *val, *ptr;
244         int     i,
245                         len;
246
247         len = 1 /* \0 */ + tbl->nTables * (2 * NAMEDATALEN + 2 /* ', ' */ + 1 /* . */);
248         ptr = val = palloc(len);
249         *ptr ='\0';
250         for(i=0; i<tbl->nTables; i++)
251         {
252                 char    *relname = get_rel_name(tbl->tables[i]);
253                 Oid     nspOid = get_rel_namespace(tbl->tables[i]);
254                 char    *nspname = get_namespace_name(nspOid);
255
256                 if ( relname == NULL || nspOid == InvalidOid || nspname == NULL )
257                         continue;
258
259                 ptr += snprintf(ptr, len - (ptr - val), "%s%s.%s",
260                                                                                                         (i==0) ? "" : ", ",
261                                                                                                         nspname, relname);
262         }
263
264         return val;
265 }
266
267 static const char*
268 excludeTablesShow(void)
269 {
270         return tableListShow(&excludeTables);
271 }
272
273 static const char*
274 includeTablesShow(void)
275 {
276         return tableListShow(&includeTables);
277 }
278
279 static bool
280 matchOid(TableList *tbl, Oid oid)
281 {
282         Oid     *StopLow = tbl->tables,
283                 *StopHigh = tbl->tables + tbl->nTables,
284                 *StopMiddle;
285
286         /* Loop invariant: StopLow <= val < StopHigh */
287         while (StopLow < StopHigh)
288         {
289                 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
290
291                 if (*StopMiddle == oid)
292                         return true;
293                 else  if (*StopMiddle < oid)
294                         StopLow = StopMiddle + 1;
295                 else
296                         StopHigh = StopMiddle;
297         }
298
299         return false;
300 }
301
302 static void
303 makeAnalyze(Oid relOid, CmdType operation, uint32 naffected)
304 {
305         PgStat_StatTabEntry             *tabentry;
306         TimestampTz                     now = GetCurrentTimestamp();
307
308         if (relOid == InvalidOid)
309                 return;
310
311         tabentry = pgstat_fetch_stat_tabentry(relOid);
312
313 #if PG_VERSION_NUM >= 90000
314 #define changes_since_analyze(t)        ((t)->changes_since_analyze)
315 #else
316 #define changes_since_analyze(t)        ((t)->n_live_tuples + (t)->n_dead_tuples - (t)->last_anl_tuples)
317 #endif
318
319         if (    
320                 tabentry == NULL /* a new table */ ||
321                 (
322                         /* do not analyze too often, if both stamps are exceeded the go */
323                         TimestampDifferenceExceeds(tabentry->analyze_timestamp, now, online_analyze_min_interval) && 
324                         TimestampDifferenceExceeds(tabentry->autovac_analyze_timestamp, now, online_analyze_min_interval) &&
325                         /* be in sync with relation_needs_vacanalyze */
326                         ((double)(changes_since_analyze(tabentry) + naffected)) >=
327                                 online_analyze_scale_factor * ((double)(tabentry->n_dead_tuples + tabentry->n_live_tuples)) + 
328                                         (double)online_analyze_threshold
329                 )
330         )
331         {
332                 VacuumStmt                              vacstmt;
333                 TimestampTz                             startStamp, endStamp;
334
335                 /*
336                  * includeTables overwrites excludeTables
337                  */
338                 switch(online_analyze_table_type)
339                 {
340                         case OATT_ALL:
341                                 if (matchOid(&excludeTables, relOid) == true && matchOid(&includeTables, relOid) == false)
342                                         return;
343                                 break;
344                         case OATT_NONE:
345                                 if (matchOid(&includeTables, relOid) == false)
346                                         return;
347                                 break;
348                         case OATT_TEMPORARY:
349                         case OATT_PERSISTENT:
350                         default:
351                                 {
352                                         Relation                                rel;
353                                         OnlineAnalyzeTableType  reltype;
354
355                                         rel = RelationIdGetRelation(relOid);
356                                         reltype = 
357 #if PG_VERSION_NUM >= 90100
358                                                 (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
359 #else
360                                                 (rel->rd_istemp || rel->rd_islocaltemp)
361 #endif
362                                                         ? OATT_TEMPORARY : OATT_PERSISTENT;
363                                         RelationClose(rel);
364
365                                         /*
366                                          * skip analyze if relation's type doesn't not match online_analyze_table_type
367                                          */
368                                         if ((online_analyze_table_type & reltype) == 0 || matchOid(&excludeTables, relOid) == true)
369                                         {
370                                                 if (matchOid(&includeTables, relOid) == false)
371                                                         return;
372                                         }
373                                 }
374                                 break;
375                 }
376
377                 vacstmt.type = T_VacuumStmt;
378                 vacstmt.freeze_min_age = -1;
379                 vacstmt.freeze_table_age = -1; /* ??? */
380                 vacstmt.relation = NULL;
381                 vacstmt.va_cols = NIL;
382
383 #if PG_VERSION_NUM >= 90000
384                 vacstmt.options = VACOPT_ANALYZE;
385                 if (online_analyze_verbose)
386                         vacstmt.options |= VACOPT_VERBOSE;
387 #else
388                 vacstmt.vacuum = vacstmt.full = false;
389                 vacstmt.analyze = true;
390                 vacstmt.verbose = online_analyze_verbose;
391 #endif
392
393                 if (online_analyze_verbose)
394                         startStamp = GetCurrentTimestamp();
395
396                 analyze_rel(relOid, &vacstmt, GetAccessStrategy(BAS_VACUUM)
397 #if (PG_VERSION_NUM < 90004) && (PG_VERSION_NUM >= 90000)
398                         , true
399 #endif
400                 );
401
402                 if (online_analyze_verbose)
403                 {
404                         long    secs;
405                         int             microsecs;
406
407                         endStamp = GetCurrentTimestamp();
408                         TimestampDifference(startStamp, endStamp, &secs, &microsecs);
409                         elog(INFO, "analyze \"%s\" took %.02f seconds", 
410                                 get_rel_name(relOid), ((double)secs) + ((double)microsecs)/1.0e6);
411                 }
412
413
414                 if (tabentry == NULL)
415                 {
416                         /* new table */
417                         pgstat_clear_snapshot();
418                 }
419                 else
420                 {
421                         /* update last analyze timestamp in local memory of backend */
422                         tabentry->analyze_timestamp = now;
423                 }
424         }
425 #if PG_VERSION_NUM >= 90000
426         else if (tabentry != NULL)
427         {
428                 tabentry->changes_since_analyze += naffected;
429         }
430 #endif
431 }
432
433 extern PGDLLIMPORT void onlineAnalyzeHooker(QueryDesc *queryDesc);
434 void
435 onlineAnalyzeHooker(QueryDesc *queryDesc) 
436 {
437         uint32  naffected = 0;
438
439         if (queryDesc->estate)
440                 naffected = queryDesc->estate->es_processed;    
441
442         if (online_analyze_enable && queryDesc->plannedstmt &&
443                         (queryDesc->operation == CMD_INSERT || 
444                          queryDesc->operation == CMD_UPDATE ||
445                          queryDesc->operation == CMD_DELETE ||
446 #if PG_VERSION_NUM >= 90200
447                          0 /* (queryDesc->operation == CMD_SELECT && queryDesc->dest && queryDesc->dest == DestIntoRel) */
448 #else
449                          (queryDesc->operation == CMD_SELECT && queryDesc->plannedstmt->intoClause)
450 #endif
451                          ))
452         {
453 #if PG_VERSION_NUM < 90200
454                 if (queryDesc->operation == CMD_SELECT)
455                 {
456                         Oid     relOid = RangeVarGetRelid(queryDesc->plannedstmt->intoClause->rel, true);
457
458                         makeAnalyze(relOid, queryDesc->operation, naffected);
459                 }
460                 else 
461 #endif
462                 if (queryDesc->plannedstmt->resultRelations &&
463                                  queryDesc->plannedstmt->rtable)
464                 {
465                         ListCell        *l;
466
467                         foreach(l, queryDesc->plannedstmt->resultRelations)
468                         {
469                                 int                     n = lfirst_int(l);
470                                 RangeTblEntry   *rte = list_nth(queryDesc->plannedstmt->rtable, n-1);
471                 
472                                 if (rte->rtekind == RTE_RELATION)
473                                         makeAnalyze(rte->relid, queryDesc->operation, naffected);
474                         }
475                 }
476         }
477
478         if (oldExecutorEndHook)
479                 oldExecutorEndHook(queryDesc);
480         else
481                 standard_ExecutorEnd(queryDesc);
482 }
483
484 #if PG_VERSION_NUM >= 90200
485 static void
486 onlineAnalyzeHookerUtility(Node *parsetree, const char *queryString, 
487                                                         ParamListInfo params, bool isTopLevel,
488                                                         DestReceiver *dest, char *completionTag) {
489         RangeVar        *tblname = NULL;
490
491         if (IsA(parsetree, CreateTableAsStmt) && ((CreateTableAsStmt*)parsetree)->into)
492                 tblname = (RangeVar*)copyObject(((CreateTableAsStmt*)parsetree)->into->rel);
493
494         if (oldProcessUtilityHook)
495                 oldProcessUtilityHook(parsetree, queryString, params, isTopLevel, dest, completionTag);
496         else
497                 standard_ProcessUtility(parsetree, queryString, params, isTopLevel, dest, completionTag);
498
499         if (tblname) {
500                 Oid     tblOid = RangeVarGetRelid(tblname, NoLock, true);
501
502                 makeAnalyze(tblOid, CMD_INSERT, 0); 
503         }
504 }
505 #endif
506
507 void _PG_init(void);
508 void
509 _PG_init(void)
510 {
511         oldExecutorEndHook = ExecutorEnd_hook;
512
513         ExecutorEnd_hook = onlineAnalyzeHooker;
514
515 #if PG_VERSION_NUM >= 90200
516         oldProcessUtilityHook = ProcessUtility_hook;
517
518         ProcessUtility_hook = onlineAnalyzeHookerUtility;
519 #endif
520
521
522         DefineCustomBoolVariable(
523                 "online_analyze.enable",
524                 "Enable on-line analyze",
525                 "Enables analyze of table directly after insert/update/delete/select into",
526                 &online_analyze_enable,
527 #if PG_VERSION_NUM >= 80400
528                 online_analyze_enable,
529 #endif
530                 PGC_USERSET,
531 #if PG_VERSION_NUM >= 80400
532                 GUC_NOT_IN_SAMPLE,
533 #if PG_VERSION_NUM >= 90100
534                 NULL,
535 #endif
536 #endif
537                 NULL,
538                 NULL
539         );
540
541         DefineCustomBoolVariable(
542                 "online_analyze.verbose",
543                 "Verbosity of on-line analyze",
544                 "Make ANALYZE VERBOSE after table's changes",
545                 &online_analyze_verbose,
546 #if PG_VERSION_NUM >= 80400
547                 online_analyze_verbose,
548 #endif
549                 PGC_USERSET,
550 #if PG_VERSION_NUM >= 80400
551                 GUC_NOT_IN_SAMPLE,
552 #if PG_VERSION_NUM >= 90100
553                 NULL,
554 #endif
555 #endif
556                 NULL,
557                 NULL
558         );
559
560     DefineCustomRealVariable(
561                 "online_analyze.scale_factor",
562                 "fraction of table size to start on-line analyze",
563                 "fraction of table size to start on-line analyze",
564                 &online_analyze_scale_factor,
565 #if PG_VERSION_NUM >= 80400
566                 online_analyze_scale_factor,
567 #endif
568                 0.0,
569                 1.0,
570                 PGC_USERSET,
571 #if PG_VERSION_NUM >= 80400
572                 GUC_NOT_IN_SAMPLE,
573 #if PG_VERSION_NUM >= 90100
574                 NULL,
575 #endif
576 #endif
577                 NULL,
578                 NULL
579         );
580
581     DefineCustomIntVariable(
582                 "online_analyze.threshold",
583                 "min number of row updates before on-line analyze",
584                 "min number of row updates before on-line analyze",
585                 &online_analyze_threshold,
586 #if PG_VERSION_NUM >= 80400
587                 online_analyze_threshold,
588 #endif
589                 0,
590                 0x7fffffff,
591                 PGC_USERSET,
592 #if PG_VERSION_NUM >= 80400
593                 GUC_NOT_IN_SAMPLE,
594 #if PG_VERSION_NUM >= 90100
595                 NULL,
596 #endif
597 #endif
598                 NULL,
599                 NULL
600         );
601
602     DefineCustomRealVariable(
603                 "online_analyze.min_interval",
604                 "minimum time interval between analyze call (in milliseconds)",
605                 "minimum time interval between analyze call (in milliseconds)",
606                 &online_analyze_scale_factor,
607 #if PG_VERSION_NUM >= 80400
608                 online_analyze_min_interval,
609 #endif
610                 0.0,
611                 1e30,
612                 PGC_USERSET,
613 #if PG_VERSION_NUM >= 80400
614                 GUC_NOT_IN_SAMPLE,
615 #if PG_VERSION_NUM >= 90100
616                 NULL,
617 #endif
618 #endif
619                 NULL,
620                 NULL
621         );
622
623         DefineCustomEnumVariable(
624                 "online_analyze.table_type",
625                 "Type(s) of table for online analyze: all(default), persistent, temporary, none",
626                 NULL,
627                 &online_analyze_table_type,
628 #if PG_VERSION_NUM >= 80400
629                 online_analyze_table_type,
630 #endif
631                 online_analyze_table_type_options,
632                 PGC_USERSET,
633 #if PG_VERSION_NUM >= 80400
634         GUC_NOT_IN_SAMPLE,
635 #if PG_VERSION_NUM >= 90100
636                 NULL,
637 #endif
638 #endif
639                 NULL,
640                 NULL
641         );
642
643     DefineCustomStringVariable(
644                 "online_analyze.exclude_tables",
645                 "List of tables which will not online analyze",
646                 NULL,
647                 &excludeTables.tableStr,
648 #if PG_VERSION_NUM >= 80400
649                 "",
650 #endif
651                 PGC_USERSET,
652                 0,
653 #if PG_VERSION_NUM >= 90100
654                 excludeTablesCheck,
655                 excludeTablesAssign,
656 #else
657                 excludeTablesAssign,
658 #endif
659                 excludeTablesShow
660         );
661
662     DefineCustomStringVariable(
663                 "online_analyze.include_tables",
664                 "List of tables which will online analyze",
665                 NULL,
666                 &includeTables.tableStr,
667 #if PG_VERSION_NUM >= 80400
668                 "",
669 #endif
670                 PGC_USERSET,
671                 0,
672 #if PG_VERSION_NUM >= 90100
673                 includeTablesCheck,
674                 includeTablesAssign,
675 #else
676                 includeTablesAssign,
677 #endif
678                 includeTablesShow
679         );
680 }
681
682 void _PG_fini(void);
683 void
684 _PG_fini(void)
685 {
686         ExecutorEnd_hook = oldExecutorEndHook;
687 #if PG_VERSION_NUM >= 90200
688         ProcessUtility_hook = oldProcessUtilityHook;
689 #endif
690
691         if (excludeTables.tables)
692                 free(excludeTables.tables);
693         if (includeTables.tables)
694                 free(includeTables.tables);
695
696         excludeTables.tables = includeTables.tables = NULL;
697         excludeTables.nTables = includeTables.nTables = 0;
698 }