From 36dfc34b6f6aac26ff32621823546bc8dbaba2f1 Mon Sep 17 00:00:00 2001 From: teodor Date: Tue, 1 Apr 2008 17:01:19 +0000 Subject: [PATCH] add gin_stat() function --- README.gevel | 16 ++- expected/gevel.out | 313 +++++++++++++++++++++++++++++++++++++++++++++ gevel.c | 262 ++++++++++++++++++++++++++++++++++++- gevel.sql.in | 6 + sql/gevel.sql | 10 ++ 5 files changed, 603 insertions(+), 4 deletions(-) diff --git a/README.gevel b/README.gevel index d4981b9..c3fbf2a 100644 --- a/README.gevel +++ b/README.gevel @@ -1,8 +1,8 @@ -Gevel contrib module provides several functions useful for analyzing GiST index. +Gevel contrib module provides several functions useful for analyzing GiST and GIN index. [Online version] of this document (http://www.sai.msu.su/~megera/oddmuse/index.cgi/Gevel) -Caution: This module was designed for developers of GiST based indices ! +Caution: This module was designed for advanced users of GIN and GiST indices ! Authors @@ -122,3 +122,15 @@ regression=# select gist_tree('pix'); 1 | t | (31179,50040),(28113,25556) 1 | t | (28048,49694),(25000,25000) (29 rows) + + * gin_stat(INDEXNAME) prints estimated counts for each indexed values + +# SELECT * FROM gin_stat('gin_idx') as t(value int, nrow int) where nrow > 250; + value | nrow + -------+------ + 31 | 254 + 47 | 251 + 52 | 257 + 59 | 259 + (4 rows) + diff --git a/expected/gevel.out b/expected/gevel.out index 982ab55..0faefea 100644 --- a/expected/gevel.out +++ b/expected/gevel.out @@ -96,3 +96,316 @@ SELECT * FROM gist_print('gist_idx') as t(level int, valid bool, a box) where le 1 | t | (31179,37276),(24921,25000) (32 rows) +CREATE TABLE test__int( a int[] ); +\copy test__int from 'data/test__int.data' +CREATE INDEX gin_idx ON test__int USING gin ( a ); +INSERT INTO test__int ( SELECT ARRAY[t] || '{1000}'::_int4 FROM generate_series (1,300) as t ); +INSERT INTO test__int ( SELECT ARRAY[t] || '{1001}'::_int4 FROM generate_series (1,300) as t, generate_series(1,12) ); +SELECT * FROM gin_stat('gin_idx') as t(value int, nrow int); + value | nrow +-------+------ + 0 | 38 + 1 | 257 + 2 | 244 + 3 | 222 + 4 | 228 + 5 | 227 + 6 | 243 + 7 | 238 + 8 | 236 + 9 | 224 + 10 | 236 + 11 | 248 + 12 | 224 + 13 | 235 + 14 | 237 + 15 | 253 + 16 | 234 + 17 | 245 + 18 | 246 + 19 | 234 + 20 | 236 + 21 | 213 + 22 | 235 + 23 | 222 + 24 | 197 + 25 | 253 + 26 | 218 + 27 | 239 + 28 | 198 + 29 | 244 + 30 | 247 + 31 | 267 + 32 | 234 + 33 | 209 + 34 | 231 + 35 | 218 + 36 | 242 + 37 | 232 + 38 | 221 + 39 | 232 + 40 | 241 + 41 | 239 + 42 | 221 + 43 | 211 + 44 | 231 + 45 | 213 + 46 | 236 + 47 | 264 + 48 | 221 + 49 | 235 + 50 | 219 + 51 | 250 + 52 | 270 + 53 | 222 + 54 | 242 + 55 | 237 + 56 | 237 + 57 | 251 + 58 | 231 + 59 | 272 + 60 | 219 + 61 | 233 + 62 | 235 + 63 | 260 + 64 | 252 + 65 | 221 + 66 | 228 + 67 | 253 + 68 | 196 + 69 | 232 + 70 | 232 + 71 | 223 + 72 | 247 + 73 | 214 + 74 | 232 + 75 | 229 + 76 | 233 + 77 | 221 + 78 | 227 + 79 | 233 + 80 | 216 + 81 | 244 + 82 | 223 + 83 | 254 + 84 | 227 + 85 | 247 + 86 | 255 + 87 | 239 + 88 | 258 + 89 | 249 + 90 | 244 + 91 | 226 + 92 | 225 + 93 | 230 + 94 | 218 + 95 | 232 + 96 | 239 + 97 | 226 + 98 | 209 + 99 | 211 + 100 | 216 + 101 | 49 + 102 | 47 + 103 | 59 + 104 | 55 + 105 | 48 + 106 | 49 + 107 | 49 + 108 | 51 + 109 | 47 + 110 | 51 + 111 | 45 + 112 | 46 + 113 | 48 + 114 | 38 + 115 | 39 + 116 | 43 + 117 | 44 + 118 | 46 + 119 | 45 + 120 | 52 + 121 | 41 + 122 | 64 + 123 | 50 + 124 | 41 + 125 | 55 + 126 | 41 + 127 | 50 + 128 | 54 + 129 | 43 + 130 | 44 + 131 | 50 + 132 | 57 + 133 | 40 + 134 | 41 + 135 | 44 + 136 | 58 + 137 | 48 + 138 | 44 + 139 | 50 + 140 | 49 + 141 | 48 + 142 | 43 + 143 | 36 + 144 | 43 + 145 | 49 + 146 | 46 + 147 | 40 + 148 | 52 + 149 | 46 + 150 | 49 + 151 | 42 + 152 | 58 + 153 | 49 + 154 | 51 + 155 | 49 + 156 | 45 + 157 | 53 + 158 | 59 + 159 | 38 + 160 | 53 + 161 | 48 + 162 | 41 + 163 | 40 + 164 | 52 + 165 | 50 + 166 | 40 + 167 | 48 + 168 | 45 + 169 | 49 + 170 | 50 + 171 | 45 + 172 | 46 + 173 | 47 + 174 | 41 + 175 | 51 + 176 | 50 + 177 | 41 + 178 | 42 + 179 | 48 + 180 | 47 + 181 | 49 + 182 | 40 + 183 | 46 + 184 | 52 + 185 | 36 + 186 | 45 + 187 | 41 + 188 | 47 + 189 | 49 + 190 | 46 + 191 | 45 + 192 | 39 + 193 | 50 + 194 | 39 + 195 | 54 + 196 | 43 + 197 | 50 + 198 | 46 + 199 | 42 + 200 | 31 + 201 | 28 + 202 | 28 + 203 | 28 + 204 | 30 + 205 | 22 + 206 | 25 + 207 | 27 + 208 | 32 + 209 | 22 + 210 | 21 + 211 | 31 + 212 | 22 + 213 | 37 + 214 | 19 + 215 | 25 + 216 | 27 + 217 | 26 + 218 | 24 + 219 | 25 + 220 | 26 + 221 | 21 + 222 | 23 + 223 | 20 + 224 | 26 + 225 | 25 + 226 | 25 + 227 | 23 + 228 | 30 + 229 | 30 + 230 | 24 + 231 | 22 + 232 | 27 + 233 | 27 + 234 | 21 + 235 | 27 + 236 | 24 + 237 | 24 + 238 | 29 + 239 | 32 + 240 | 31 + 241 | 24 + 242 | 36 + 243 | 21 + 244 | 29 + 245 | 22 + 246 | 23 + 247 | 21 + 248 | 26 + 249 | 29 + 250 | 24 + 251 | 29 + 252 | 25 + 253 | 28 + 254 | 25 + 255 | 19 + 256 | 23 + 257 | 20 + 258 | 24 + 259 | 31 + 260 | 29 + 261 | 20 + 262 | 25 + 263 | 23 + 264 | 25 + 265 | 23 + 266 | 31 + 267 | 22 + 268 | 26 + 269 | 33 + 270 | 25 + 271 | 27 + 272 | 29 + 273 | 29 + 274 | 26 + 275 | 28 + 276 | 30 + 277 | 28 + 278 | 29 + 279 | 22 + 280 | 29 + 281 | 23 + 282 | 30 + 283 | 29 + 284 | 23 + 285 | 37 + 286 | 27 + 287 | 31 + 288 | 28 + 289 | 26 + 290 | 25 + 291 | 29 + 292 | 22 + 293 | 26 + 294 | 29 + 295 | 24 + 296 | 27 + 297 | 30 + 298 | 29 + 299 | 26 + 300 | 13 + 1000 | 300 + 1001 | 3395 +(303 rows) + diff --git a/gevel.c b/gevel.c index 4c19212..6018003 100644 --- a/gevel.c +++ b/gevel.c @@ -1,6 +1,7 @@ #include "postgres.h" #include "access/genam.h" +#include "access/gin.h" #include "access/gist.h" #include "access/gist_private.h" #include "access/gistscan.h" @@ -10,6 +11,8 @@ #include "storage/lmgr.h" #include "catalog/namespace.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/datum.h" #include #include #include @@ -36,6 +39,7 @@ typedef struct { int len; } IdxInfo; +static Relation checkOpenedRelation(Relation r, Oid PgAmOid); #ifdef PG_MODULE_MAGIC /* >= 8.2 */ @@ -45,11 +49,21 @@ PG_MODULE_MAGIC; static Relation gist_index_open(RangeVar *relvar) { Oid relOid = RangeVarGetRelid(relvar, false); - return index_open(relOid, AccessExclusiveLock); + return checkOpenedRelation( + index_open(relOid, AccessExclusiveLock), GIST_AM_OID); } #define gist_index_close(r) index_close((r), AccessExclusiveLock) +static Relation +gin_index_open(RangeVar *relvar) { + Oid relOid = RangeVarGetRelid(relvar, false); + return checkOpenedRelation( + index_open(relOid, AccessShareLock), GIN_AM_OID); +} + +#define gin_index_close(r) index_close((r), AccessShareLock) + #else /* <8.2 */ static Relation @@ -57,7 +71,7 @@ gist_index_open(RangeVar *relvar) { Relation rel = index_openrv(relvar); LockRelation(rel, AccessExclusiveLock); - return rel; + return checkOpenedRelation(rel, GIST_AM_OID); } static void @@ -66,6 +80,20 @@ gist_index_close(Relation rel) { index_close(rel); } +static Relation +gin_index_open(RangeVar *relvar) { + Relation rel = index_openrv(relvar); + + LockRelation(rel, AccessShareLock); + return checkOpenedRelation(rel, GIN_AM_OID); +} + +static void +gin_index_close(Relation rel) { + UnlockRelation(rel, AccessShareLock); + index_close(rel); +} + #endif #if PG_VERSION_NUM >= 80300 @@ -76,6 +104,23 @@ gist_index_close(Relation rel) { #define SET_VARSIZE(p,l) VARATT_SIZEP(p)=(l) #endif +static Relation +checkOpenedRelation(Relation r, Oid PgAmOid) { + if ( r->rd_am == NULL ) + elog(ERROR, "Relation %s.%s is not an index", + get_namespace_name(RelationGetNamespace(r)), + RelationGetRelationName(r) + ); + + if ( r->rd_rel->relam != PgAmOid ) + elog(ERROR, "Index %s.%s has wrong type", + get_namespace_name(RelationGetNamespace(r)), + RelationGetRelationName(r) + ); + + return r; +} + static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber coff, IdxInfo *info) { Buffer buffer; @@ -434,3 +479,216 @@ gist_print(PG_FUNCTION_ARGS) { SRF_RETURN_NEXT(funcctx, result); } +typedef struct GinStatState { + Relation index; + GinState ginstate; + + Buffer buffer; + OffsetNumber offset; + Datum curval; + Datum dvalues[2]; + char nulls[2]; +} GinStatState; + +static bool +moveRightIfItNeeded( GinStatState *st ) +{ + Page page = BufferGetPage(st->buffer); + + if ( st->offset > PageGetMaxOffsetNumber(page) ) { + /* + * We scaned the whole page, so we should take right page + */ + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + + if ( GinPageRightMost(page) ) + return false; /* no more page */ + + LockBuffer(st->buffer, GIN_UNLOCK); + st->buffer = ReleaseAndReadBuffer(st->buffer, st->index, blkno); + LockBuffer(st->buffer, GIN_SHARE); + st->offset = FirstOffsetNumber; + } + + return true; +} + +/* + * Refinds a previois position, at returns it has correctly + * set offset and buffer is locked + */ +static bool +refindPosition(GinStatState *st) +{ + Page page; + + /* find left if needed (it causes only for first search) */ + for (;;) { + IndexTuple itup; + BlockNumber blkno; + + LockBuffer(st->buffer, GIN_SHARE); + + page = BufferGetPage(st->buffer); + if (GinPageIsLeaf(page)) + break; + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); + blkno = GinItemPointerGetBlockNumber(&(itup)->t_tid); + + LockBuffer(st->buffer,GIN_UNLOCK); + st->buffer = ReleaseAndReadBuffer(st->buffer, st->index, blkno); + } + + if (st->offset == InvalidOffsetNumber) { + return (PageGetMaxOffsetNumber(page) >= FirstOffsetNumber ) ? true : false; /* first one */ + } + + for(;;) { + int cmp; + bool isnull; + Datum datum; + IndexTuple itup; + + if (moveRightIfItNeeded(st)==false) + return false; + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, st->offset)); + datum = index_getattr(itup, FirstOffsetNumber, st->ginstate.tupdesc, &isnull); + cmp = DatumGetInt32( + FunctionCall2( + &st->ginstate.compareFn, + st->curval, + datum + )); + if ( cmp == 0 ) + return true; + + st->offset++; + } + + return false; +} + +static void +gin_setup_firstcall(FuncCallContext *funcctx, text *name) { + MemoryContext oldcontext; + GinStatState *st; + char *relname=t2c(name); + TupleDesc tupdesc; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + st=(GinStatState*)palloc( sizeof(GinStatState) ); + memset(st,0,sizeof(GinStatState)); + st->index = gin_index_open( + makeRangeVarFromNameList(stringToQualifiedNameList(relname, "gist_tree"))); + initGinState( &st->ginstate, st->index ); + + funcctx->user_fctx = (void*)st; + + tupdesc = CreateTemplateTupleDesc(2, false); + TupleDescInitEntry(tupdesc, 1, "value", + st->index->rd_att->attrs[0]->atttypid, + st->index->rd_att->attrs[0]->atttypmod, + st->index->rd_att->attrs[0]->attndims); + TupleDescInitEntry(tupdesc, 2, "nrow", INT4OID, -1, 0); + + memset( st->nulls, ' ', 2*sizeof(char) ); + + funcctx->slot = TupleDescGetSlot(tupdesc); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + MemoryContextSwitchTo(oldcontext); + pfree(relname); + + st->offset = InvalidOffsetNumber; + st->buffer = ReadBuffer( st->index, GIN_ROOT_BLKNO ); +} + +static void +processTuple( FuncCallContext *funcctx, GinStatState *st, IndexTuple itup ) { + MemoryContext oldcontext; + Datum datum; + bool isnull; + + datum = index_getattr(itup, FirstOffsetNumber, st->ginstate.tupdesc, &isnull); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + st->curval = datumCopy( + index_getattr(itup, FirstOffsetNumber, st->ginstate.tupdesc, &isnull), + st->ginstate.tupdesc->attrs[0]->attbyval, + st->ginstate.tupdesc->attrs[0]->attlen ); + MemoryContextSwitchTo(oldcontext); + + st->dvalues[0] = st->curval; + + if ( GinIsPostingTree(itup) ) { + BlockNumber rootblkno = GinGetPostingTree(itup); + GinPostingTreeScan *gdi; + Buffer entrybuffer; + Page page; + + LockBuffer(st->buffer, GIN_UNLOCK); + gdi = prepareScanPostingTree(st->index, rootblkno, TRUE); + entrybuffer = scanBeginPostingTree(gdi); + + page = BufferGetPage(entrybuffer); + st->dvalues[1] = Int32GetDatum( gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff ); + + LockBuffer(entrybuffer, GIN_UNLOCK); + freeGinBtreeStack(gdi->stack); + pfree(gdi); + } else { + st->dvalues[1] = Int32GetDatum( GinGetNPosting(itup) ); + LockBuffer(st->buffer, GIN_UNLOCK); + } +} + +PG_FUNCTION_INFO_V1(gin_stat); +Datum gin_stat(PG_FUNCTION_ARGS); +Datum +gin_stat(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + GinStatState *st; + Datum result=(Datum)0; + IndexTuple ituple; + HeapTuple htuple; + Page page; + + if (SRF_IS_FIRSTCALL()) { + text *name=PG_GETARG_TEXT_P(0); + funcctx = SRF_FIRSTCALL_INIT(); + gin_setup_firstcall(funcctx, name); + PG_FREE_IF_COPY(name,0); + } + + funcctx = SRF_PERCALL_SETUP(); + st = (GinStatState*)(funcctx->user_fctx); + + if ( refindPosition(st) == false ) { + UnlockReleaseBuffer( st->buffer ); + gin_index_close(st->index); + + SRF_RETURN_DONE(funcctx); + } + + st->offset++; + + if (moveRightIfItNeeded(st)==false) { + UnlockReleaseBuffer( st->buffer ); + gin_index_close(st->index); + + SRF_RETURN_DONE(funcctx); + } + + page = BufferGetPage(st->buffer); + ituple = (IndexTuple) PageGetItem(page, PageGetItemId(page, st->offset)); + + processTuple( funcctx, st, ituple ); + + htuple = heap_formtuple(funcctx->attinmeta->tupdesc, st->dvalues, st->nulls); + result = TupleGetDatum(funcctx->slot, htuple); + + SRF_RETURN_NEXT(funcctx, result); +} + diff --git a/gevel.sql.in b/gevel.sql.in index a5b9eea..6bfa5e8 100644 --- a/gevel.sql.in +++ b/gevel.sql.in @@ -25,4 +25,10 @@ create or replace function gist_print(text) language 'C' with (isstrict); +create or replace function gin_stat(text) + returns setof record + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + END; diff --git a/sql/gevel.sql b/sql/gevel.sql index 6024eae..23aaa8c 100644 --- a/sql/gevel.sql +++ b/sql/gevel.sql @@ -14,3 +14,13 @@ CREATE INDEX gist_idx ON gevelt USING gist ( t ); SELECT gist_stat('gist_idx'); SELECT gist_tree('gist_idx'); SELECT * FROM gist_print('gist_idx') as t(level int, valid bool, a box) where level=1; + +CREATE TABLE test__int( a int[] ); +\copy test__int from 'data/test__int.data' + +CREATE INDEX gin_idx ON test__int USING gin ( a ); + +INSERT INTO test__int ( SELECT ARRAY[t] || '{1000}'::_int4 FROM generate_series (1,300) as t ); +INSERT INTO test__int ( SELECT ARRAY[t] || '{1001}'::_int4 FROM generate_series (1,300) as t, generate_series(1,12) ); + +SELECT * FROM gin_stat('gin_idx') as t(value int, nrow int); -- 2.46.1