4 #include "catalog/pg_type.h"
5 #include "executor/spi.h"
6 #include "utils/array.h"
7 #include "utils/datum.h"
8 #include "utils/memutils.h"
/*
 * File-scope pointer to the statistics cache kept across calls.
 * initStatCache() stores the cache it has just built here when
 * GetSmlarUsePersistent() is true, and hands the stored cache back on later
 * calls while that setting stays true. NULL means no cache is stored.
 */
10 static StatCache *PersistentDocStat = NULL;
/*
 * Allocate `size` bytes for cache data.
 *
 * ctx:  memory context used when the persistent mode is off.
 * size: number of bytes requested.
 *
 * With GetSmlarUsePersistent() true the memory is obtained from malloc();
 * an ERRCODE_OUT_OF_MEMORY error ("out of memory") is raised on that path,
 * presumably when malloc() returns NULL (TODO confirm the guarding test).
 * Otherwise the request is forwarded to MemoryContextAlloc(ctx, size).
 */
13 cacheAlloc(MemoryContext ctx, size_t size)
/* Persistent mode: ctx is not used for this allocation. */
15 if ( GetSmlarUsePersistent() )
17 void *ptr = malloc(size);
21 (errcode(ERRCODE_OUT_OF_MEMORY),
22 errmsg("out of memory")));
/* Non-persistent mode: allocate inside the caller-supplied context. */
27 return MemoryContextAlloc(ctx, size);
/*
 * Variant of cacheAlloc(): obtains `size` bytes via cacheAlloc(ctx, size),
 * so the malloc()/memory-context choice and the out-of-memory error are the
 * same as there. Going by the name, the block is presumably zero-filled
 * before being returned (TODO confirm).
 */
31 cacheAllocZero(MemoryContext ctx, size_t size)
33 void *ptr = cacheAlloc(ctx, size);
/*
 * Build, or reuse, the statistics cache from the table named by
 * GetSmlarTable().
 *
 * ctx: memory context passed to cacheAlloc()/cacheAllocZero() for every
 *      allocation made while the cache is built.
 *
 * When PersistentDocStat is already set and GetSmlarUsePersistent() is true,
 * that cache is returned unchanged. Otherwise the table is read through SPI,
 * validated, and loaded into a freshly allocated StatCache (presumably the
 * value returned; TODO confirm). The new cache is stored in PersistentDocStat
 * when GetSmlarUsePersistent() is true.
 *
 * Raises elog(ERROR) when: the table name is NULL or empty; SPI_execute()
 * does not return SPI_OK_SELECT; the table has no rows; it does not have
 * exactly two columns; the second column is neither int4 nor int8; the
 * element type is composite; the second column holds NULL; the total number
 * of documents is non-positive, repeated or unknown; a first-column value is
 * duplicated; a per-value count is non-positive or exceeds the total.
 */
39 initStatCache(MemoryContext ctx)
/* Fast path: reuse the cache built by an earlier call. */
41 if (PersistentDocStat && GetSmlarUsePersistent())
42 return PersistentDocStat;
46 const char *tbl = GetSmlarTable();
47 StatCache *cache = NULL;
49 if ( tbl == NULL || *tbl == '\0' )
50 elog(ERROR,"smlar.stattable is not defined");
/*
 * NOTE(review): sprintf() places no bound on the write into buf, and tbl is
 * inserted between double quotes without escaping embedded quotes. Consider
 * a bounded/dynamic buffer and proper identifier quoting.
 */
52 sprintf(buf,"SELECT * FROM \"%s\" ORDER BY 1;", tbl);
/* Second argument: read-only execution; third: 0 = no row limit. */
54 stat = SPI_execute(buf, true, 0);
56 if (stat != SPI_OK_SELECT)
57 elog(ERROR, "SPI_execute() returns %d", stat);
59 if ( SPI_processed == 0 )
61 elog(ERROR, "Stat table '%s' is empty", tbl);
/* Stays 0 until the total is read; its row is expected to have NULL in column 1. */
66 double totaldocs = 0.0;
/* Type of the second (document count) column. */
67 Oid ndocType = SPI_gettypeid(SPI_tuptable->tupdesc, 2);
/* Shape check: exactly two columns, the second one int4 or int8. */
69 if ( SPI_tuptable->tupdesc->natts != 2 )
70 elog(ERROR,"Stat table is not (type, int4)");
71 if ( !(ndocType == INT4OID || ndocType == INT8OID) )
72 elog(ERROR,"Stat table is not (type, int4) nor (type, int8)");
74 cache = cacheAllocZero(ctx, sizeof(StatCache));
/* Look up the support data for the type of the first column. */
75 cache->info = findProcs( SPI_gettypeid(SPI_tuptable->tupdesc, 1) );
/* A non-NULL tupDesc marks a composite (weighted) type, which is rejected. */
76 if (cache->info->tupDesc)
77 elog(ERROR, "TF/IDF is not supported for composite (weighted) type");
/* Presumably prepares cache->info->cmpFunc, used for comparisons below (TODO confirm). */
78 getFmgrInfoCmp(cache->info);
/* One StatElem slot per fetched row. */
79 cache->elems = cacheAlloc(ctx, sizeof(StatElem) * SPI_processed);
/* Walk every fetched row; column 1 is the value, column 2 its document count. */
81 for(i=0; i<SPI_processed; i++)
83 bool isnullvalue, isnullndoc;
84 Datum datum = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnullvalue);
/* Decode the count with the accessor matching the column type. */
87 if (ndocType == INT4OID)
88 ndoc = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnullndoc));
90 ndoc = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnullndoc));
93 elog(ERROR,"NULL value in second column of table '%s'", tbl);
97 /* total number of docs */
100 elog(ERROR,"Total number of document should be positive");
102 elog(ERROR,"Total number of document is repeated");
/*
 * Rows arrive ordered by the first column (ORDER BY 1 in the query), so a
 * duplicate value would sit directly after its twin; comparing with the
 * previous element is enough to detect it.
 */
107 if ( i>0 && DatumGetInt32( FCall2( &cache->info->cmpFunc, cache->elems[i-1].datum, datum ) ) == 0 )
108 elog(ERROR,"Values of first column of table '%s' are not unique", tbl);
111 elog(ERROR,"Number of documents with current value should be positive");
/*
 * Pass-by-value datums are stored as is; otherwise the value is copied into
 * memory obtained from cacheAlloc(), sized by datumGetSize().
 */
113 if ( cache->info->typbyval )
114 cache->elems[i].datum = datum;
117 size_t size = datumGetSize(datum, false, cache->info->typlen);
119 cache->elems[i].datum = PointerGetDatum(cacheAlloc(ctx, size));
120 memcpy(DatumGetPointer(cache->elems[i].datum), DatumGetPointer(datum), size);
/* idf temporarily holds the raw document count; it is converted after the loop. */
123 cache->elems[i].idf = ndoc;
128 elog(ERROR,"Total number of document is unknown");
/* One fetched row is not counted as an element, presumably the total-documents row (TODO confirm). */
129 cache->nelems = SPI_processed - 1;
/* Turn each raw count into its final weight. */
131 for(i=0;i<cache->nelems;i++)
133 if ( totaldocs < cache->elems[i].idf )
134 elog(ERROR,"Inconsitent data in '%s': there is values with frequency > 1", tbl);
/* idf = log(totaldocs / count + getOneAdd()) */
135 cache->elems[i].idf = log( totaldocs / cache->elems[i].idf + getOneAdd() );
/* Keep the finished cache for later calls when the persistent mode is on. */
141 if ( GetSmlarUsePersistent() )
142 PersistentDocStat = cache;
/*
 * Release the cache referenced by PersistentDocStat, if any, with free(),
 * then clear the pointer.
 */
151 if ( PersistentDocStat )
/*
 * Pass-by-reference datums were copied into separately allocated memory
 * when the cache was built, so each of them is freed individually.
 */
154 if (!PersistentDocStat->info->typbyval)
157 for(i=0;i<PersistentDocStat->nelems;i++)
158 free( DatumGetPointer(PersistentDocStat->elems[i].datum) );
/*
 * NOTE(review): getHashStatCache() also allocates stat->selems through
 * cacheAllocZero(), but no free() of selems appears here. Confirm whether
 * it is leaked when a persistent cache is released.
 */
161 if (PersistentDocStat->helems)
162 free(PersistentDocStat->helems);
163 free(PersistentDocStat->elems);
164 free(PersistentDocStat);
167 PersistentDocStat = NULL;
/*
 * Binary search for `query` among the elements of `stat`.
 *
 * stat:  cache to search; elems[0 .. nelems) is examined.
 * query: datum compared against each probed element via stat->info->cmpFunc.
 * low:   optional starting lower bound inside stat->elems; when NULL the
 *        search starts at stat->elems.
 *
 * Raises elog(ERROR) when stat->info->tupDesc is set (composite type).
 * The search assumes elems are ordered consistently with cmpFunc; they are
 * loaded from a query using ORDER BY 1 (TODO confirm the orderings match).
 * Presumably returns the matching element, or NULL when none is found
 * (TODO confirm).
 */
171 findStat(StatCache *stat, Datum query, StatElem *low)
173 StatElem *StopLow = (low) ? low : stat->elems,
174 *StopHigh = stat->elems + stat->nelems,
178 if (stat->info->tupDesc)
179 elog(ERROR, "TF/IDF is not supported for composite (weighted) type");
/* Invariant: a match, if any, lies in the half-open range [StopLow, StopHigh). */
181 while (StopLow < StopHigh)
/* Midpoint computed from the range width. */
183 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
184 cmp = DatumGetInt32( FCall2( &stat->info->cmpFunc, StopMiddle->datum, query ) );
189 StopLow = StopMiddle + 1;
191 StopHigh = StopMiddle;
/*
 * Allocate the hash-related arrays of `stat` through cacheAlloc() and
 * cacheAllocZero(), using `ctx` for the allocations.
 *
 * stat: cache that receives the helems and selems arrays.
 * ctx:  memory context forwarded to the allocators.
 * n:    number of SignedElem entries allocated for selems.
 */
198 getHashStatCache(StatCache *stat, MemoryContext ctx, size_t n)
/* Room for nelems + 1 HashedElem entries. */
202 stat->helems = cacheAlloc(ctx, (stat->nelems +1) * sizeof(HashedElem));
/* n SignedElem entries, obtained from the zeroing allocator. */
203 stat->selems = cacheAllocZero(ctx, n * sizeof(SignedElem));