4 #include "access/gist.h"
5 #include "access/skey.h"
6 #if PG_VERSION_NUM < 130000
7 #include "access/tuptoaster.h"
9 #include "access/heaptoast.h"
11 #include "utils/memutils.h"
13 typedef struct SmlSign {
14 int32 vl_len_; /* varlena header (do not touch directly!) */
21 #define SMLSIGNHDRSZ (offsetof(SmlSign, data))
25 #define SIGLEN ( sizeof(int)*SIGLENINT )
26 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
27 typedef char BITVEC[SIGLEN];
28 typedef char *BITVECP;
32 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
33 #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
34 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
35 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
36 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
38 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
39 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
43 #define ALLISTRUE 0x04
45 #define ISARRKEY(x) ( ((SmlSign*)x)->flag & ARRKEY )
46 #define ISSIGNKEY(x) ( ((SmlSign*)x)->flag & SIGNKEY )
47 #define ISALLTRUE(x) ( ((SmlSign*)x)->flag & ALLISTRUE )
49 #define CALCGTSIZE(flag, len) ( SMLSIGNHDRSZ + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(uint32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
50 #define GETSIGN(x) ( (BITVECP)( (char*)x+SMLSIGNHDRSZ ) )
51 #define GETARR(x) ( (uint32*)( (char*)x+SMLSIGNHDRSZ ) )
53 #define GETENTRY(vec,pos) ((SmlSign *) DatumGetPointer((vec)->vector[(pos)].key))
62 PG_FUNCTION_INFO_V1(gsmlsign_in);
63 Datum gsmlsign_in(PG_FUNCTION_ARGS);
65 gsmlsign_in(PG_FUNCTION_ARGS)
67 elog(ERROR, "not implemented");
71 PG_FUNCTION_INFO_V1(gsmlsign_out);
72 Datum gsmlsign_out(PG_FUNCTION_ARGS);
74 gsmlsign_out(PG_FUNCTION_ARGS)
76 elog(ERROR, "not implemented");
84 /* Number of one-bits in an unsigned byte */
85 static const uint8 number_of_ones[256] = {
86 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
87 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
88 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
89 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
93 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
94 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
95 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
96 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
97 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
101 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
105 compareint(const void *va, const void *vb)
107 uint32 a = *((uint32 *) va);
108 uint32 b = *((uint32 *) vb);
112 return (a > b) ? 1 : -1;
116 * Removes duplicates from an array of int32. 'l' is
117 * size of the input array. Returns the new size of the array.
120 uniqueint(uint32 *a, int32 l, int32 *max)
133 qsort((void *) a, l, sizeof(uint32), compareint);
156 Array2HashedArray(ProcTypeInfo info, ArrayType *a)
158 SimpleArray *s = Array2SimpleArray(info, a);
163 len = CALCGTSIZE( ARRKEY, s->nelems );
165 getFmgrInfoHash(s->info);
166 if (s->info->tupDesc)
167 elog(ERROR, "GiST doesn't support composite (weighted) type");
169 sign = palloc( len );
171 sign->size = s->nelems;
174 for(i=0;i<s->nelems;i++)
175 ptr[i] = DatumGetUInt32(FunctionCall1Coll(&s->info->hashFunc,
176 DEFAULT_COLLATION_OID,
180 * there is a collision of hash-function; len is always equal or less than
183 sign->size = uniqueint( GETARR(sign), sign->size, &sign->maxrepeat );
184 len = CALCGTSIZE( ARRKEY, sign->size );
185 SET_VARSIZE(sign, len);
191 HashedElemCmp(const void *va, const void *vb)
193 uint32 a = ((HashedElem *) va)->hash;
194 uint32 b = ((HashedElem *) vb)->hash;
198 double ma = ((HashedElem *) va)->idfMin;
199 double mb = ((HashedElem *) va)->idfMin;
204 return ( ma > mb ) ? 1 : -1;
207 return (a > b) ? 1 : -1;
211 uniqueHashedElem(HashedElem *a, int32 l)
221 qsort(a, l, sizeof(HashedElem), HashedElemCmp);
224 if (ptr->hash != res->hash)
228 res->idfMax = ptr->idfMax;
236 getHashedCache(void *cache)
238 StatCache *stat = getStat(cache, SIGLENBIT);
240 if ( stat->nhelems < 0 )
247 if (stat->info->tupDesc)
248 elog(ERROR, "GiST doesn't support composite (weighted) type");
249 getFmgrInfoHash(stat->info);
250 for(i=0;i<stat->nelems;i++)
255 hash = DatumGetUInt32(FunctionCall1Coll(&stat->info->hashFunc,
256 DEFAULT_COLLATION_OID,
257 stat->elems[i].datum));
258 index = HASHVAL(hash);
260 stat->helems[i].hash = hash;
261 stat->helems[i].idfMin = stat->helems[i].idfMax = stat->elems[i].idf;
262 if ( stat->selems[index].idfMin == 0.0 )
263 stat->selems[index].idfMin = stat->selems[index].idfMax = stat->elems[i].idf;
264 else if ( stat->selems[index].idfMin > stat->elems[i].idf )
265 stat->selems[index].idfMin = stat->elems[i].idf;
266 else if ( stat->selems[index].idfMax < stat->elems[i].idf )
267 stat->selems[index].idfMax = stat->elems[i].idf;
270 stat->nhelems = uniqueHashedElem( stat->helems, stat->nelems);
277 getHashedElemIdf(StatCache *stat, uint32 hash, HashedElem *StopLow)
279 HashedElem *StopMiddle,
280 *StopHigh = stat->helems + stat->nhelems;
283 StopLow = stat->helems;
285 while (StopLow < StopHigh) {
286 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
288 if ( StopMiddle->hash == hash )
290 else if ( StopMiddle->hash < hash )
291 StopLow = StopMiddle + 1;
293 StopHigh = StopMiddle;
300 fillHashVal(void *cache, SimpleArray *a)
307 allocateHash(cache, a);
309 if (a->info->tupDesc)
310 elog(ERROR, "GiST doesn't support composite (weighted) type");
311 getFmgrInfoHash(a->info);
313 for(i=0;i<a->nelems;i++)
314 a->hash[i] = DatumGetUInt32(FunctionCall1Coll(&a->info->hashFunc,
315 DEFAULT_COLLATION_OID,
321 hasHashedElem(SmlSign *a, uint32 h)
323 uint32 *StopLow = GETARR(a),
324 *StopHigh = GETARR(a) + a->size,
327 while (StopLow < StopHigh) {
328 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
330 if ( *StopMiddle == h )
332 else if ( *StopMiddle < h )
333 StopLow = StopMiddle + 1;
335 StopHigh = StopMiddle;
342 makesign(BITVECP sign, SmlSign *a)
345 uint32 *ptr = GETARR(a);
347 MemSet((void *) sign, 0, sizeof(BITVEC));
348 SETBIT(sign, SIGLENBIT); /* set last unused bit */
350 for (i = 0; i < a->size; i++)
355 sizebitvec(BITVECP sign)
361 size += number_of_ones[(unsigned char) sign[i]];
366 PG_FUNCTION_INFO_V1(gsmlsign_compress);
367 Datum gsmlsign_compress(PG_FUNCTION_ARGS);
369 gsmlsign_compress(PG_FUNCTION_ARGS)
371 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
372 GISTENTRY *retval = entry;
374 if (entry->leafkey) /* new key */
377 ArrayType *a = DatumGetArrayTypeP(entry->key);
379 sign = Array2HashedArray(NULL, a);
381 if ( VARSIZE(sign) > TOAST_INDEX_TARGET )
382 { /* make signature due to its big size */
386 len = CALCGTSIZE( SIGNKEY, sign->size );
387 tmpsign = palloc( len );
388 tmpsign->flag = SIGNKEY;
389 SET_VARSIZE(tmpsign, len);
391 makesign(GETSIGN(tmpsign), sign);
392 tmpsign->size = sizebitvec(GETSIGN(tmpsign));
393 tmpsign->maxrepeat = sign->maxrepeat;
397 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
398 gistentryinit(*retval, PointerGetDatum(sign),
399 entry->rel, entry->page,
400 entry->offset, false);
402 else if ( ISSIGNKEY(DatumGetPointer(entry->key)) &&
403 !ISALLTRUE(DatumGetPointer(entry->key)) )
405 SmlSign *sign = (SmlSign*)DatumGetPointer(entry->key);
407 Assert( sign->size == sizebitvec(GETSIGN(sign)) );
409 if ( sign->size == SIGLENBIT )
411 int32 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
412 int32 maxrepeat = sign->maxrepeat;
414 sign = (SmlSign *) palloc(len);
415 SET_VARSIZE(sign, len);
416 sign->flag = SIGNKEY | ALLISTRUE;
417 sign->size = SIGLENBIT;
418 sign->maxrepeat = maxrepeat;
420 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
422 gistentryinit(*retval, PointerGetDatum(sign),
423 entry->rel, entry->page,
424 entry->offset, false);
428 PG_RETURN_POINTER(retval);
431 PG_FUNCTION_INFO_V1(gsmlsign_decompress);
432 Datum gsmlsign_decompress(PG_FUNCTION_ARGS);
434 gsmlsign_decompress(PG_FUNCTION_ARGS)
436 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
437 SmlSign *key = (SmlSign*)PG_DETOAST_DATUM(entry->key);
439 if (key != (SmlSign *) DatumGetPointer(entry->key))
441 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
443 gistentryinit(*retval, PointerGetDatum(key),
444 entry->rel, entry->page,
445 entry->offset, false);
447 PG_RETURN_POINTER(retval);
450 PG_RETURN_POINTER(entry);
457 unionkey(BITVECP sbase, SmlSign *add)
463 BITVECP sadd = GETSIGN(add);
473 uint32 *ptr = GETARR(add);
475 for (i = 0; i < add->size; i++)
482 PG_FUNCTION_INFO_V1(gsmlsign_union);
483 Datum gsmlsign_union(PG_FUNCTION_ARGS);
485 gsmlsign_union(PG_FUNCTION_ARGS)
487 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
488 int *size = (int *) PG_GETARG_POINTER(1);
496 MemSet((void *) base, 0, sizeof(BITVEC));
497 for (i = 0; i < entryvec->n; i++)
499 if (GETENTRY(entryvec, i)->maxrepeat > maxrepeat)
500 maxrepeat = GETENTRY(entryvec, i)->maxrepeat;
501 if (unionkey(base, GETENTRY(entryvec, i)))
509 len = CALCGTSIZE(flag, 0);
510 result = (SmlSign *) palloc(len);
512 SET_VARSIZE(result, len);
514 result->maxrepeat = maxrepeat;
516 if (!ISALLTRUE(result))
518 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
519 result->size = sizebitvec(GETSIGN(result));
522 result->size = SIGLENBIT;
524 PG_RETURN_POINTER(result);
531 PG_FUNCTION_INFO_V1(gsmlsign_same);
532 Datum gsmlsign_same(PG_FUNCTION_ARGS);
534 gsmlsign_same(PG_FUNCTION_ARGS)
536 SmlSign *a = (SmlSign*)PG_GETARG_POINTER(0);
537 SmlSign *b = (SmlSign*)PG_GETARG_POINTER(1);
538 bool *result = (bool *) PG_GETARG_POINTER(2);
540 if (a->size != b->size)
544 else if (ISSIGNKEY(a))
545 { /* then b also ISSIGNKEY */
548 /* in this case b is all true too - other cases is catched
555 BITVECP sa = GETSIGN(a),
575 uint32 *ptra = GETARR(a),
580 for (i = 0; i < a->size; i++)
582 if ( ptra[i] != ptrb[i])
590 PG_RETURN_POINTER(result);
597 hemdistsign(BITVECP a, BITVECP b)
605 diff = (unsigned char) (a[i] ^ b[i]);
606 dist += number_of_ones[diff];
612 hemdist(SmlSign *a, SmlSign *b)
619 return SIGLENBIT - b->size;
621 else if (ISALLTRUE(b))
622 return SIGLENBIT - b->size;
624 return hemdistsign(GETSIGN(a), GETSIGN(b));
627 PG_FUNCTION_INFO_V1(gsmlsign_penalty);
628 Datum gsmlsign_penalty(PG_FUNCTION_ARGS);
630 gsmlsign_penalty(PG_FUNCTION_ARGS)
632 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
633 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
634 float *penalty = (float *) PG_GETARG_POINTER(2);
635 SmlSign *origval = (SmlSign *) DatumGetPointer(origentry->key);
636 SmlSign *newval = (SmlSign *) DatumGetPointer(newentry->key);
637 BITVECP orig = GETSIGN(origval);
641 if (ISARRKEY(newval))
645 makesign(sign, newval);
647 if (ISALLTRUE(origval))
648 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
650 *penalty = hemdistsign(sign, orig);
653 *penalty = hemdist(origval, newval);
655 PG_RETURN_POINTER(penalty);
670 fillcache(CACHESIGN *item, SmlSign *key)
672 item->allistrue = false;
673 item->size = key->size;
677 makesign(item->sign, key);
678 item->size = sizebitvec( item->sign );
680 else if (ISALLTRUE(key))
681 item->allistrue = true;
683 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
686 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
695 comparecost(const void *va, const void *vb)
697 SPLITCOST *a = (SPLITCOST *) va;
698 SPLITCOST *b = (SPLITCOST *) vb;
700 if (a->cost == b->cost)
703 return (a->cost > b->cost) ? 1 : -1;
707 hemdistcache(CACHESIGN *a, CACHESIGN *b)
714 return SIGLENBIT - b->size;
716 else if (b->allistrue)
717 return SIGLENBIT - a->size;
719 return hemdistsign(a->sign, b->sign);
722 PG_FUNCTION_INFO_V1(gsmlsign_picksplit);
723 Datum gsmlsign_picksplit(PG_FUNCTION_ARGS);
725 gsmlsign_picksplit(PG_FUNCTION_ARGS)
727 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
728 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
740 OffsetNumber seed_1 = 0,
748 SPLITCOST *costvector;
750 maxoff = entryvec->n - 2;
751 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
752 v->spl_left = (OffsetNumber *) palloc(nbytes);
753 v->spl_right = (OffsetNumber *) palloc(nbytes);
755 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
756 fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
758 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
760 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
762 if (k == FirstOffsetNumber)
763 fillcache(&cache[j], GETENTRY(entryvec, j));
765 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
766 if (size_waste > waste)
777 right = v->spl_right;
780 if (seed_1 == 0 || seed_2 == 0)
786 /* form initial .. */
787 if (cache[seed_1].allistrue)
789 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
790 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
791 datum_l->flag = SIGNKEY | ALLISTRUE;
792 datum_l->size = SIGLENBIT;
796 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
797 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
798 datum_l->flag = SIGNKEY;
799 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
800 datum_l->size = cache[seed_1].size;
802 if (cache[seed_2].allistrue)
804 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
805 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
806 datum_r->flag = SIGNKEY | ALLISTRUE;
807 datum_r->size = SIGLENBIT;
811 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
812 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
813 datum_r->flag = SIGNKEY;
814 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
815 datum_r->size = cache[seed_2].size;
818 union_l = GETSIGN(datum_l);
819 union_r = GETSIGN(datum_r);
820 maxoff = OffsetNumberNext(maxoff);
821 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
822 /* sort before ... */
823 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
824 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
826 costvector[j - 1].pos = j;
827 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
828 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
829 costvector[j - 1].cost = Abs(size_alpha - size_beta);
831 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
833 datum_l->maxrepeat = datum_r->maxrepeat = 1;
835 for (k = 0; k < maxoff; k++)
837 j = costvector[k].pos;
844 else if (j == seed_2)
851 if (ISALLTRUE(datum_l) || cache[j].allistrue)
853 if (ISALLTRUE(datum_l) && cache[j].allistrue)
856 size_alpha = SIGLENBIT - (
857 (cache[j].allistrue) ? datum_l->size : cache[j].size
861 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
863 if (ISALLTRUE(datum_r) || cache[j].allistrue)
865 if (ISALLTRUE(datum_r) && cache[j].allistrue)
868 size_beta = SIGLENBIT - (
869 (cache[j].allistrue) ? datum_r->size : cache[j].size
873 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
875 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
877 if (ISALLTRUE(datum_l) || cache[j].allistrue)
879 if (!ISALLTRUE(datum_l))
880 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
881 datum_l->size = SIGLENBIT;
887 union_l[i] |= ptr[i];
888 datum_l->size = sizebitvec(union_l);
895 if (ISALLTRUE(datum_r) || cache[j].allistrue)
897 if (!ISALLTRUE(datum_r))
898 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
899 datum_r->size = SIGLENBIT;
905 union_r[i] |= ptr[i];
906 datum_r->size = sizebitvec(union_r);
912 *right = *left = FirstOffsetNumber;
913 v->spl_ldatum = PointerGetDatum(datum_l);
914 v->spl_rdatum = PointerGetDatum(datum_r);
916 Assert( datum_l->size = sizebitvec(GETSIGN(datum_l)) );
917 Assert( datum_r->size = sizebitvec(GETSIGN(datum_r)) );
919 PG_RETURN_POINTER(v);
923 getIdfMaxLimit(SmlSign *key)
925 switch( getTFMethod() )
931 return (double)(key->maxrepeat);
934 return 1.0 + log( (double)(key->maxrepeat) );
937 elog(ERROR,"Unknown TF method: %d", getTFMethod());
944 * Consistent function
946 PG_FUNCTION_INFO_V1(gsmlsign_consistent);
947 Datum gsmlsign_consistent(PG_FUNCTION_ARGS);
949 gsmlsign_consistent(PG_FUNCTION_ARGS)
951 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
952 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
953 bool *recheck = (bool *) PG_GETARG_POINTER(4);
955 SmlSign *key = (SmlSign*)DatumGetPointer(entry->key);
961 fcinfo->flinfo->fn_extra = SearchArrayCache(
962 fcinfo->flinfo->fn_extra,
963 fcinfo->flinfo->fn_mcxt,
964 PG_GETARG_DATUM(1), &a, &s, &query);
971 if ( strategy == SmlarOverlapStrategy )
977 else if (ISARRKEY(key))
979 uint32 *kptr = GETARR(key),
980 *qptr = GETARR(query);
982 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
986 else if ( *kptr > *qptr )
998 BITVECP sign = GETSIGN(key);
1000 fillHashVal(fcinfo->flinfo->fn_extra, s);
1002 for(i=0; i<s->nelems; i++)
1004 if ( GETBIT(sign, HASHVAL(s->hash[i])) )
1013 * SmlarSimilarityStrategy
1015 else if (ISALLTRUE(key))
1017 if ( GIST_LEAF(entry) )
1020 * With TF/IDF similarity we cannot say anything useful
1022 if ( query->size < SIGLENBIT && getSmlType() != ST_TFIDF )
1024 double power = ((double)(query->size)) * ((double)(SIGLENBIT));
1026 if ( ((double)(query->size)) / sqrt(power) >= GetSmlarLimit() )
1037 else if (ISARRKEY(key))
1039 uint32 *kptr = GETARR(key),
1040 *qptr = GETARR(query);
1042 Assert( GIST_LEAF(entry) );
1044 switch(getSmlType())
1048 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1052 double maxKTF = getIdfMaxLimit(key);
1056 fillHashVal(fcinfo->flinfo->fn_extra, s);
1057 if ( stat->info != s->info )
1058 elog(ERROR,"Statistic and actual argument have different type");
1060 for(i=0;i<s->nelems;i++)
1062 sumQ += s->df[i] * s->df[i];
1064 h = getHashedElemIdf(stat, s->hash[i], NULL);
1065 if ( h && hasHashedElem(key, s->hash[i]) )
1067 sumK += h->idfMin * h->idfMin;
1068 sumU += h->idfMax * maxKTF * s->df[i];
1072 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1075 * More precisely calculate sumK
1080 for(i=0;i<key->size;i++)
1082 h = getHashedElemIdf(stat, GETARR(key)[i], h);
1084 sumK += h->idfMin * h->idfMin;
1087 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1095 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1097 if ( ((double)Min(key->size, s->nelems)) / power >= GetSmlarLimit() )
1101 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
1103 if ( *kptr < *qptr )
1105 else if ( *kptr > *qptr )
1115 if ( ((double)cnt) / power >= GetSmlarLimit() )
1121 elog(ERROR,"GiST doesn't support current formula type of similarity");
1126 BITVECP sign = GETSIGN(key);
1129 fillHashVal(fcinfo->flinfo->fn_extra, s);
1131 if ( GIST_LEAF(entry) )
1133 switch(getSmlType())
1137 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1141 double maxKTF = getIdfMaxLimit(key);
1144 if ( stat->info != s->info )
1145 elog(ERROR,"Statistic and actual argument have different type");
1147 for(i=0;i<s->nelems;i++)
1149 int32 hbit = HASHVAL(s->hash[i]);
1151 sumQ += s->df[i] * s->df[i];
1152 if ( GETBIT(sign, hbit) )
1154 sumK += stat->selems[ hbit ].idfMin * stat->selems[ hbit ].idfMin;
1155 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1159 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1162 * More precisely calculate sumK
1166 for(i=0;i<SIGLENBIT;i++)
1167 if ( GETBIT(sign,i) )
1168 sumK += stat->selems[ i ].idfMin * stat->selems[ i ].idfMin;
1170 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1179 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1181 for(i=0; i<s->nelems; i++)
1182 count += GETBIT(sign, HASHVAL(s->hash[i]));
1184 if ( ((double)count) / power >= GetSmlarLimit() )
1189 elog(ERROR,"GiST doesn't support current formula type of similarity");
1192 else /* non-leaf page */
1194 switch(getSmlType())
1198 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1202 double maxKTF = getIdfMaxLimit(key);
1205 if ( stat->info != s->info )
1206 elog(ERROR,"Statistic and actual argument have different type");
1208 for(i=0;i<s->nelems;i++)
1210 int32 hbit = HASHVAL(s->hash[i]);
1212 sumQ += s->df[i] * s->df[i];
1213 if ( GETBIT(sign, hbit) )
1215 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1216 if ( minK > stat->selems[ hbit ].idfMin || minK < 0.0 )
1217 minK = stat->selems[ hbit ].idfMin;
1221 if ( sumQ > 0.0 && minK > 0.0 && sumU / sqrt( sumQ * minK ) >= GetSmlarLimit() )
1227 for(i=0; i<s->nelems; i++)
1228 count += GETBIT(sign, HASHVAL(s->hash[i]));
1230 if ( s->nelems == count || sqrt(((double)count) / ((double)(s->nelems))) >= GetSmlarLimit() )
1235 elog(ERROR,"GiST doesn't support current formula type of similarity");
1242 static int nnres = 0;
1243 static int nres = 0;
1244 if ( GIST_LEAF(entry) ) {
1249 elog(NOTICE,"%s nn:%d n:%d", (ISARRKEY(key)) ? "ARR" : ( (ISALLTRUE(key)) ? "TRUE" : "SIGN" ), nnres, nres );
1254 PG_RETURN_BOOL(res);