4 #include "access/gist.h"
5 #include "access/skey.h"
6 #if PG_VERSION_NUM < 130000
7 #include "access/tuptoaster.h"
9 #include "access/heaptoast.h"
11 #include "utils/memutils.h"
13 typedef struct SmlSign {
14 int32 vl_len_; /* varlena header (do not touch directly!) */
21 #define SMLSIGNHDRSZ (offsetof(SmlSign, data))
25 #define SIGLEN ( sizeof(int)*SIGLENINT )
26 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
27 typedef char BITVEC[SIGLEN];
28 typedef char *BITVECP;
32 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
33 #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
34 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
35 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
36 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
38 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
39 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
43 #define ALLISTRUE 0x04
45 #define ISARRKEY(x) ( ((SmlSign*)x)->flag & ARRKEY )
46 #define ISSIGNKEY(x) ( ((SmlSign*)x)->flag & SIGNKEY )
47 #define ISALLTRUE(x) ( ((SmlSign*)x)->flag & ALLISTRUE )
49 #define CALCGTSIZE(flag, len) ( SMLSIGNHDRSZ + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(uint32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
50 #define GETSIGN(x) ( (BITVECP)( (char*)x+SMLSIGNHDRSZ ) )
51 #define GETARR(x) ( (uint32*)( (char*)x+SMLSIGNHDRSZ ) )
53 #define GETENTRY(vec,pos) ((SmlSign *) DatumGetPointer((vec)->vector[(pos)].key))
58 PG_FUNCTION_INFO_V1(gsmlsign_in);
59 Datum gsmlsign_in(PG_FUNCTION_ARGS);
61 gsmlsign_in(PG_FUNCTION_ARGS)
63 elog(ERROR, "not implemented");
67 PG_FUNCTION_INFO_V1(gsmlsign_out);
68 Datum gsmlsign_out(PG_FUNCTION_ARGS);
70 gsmlsign_out(PG_FUNCTION_ARGS)
72 elog(ERROR, "not implemented");
80 /* Number of one-bits in an unsigned byte */
81 static const uint8 number_of_ones[256] = {
82 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
83 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
84 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
85 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
86 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
88 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
93 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
97 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
101 compareint(const void *va, const void *vb)
103 uint32 a = *((uint32 *) va);
104 uint32 b = *((uint32 *) vb);
108 return (a > b) ? 1 : -1;
112 * Removes duplicates from an array of int32. 'l' is
113 * size of the input array. Returns the new size of the array.
116 uniqueint(uint32 *a, int32 l, int32 *max)
129 qsort((void *) a, l, sizeof(uint32), compareint);
152 Array2HashedArray(ProcTypeInfo info, ArrayType *a)
154 SimpleArray *s = Array2SimpleArray(info, a);
159 len = CALCGTSIZE( ARRKEY, s->nelems );
161 getFmgrInfoHash(s->info);
162 if (s->info->tupDesc)
163 elog(ERROR, "GiST doesn't support composite (weighted) type");
165 sign = palloc( len );
167 sign->size = s->nelems;
170 for(i=0;i<s->nelems;i++)
171 ptr[i] = DatumGetUInt32(FunctionCall1Coll(&s->info->hashFunc,
172 DEFAULT_COLLATION_OID,
176 * there is a collision of hash-function; len is always equal or less than
179 sign->size = uniqueint( GETARR(sign), sign->size, &sign->maxrepeat );
180 len = CALCGTSIZE( ARRKEY, sign->size );
181 SET_VARSIZE(sign, len);
187 HashedElemCmp(const void *va, const void *vb)
189 uint32 a = ((HashedElem *) va)->hash;
190 uint32 b = ((HashedElem *) vb)->hash;
194 double ma = ((HashedElem *) va)->idfMin;
195 double mb = ((HashedElem *) va)->idfMin;
200 return ( ma > mb ) ? 1 : -1;
203 return (a > b) ? 1 : -1;
207 uniqueHashedElem(HashedElem *a, int32 l)
217 qsort(a, l, sizeof(HashedElem), HashedElemCmp);
220 if (ptr->hash != res->hash)
224 res->idfMax = ptr->idfMax;
232 getHashedCache(void *cache)
234 StatCache *stat = getStat(cache, SIGLENBIT);
236 if ( stat->nhelems < 0 )
243 if (stat->info->tupDesc)
244 elog(ERROR, "GiST doesn't support composite (weighted) type");
245 getFmgrInfoHash(stat->info);
246 for(i=0;i<stat->nelems;i++)
251 hash = DatumGetUInt32(FunctionCall1Coll(&stat->info->hashFunc,
252 DEFAULT_COLLATION_OID,
253 stat->elems[i].datum));
254 index = HASHVAL(hash);
256 stat->helems[i].hash = hash;
257 stat->helems[i].idfMin = stat->helems[i].idfMax = stat->elems[i].idf;
258 if ( stat->selems[index].idfMin == 0.0 )
259 stat->selems[index].idfMin = stat->selems[index].idfMax = stat->elems[i].idf;
260 else if ( stat->selems[index].idfMin > stat->elems[i].idf )
261 stat->selems[index].idfMin = stat->elems[i].idf;
262 else if ( stat->selems[index].idfMax < stat->elems[i].idf )
263 stat->selems[index].idfMax = stat->elems[i].idf;
266 stat->nhelems = uniqueHashedElem( stat->helems, stat->nelems);
273 getHashedElemIdf(StatCache *stat, uint32 hash, HashedElem *StopLow)
275 HashedElem *StopMiddle,
276 *StopHigh = stat->helems + stat->nhelems;
279 StopLow = stat->helems;
281 while (StopLow < StopHigh) {
282 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
284 if ( StopMiddle->hash == hash )
286 else if ( StopMiddle->hash < hash )
287 StopLow = StopMiddle + 1;
289 StopHigh = StopMiddle;
296 fillHashVal(void *cache, SimpleArray *a)
303 allocateHash(cache, a);
305 if (a->info->tupDesc)
306 elog(ERROR, "GiST doesn't support composite (weighted) type");
307 getFmgrInfoHash(a->info);
309 for(i=0;i<a->nelems;i++)
310 a->hash[i] = DatumGetUInt32(FunctionCall1Coll(&a->info->hashFunc,
311 DEFAULT_COLLATION_OID,
317 hasHashedElem(SmlSign *a, uint32 h)
319 uint32 *StopLow = GETARR(a),
320 *StopHigh = GETARR(a) + a->size,
323 while (StopLow < StopHigh) {
324 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
326 if ( *StopMiddle == h )
328 else if ( *StopMiddle < h )
329 StopLow = StopMiddle + 1;
331 StopHigh = StopMiddle;
338 makesign(BITVECP sign, SmlSign *a)
341 uint32 *ptr = GETARR(a);
343 MemSet((void *) sign, 0, sizeof(BITVEC));
344 SETBIT(sign, SIGLENBIT); /* set last unused bit */
346 for (i = 0; i < a->size; i++)
351 sizebitvec(BITVECP sign)
357 size += number_of_ones[(unsigned char) sign[i]];
362 PG_FUNCTION_INFO_V1(gsmlsign_compress);
363 Datum gsmlsign_compress(PG_FUNCTION_ARGS);
365 gsmlsign_compress(PG_FUNCTION_ARGS)
367 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
368 GISTENTRY *retval = entry;
370 if (entry->leafkey) /* new key */
373 ArrayType *a = DatumGetArrayTypeP(entry->key);
375 sign = Array2HashedArray(NULL, a);
377 if ( VARSIZE(sign) > TOAST_INDEX_TARGET )
378 { /* make signature due to its big size */
382 len = CALCGTSIZE( SIGNKEY, sign->size );
383 tmpsign = palloc( len );
384 tmpsign->flag = SIGNKEY;
385 SET_VARSIZE(tmpsign, len);
387 makesign(GETSIGN(tmpsign), sign);
388 tmpsign->size = sizebitvec(GETSIGN(tmpsign));
389 tmpsign->maxrepeat = sign->maxrepeat;
393 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
394 gistentryinit(*retval, PointerGetDatum(sign),
395 entry->rel, entry->page,
396 entry->offset, false);
398 else if ( ISSIGNKEY(DatumGetPointer(entry->key)) &&
399 !ISALLTRUE(DatumGetPointer(entry->key)) )
401 SmlSign *sign = (SmlSign*)DatumGetPointer(entry->key);
403 Assert( sign->size == sizebitvec(GETSIGN(sign)) );
405 if ( sign->size == SIGLENBIT )
407 int32 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
408 int32 maxrepeat = sign->maxrepeat;
410 sign = (SmlSign *) palloc(len);
411 SET_VARSIZE(sign, len);
412 sign->flag = SIGNKEY | ALLISTRUE;
413 sign->size = SIGLENBIT;
414 sign->maxrepeat = maxrepeat;
416 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
418 gistentryinit(*retval, PointerGetDatum(sign),
419 entry->rel, entry->page,
420 entry->offset, false);
424 PG_RETURN_POINTER(retval);
427 PG_FUNCTION_INFO_V1(gsmlsign_decompress);
428 Datum gsmlsign_decompress(PG_FUNCTION_ARGS);
430 gsmlsign_decompress(PG_FUNCTION_ARGS)
432 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
433 SmlSign *key = (SmlSign*)DatumGetPointer(PG_DETOAST_DATUM(entry->key));
435 if (key != (SmlSign *) DatumGetPointer(entry->key))
437 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
439 gistentryinit(*retval, PointerGetDatum(key),
440 entry->rel, entry->page,
441 entry->offset, false);
443 PG_RETURN_POINTER(retval);
446 PG_RETURN_POINTER(entry);
453 unionkey(BITVECP sbase, SmlSign *add)
459 BITVECP sadd = GETSIGN(add);
469 uint32 *ptr = GETARR(add);
471 for (i = 0; i < add->size; i++)
478 PG_FUNCTION_INFO_V1(gsmlsign_union);
479 Datum gsmlsign_union(PG_FUNCTION_ARGS);
481 gsmlsign_union(PG_FUNCTION_ARGS)
483 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
484 int *size = (int *) PG_GETARG_POINTER(1);
492 MemSet((void *) base, 0, sizeof(BITVEC));
493 for (i = 0; i < entryvec->n; i++)
495 if (GETENTRY(entryvec, i)->maxrepeat > maxrepeat)
496 maxrepeat = GETENTRY(entryvec, i)->maxrepeat;
497 if (unionkey(base, GETENTRY(entryvec, i)))
505 len = CALCGTSIZE(flag, 0);
506 result = (SmlSign *) palloc(len);
508 SET_VARSIZE(result, len);
510 result->maxrepeat = maxrepeat;
512 if (!ISALLTRUE(result))
514 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
515 result->size = sizebitvec(GETSIGN(result));
518 result->size = SIGLENBIT;
520 PG_RETURN_POINTER(result);
527 PG_FUNCTION_INFO_V1(gsmlsign_same);
528 Datum gsmlsign_same(PG_FUNCTION_ARGS);
530 gsmlsign_same(PG_FUNCTION_ARGS)
532 SmlSign *a = (SmlSign*)PG_GETARG_POINTER(0);
533 SmlSign *b = (SmlSign*)PG_GETARG_POINTER(1);
534 bool *result = (bool *) PG_GETARG_POINTER(2);
536 if (a->size != b->size)
540 else if (ISSIGNKEY(a))
541 { /* then b also ISSIGNKEY */
544 /* in this case b is all true too - other cases is catched
551 BITVECP sa = GETSIGN(a),
571 uint32 *ptra = GETARR(a),
576 for (i = 0; i < a->size; i++)
578 if ( ptra[i] != ptrb[i])
586 PG_RETURN_POINTER(result);
593 hemdistsign(BITVECP a, BITVECP b)
601 diff = (unsigned char) (a[i] ^ b[i]);
602 dist += number_of_ones[diff];
608 hemdist(SmlSign *a, SmlSign *b)
615 return SIGLENBIT - b->size;
617 else if (ISALLTRUE(b))
618 return SIGLENBIT - b->size;
620 return hemdistsign(GETSIGN(a), GETSIGN(b));
623 PG_FUNCTION_INFO_V1(gsmlsign_penalty);
624 Datum gsmlsign_penalty(PG_FUNCTION_ARGS);
626 gsmlsign_penalty(PG_FUNCTION_ARGS)
628 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
629 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
630 float *penalty = (float *) PG_GETARG_POINTER(2);
631 SmlSign *origval = (SmlSign *) DatumGetPointer(origentry->key);
632 SmlSign *newval = (SmlSign *) DatumGetPointer(newentry->key);
633 BITVECP orig = GETSIGN(origval);
637 if (ISARRKEY(newval))
641 makesign(sign, newval);
643 if (ISALLTRUE(origval))
644 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
646 *penalty = hemdistsign(sign, orig);
649 *penalty = hemdist(origval, newval);
651 PG_RETURN_POINTER(penalty);
666 fillcache(CACHESIGN *item, SmlSign *key)
668 item->allistrue = false;
669 item->size = key->size;
673 makesign(item->sign, key);
674 item->size = sizebitvec( item->sign );
676 else if (ISALLTRUE(key))
677 item->allistrue = true;
679 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
682 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
691 comparecost(const void *va, const void *vb)
693 SPLITCOST *a = (SPLITCOST *) va;
694 SPLITCOST *b = (SPLITCOST *) vb;
696 if (a->cost == b->cost)
699 return (a->cost > b->cost) ? 1 : -1;
703 hemdistcache(CACHESIGN *a, CACHESIGN *b)
710 return SIGLENBIT - b->size;
712 else if (b->allistrue)
713 return SIGLENBIT - a->size;
715 return hemdistsign(a->sign, b->sign);
718 PG_FUNCTION_INFO_V1(gsmlsign_picksplit);
719 Datum gsmlsign_picksplit(PG_FUNCTION_ARGS);
721 gsmlsign_picksplit(PG_FUNCTION_ARGS)
723 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
724 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
736 OffsetNumber seed_1 = 0,
744 SPLITCOST *costvector;
746 maxoff = entryvec->n - 2;
747 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
748 v->spl_left = (OffsetNumber *) palloc(nbytes);
749 v->spl_right = (OffsetNumber *) palloc(nbytes);
751 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
752 fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
754 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
756 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
758 if (k == FirstOffsetNumber)
759 fillcache(&cache[j], GETENTRY(entryvec, j));
761 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
762 if (size_waste > waste)
773 right = v->spl_right;
776 if (seed_1 == 0 || seed_2 == 0)
782 /* form initial .. */
783 if (cache[seed_1].allistrue)
785 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
786 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
787 datum_l->flag = SIGNKEY | ALLISTRUE;
788 datum_l->size = SIGLENBIT;
792 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
793 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
794 datum_l->flag = SIGNKEY;
795 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
796 datum_l->size = cache[seed_1].size;
798 if (cache[seed_2].allistrue)
800 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
801 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
802 datum_r->flag = SIGNKEY | ALLISTRUE;
803 datum_r->size = SIGLENBIT;
807 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
808 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
809 datum_r->flag = SIGNKEY;
810 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
811 datum_r->size = cache[seed_2].size;
814 union_l = GETSIGN(datum_l);
815 union_r = GETSIGN(datum_r);
816 maxoff = OffsetNumberNext(maxoff);
817 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
818 /* sort before ... */
819 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
820 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
822 costvector[j - 1].pos = j;
823 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
824 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
825 costvector[j - 1].cost = Abs(size_alpha - size_beta);
827 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
829 datum_l->maxrepeat = datum_r->maxrepeat = 1;
831 for (k = 0; k < maxoff; k++)
833 j = costvector[k].pos;
840 else if (j == seed_2)
847 if (ISALLTRUE(datum_l) || cache[j].allistrue)
849 if (ISALLTRUE(datum_l) && cache[j].allistrue)
852 size_alpha = SIGLENBIT - (
853 (cache[j].allistrue) ? datum_l->size : cache[j].size
857 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
859 if (ISALLTRUE(datum_r) || cache[j].allistrue)
861 if (ISALLTRUE(datum_r) && cache[j].allistrue)
864 size_beta = SIGLENBIT - (
865 (cache[j].allistrue) ? datum_r->size : cache[j].size
869 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
871 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
873 if (ISALLTRUE(datum_l) || cache[j].allistrue)
875 if (!ISALLTRUE(datum_l))
876 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
877 datum_l->size = SIGLENBIT;
883 union_l[i] |= ptr[i];
884 datum_l->size = sizebitvec(union_l);
891 if (ISALLTRUE(datum_r) || cache[j].allistrue)
893 if (!ISALLTRUE(datum_r))
894 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
895 datum_r->size = SIGLENBIT;
901 union_r[i] |= ptr[i];
902 datum_r->size = sizebitvec(union_r);
908 *right = *left = FirstOffsetNumber;
909 v->spl_ldatum = PointerGetDatum(datum_l);
910 v->spl_rdatum = PointerGetDatum(datum_r);
912 Assert( datum_l->size = sizebitvec(GETSIGN(datum_l)) );
913 Assert( datum_r->size = sizebitvec(GETSIGN(datum_r)) );
915 PG_RETURN_POINTER(v);
919 getIdfMaxLimit(SmlSign *key)
921 switch( getTFMethod() )
927 return (double)(key->maxrepeat);
930 return 1.0 + log( (double)(key->maxrepeat) );
933 elog(ERROR,"Unknown TF method: %d", getTFMethod());
940 * Consistent function
942 PG_FUNCTION_INFO_V1(gsmlsign_consistent);
943 Datum gsmlsign_consistent(PG_FUNCTION_ARGS);
945 gsmlsign_consistent(PG_FUNCTION_ARGS)
947 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
948 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
949 bool *recheck = (bool *) PG_GETARG_POINTER(4);
951 SmlSign *key = (SmlSign*)DatumGetPointer(entry->key);
957 fcinfo->flinfo->fn_extra = SearchArrayCache(
958 fcinfo->flinfo->fn_extra,
959 fcinfo->flinfo->fn_mcxt,
960 PG_GETARG_DATUM(1), &a, &s, &query);
967 if ( strategy == SmlarOverlapStrategy )
973 else if (ISARRKEY(key))
975 uint32 *kptr = GETARR(key),
976 *qptr = GETARR(query);
978 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
982 else if ( *kptr > *qptr )
994 BITVECP sign = GETSIGN(key);
996 fillHashVal(fcinfo->flinfo->fn_extra, s);
998 for(i=0; i<s->nelems; i++)
1000 if ( GETBIT(sign, HASHVAL(s->hash[i])) )
1009 * SmlarSimilarityStrategy
1011 else if (ISALLTRUE(key))
1013 if ( GIST_LEAF(entry) )
1016 * With TF/IDF similarity we cannot say anything useful
1018 if ( query->size < SIGLENBIT && getSmlType() != ST_TFIDF )
1020 double power = ((double)(query->size)) * ((double)(SIGLENBIT));
1022 if ( ((double)(query->size)) / sqrt(power) >= GetSmlarLimit() )
1033 else if (ISARRKEY(key))
1035 uint32 *kptr = GETARR(key),
1036 *qptr = GETARR(query);
1038 Assert( GIST_LEAF(entry) );
1040 switch(getSmlType())
1044 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1048 double maxKTF = getIdfMaxLimit(key);
1052 fillHashVal(fcinfo->flinfo->fn_extra, s);
1053 if ( stat->info != s->info )
1054 elog(ERROR,"Statistic and actual argument have different type");
1056 for(i=0;i<s->nelems;i++)
1058 sumQ += s->df[i] * s->df[i];
1060 h = getHashedElemIdf(stat, s->hash[i], NULL);
1061 if ( h && hasHashedElem(key, s->hash[i]) )
1063 sumK += h->idfMin * h->idfMin;
1064 sumU += h->idfMax * maxKTF * s->df[i];
1068 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1071 * More precisely calculate sumK
1076 for(i=0;i<key->size;i++)
1078 h = getHashedElemIdf(stat, GETARR(key)[i], h);
1080 sumK += h->idfMin * h->idfMin;
1083 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1091 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1093 if ( ((double)Min(key->size, s->nelems)) / power >= GetSmlarLimit() )
1097 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
1099 if ( *kptr < *qptr )
1101 else if ( *kptr > *qptr )
1111 if ( ((double)cnt) / power >= GetSmlarLimit() )
1117 elog(ERROR,"GiST doesn't support current formula type of similarity");
1122 BITVECP sign = GETSIGN(key);
1125 fillHashVal(fcinfo->flinfo->fn_extra, s);
1127 if ( GIST_LEAF(entry) )
1129 switch(getSmlType())
1133 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1137 double maxKTF = getIdfMaxLimit(key);
1140 if ( stat->info != s->info )
1141 elog(ERROR,"Statistic and actual argument have different type");
1143 for(i=0;i<s->nelems;i++)
1145 int32 hbit = HASHVAL(s->hash[i]);
1147 sumQ += s->df[i] * s->df[i];
1148 if ( GETBIT(sign, hbit) )
1150 sumK += stat->selems[ hbit ].idfMin * stat->selems[ hbit ].idfMin;
1151 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1155 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1158 * More precisely calculate sumK
1162 for(i=0;i<SIGLENBIT;i++)
1163 if ( GETBIT(sign,i) )
1164 sumK += stat->selems[ i ].idfMin * stat->selems[ i ].idfMin;
1166 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1175 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1177 for(i=0; i<s->nelems; i++)
1178 count += GETBIT(sign, HASHVAL(s->hash[i]));
1180 if ( ((double)count) / power >= GetSmlarLimit() )
1185 elog(ERROR,"GiST doesn't support current formula type of similarity");
1188 else /* non-leaf page */
1190 switch(getSmlType())
1194 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1198 double maxKTF = getIdfMaxLimit(key);
1201 if ( stat->info != s->info )
1202 elog(ERROR,"Statistic and actual argument have different type");
1204 for(i=0;i<s->nelems;i++)
1206 int32 hbit = HASHVAL(s->hash[i]);
1208 sumQ += s->df[i] * s->df[i];
1209 if ( GETBIT(sign, hbit) )
1211 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1212 if ( minK > stat->selems[ hbit ].idfMin || minK < 0.0 )
1213 minK = stat->selems[ hbit ].idfMin;
1217 if ( sumQ > 0.0 && minK > 0.0 && sumU / sqrt( sumQ * minK ) >= GetSmlarLimit() )
1223 for(i=0; i<s->nelems; i++)
1224 count += GETBIT(sign, HASHVAL(s->hash[i]));
1226 if ( s->nelems == count || sqrt(((double)count) / ((double)(s->nelems))) >= GetSmlarLimit() )
1231 elog(ERROR,"GiST doesn't support current formula type of similarity");
1238 static int nnres = 0;
1239 static int nres = 0;
1240 if ( GIST_LEAF(entry) ) {
1245 elog(NOTICE,"%s nn:%d n:%d", (ISARRKEY(key)) ? "ARR" : ( (ISALLTRUE(key)) ? "TRUE" : "SIGN" ), nnres, nres );
1250 PG_RETURN_BOOL(res);