4 #include "access/gist.h"
5 #include "access/skey.h"
6 #if PG_VERSION_NUM < 130000
7 #include "access/tuptoaster.h"
9 #include "access/heaptoast.h"
11 #include "utils/memutils.h"
13 typedef struct SmlSign {
14 int32 vl_len_; /* varlena header (do not touch directly!) */
21 #define SMLSIGNHDRSZ (offsetof(SmlSign, data))
25 #define SIGLEN ( sizeof(int)*SIGLENINT )
26 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
27 typedef char BITVEC[SIGLEN];
28 typedef char *BITVECP;
32 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
33 #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
34 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
35 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
36 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
38 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
39 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
43 #define ALLISTRUE 0x04
45 #define ISARRKEY(x) ( ((SmlSign*)x)->flag & ARRKEY )
46 #define ISSIGNKEY(x) ( ((SmlSign*)x)->flag & SIGNKEY )
47 #define ISALLTRUE(x) ( ((SmlSign*)x)->flag & ALLISTRUE )
49 #define CALCGTSIZE(flag, len) ( SMLSIGNHDRSZ + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(uint32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
50 #define GETSIGN(x) ( (BITVECP)( (char*)x+SMLSIGNHDRSZ ) )
51 #define GETARR(x) ( (uint32*)( (char*)x+SMLSIGNHDRSZ ) )
53 #define GETENTRY(vec,pos) ((SmlSign *) DatumGetPointer((vec)->vector[(pos)].key))
58 PG_FUNCTION_INFO_V1(gsmlsign_in);
59 Datum gsmlsign_in(PG_FUNCTION_ARGS);
61 gsmlsign_in(PG_FUNCTION_ARGS)
63 elog(ERROR, "not implemented");
67 PG_FUNCTION_INFO_V1(gsmlsign_out);
68 Datum gsmlsign_out(PG_FUNCTION_ARGS);
70 gsmlsign_out(PG_FUNCTION_ARGS)
72 elog(ERROR, "not implemented");
80 /* Number of one-bits in an unsigned byte */
81 static const uint8 number_of_ones[256] = {
82 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
83 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
84 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
85 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
86 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
88 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
93 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
97 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
101 compareint(const void *va, const void *vb)
103 uint32 a = *((uint32 *) va);
104 uint32 b = *((uint32 *) vb);
108 return (a > b) ? 1 : -1;
112 * Removes duplicates from an array of int32. 'l' is
113 * size of the input array. Returns the new size of the array.
116 uniqueint(uint32 *a, int32 l, int32 *max)
129 qsort((void *) a, l, sizeof(uint32), compareint);
152 Array2HashedArray(ProcTypeInfo info, ArrayType *a)
154 SimpleArray *s = Array2SimpleArray(info, a);
159 len = CALCGTSIZE( ARRKEY, s->nelems );
161 getFmgrInfoHash(s->info);
162 if (s->info->tupDesc)
163 elog(ERROR, "GiST doesn't support composite (weighted) type");
165 sign = palloc( len );
167 sign->size = s->nelems;
170 for(i=0;i<s->nelems;i++)
171 ptr[i] = DatumGetUInt32(FunctionCall1Coll(&s->info->hashFunc,
172 DEFAULT_COLLATION_OID,
176 * there is a collision of hash-function; len is always equal or less than
179 sign->size = uniqueint( GETARR(sign), sign->size, &sign->maxrepeat );
180 len = CALCGTSIZE( ARRKEY, sign->size );
181 SET_VARSIZE(sign, len);
187 HashedElemCmp(const void *va, const void *vb)
189 uint32 a = ((HashedElem *) va)->hash;
190 uint32 b = ((HashedElem *) vb)->hash;
194 double ma = ((HashedElem *) va)->idfMin;
195 double mb = ((HashedElem *) va)->idfMin;
200 return ( ma > mb ) ? 1 : -1;
203 return (a > b) ? 1 : -1;
207 uniqueHashedElem(HashedElem *a, int32 l)
217 qsort(a, l, sizeof(HashedElem), HashedElemCmp);
220 if (ptr->hash != res->hash)
224 res->idfMax = ptr->idfMax;
232 getHashedCache(void *cache)
234 StatCache *stat = getStat(cache, SIGLENBIT);
236 if ( stat->nhelems < 0 )
243 if (stat->info->tupDesc)
244 elog(ERROR, "GiST doesn't support composite (weighted) type");
245 getFmgrInfoHash(stat->info);
246 for(i=0;i<stat->nelems;i++)
250 hash = DatumGetUInt32(FunctionCall1Coll(&stat->info->hashFunc,
251 DEFAULT_COLLATION_OID,
252 stat->elems[i].datum));
253 int index = HASHVAL(hash);
255 stat->helems[i].hash = hash;
256 stat->helems[i].idfMin = stat->helems[i].idfMax = stat->elems[i].idf;
257 if ( stat->selems[index].idfMin == 0.0 )
258 stat->selems[index].idfMin = stat->selems[index].idfMax = stat->elems[i].idf;
259 else if ( stat->selems[index].idfMin > stat->elems[i].idf )
260 stat->selems[index].idfMin = stat->elems[i].idf;
261 else if ( stat->selems[index].idfMax < stat->elems[i].idf )
262 stat->selems[index].idfMax = stat->elems[i].idf;
265 stat->nhelems = uniqueHashedElem( stat->helems, stat->nelems);
272 getHashedElemIdf(StatCache *stat, uint32 hash, HashedElem *StopLow)
274 HashedElem *StopMiddle,
275 *StopHigh = stat->helems + stat->nhelems;
278 StopLow = stat->helems;
280 while (StopLow < StopHigh) {
281 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
283 if ( StopMiddle->hash == hash )
285 else if ( StopMiddle->hash < hash )
286 StopLow = StopMiddle + 1;
288 StopHigh = StopMiddle;
295 fillHashVal(void *cache, SimpleArray *a)
302 allocateHash(cache, a);
304 if (a->info->tupDesc)
305 elog(ERROR, "GiST doesn't support composite (weighted) type");
306 getFmgrInfoHash(a->info);
308 for(i=0;i<a->nelems;i++)
309 a->hash[i] = DatumGetUInt32(FunctionCall1Coll(&a->info->hashFunc,
310 DEFAULT_COLLATION_OID,
316 hasHashedElem(SmlSign *a, uint32 h)
318 uint32 *StopLow = GETARR(a),
319 *StopHigh = GETARR(a) + a->size,
322 while (StopLow < StopHigh) {
323 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
325 if ( *StopMiddle == h )
327 else if ( *StopMiddle < h )
328 StopLow = StopMiddle + 1;
330 StopHigh = StopMiddle;
337 makesign(BITVECP sign, SmlSign *a)
340 uint32 *ptr = GETARR(a);
342 MemSet((void *) sign, 0, sizeof(BITVEC));
343 SETBIT(sign, SIGLENBIT); /* set last unused bit */
345 for (i = 0; i < a->size; i++)
350 sizebitvec(BITVECP sign)
356 size += number_of_ones[(unsigned char) sign[i]];
361 PG_FUNCTION_INFO_V1(gsmlsign_compress);
362 Datum gsmlsign_compress(PG_FUNCTION_ARGS);
364 gsmlsign_compress(PG_FUNCTION_ARGS)
366 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
367 GISTENTRY *retval = entry;
369 if (entry->leafkey) /* new key */
372 ArrayType *a = DatumGetArrayTypeP(entry->key);
374 sign = Array2HashedArray(NULL, a);
376 if ( VARSIZE(sign) > TOAST_INDEX_TARGET )
377 { /* make signature due to its big size */
381 len = CALCGTSIZE( SIGNKEY, sign->size );
382 tmpsign = palloc( len );
383 tmpsign->flag = SIGNKEY;
384 SET_VARSIZE(tmpsign, len);
386 makesign(GETSIGN(tmpsign), sign);
387 tmpsign->size = sizebitvec(GETSIGN(tmpsign));
388 tmpsign->maxrepeat = sign->maxrepeat;
392 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
393 gistentryinit(*retval, PointerGetDatum(sign),
394 entry->rel, entry->page,
395 entry->offset, false);
397 else if ( ISSIGNKEY(DatumGetPointer(entry->key)) &&
398 !ISALLTRUE(DatumGetPointer(entry->key)) )
400 SmlSign *sign = (SmlSign*)DatumGetPointer(entry->key);
402 Assert( sign->size == sizebitvec(GETSIGN(sign)) );
404 if ( sign->size == SIGLENBIT )
406 int32 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
407 int32 maxrepeat = sign->maxrepeat;
409 sign = (SmlSign *) palloc(len);
410 SET_VARSIZE(sign, len);
411 sign->flag = SIGNKEY | ALLISTRUE;
412 sign->size = SIGLENBIT;
413 sign->maxrepeat = maxrepeat;
415 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
417 gistentryinit(*retval, PointerGetDatum(sign),
418 entry->rel, entry->page,
419 entry->offset, false);
423 PG_RETURN_POINTER(retval);
426 PG_FUNCTION_INFO_V1(gsmlsign_decompress);
427 Datum gsmlsign_decompress(PG_FUNCTION_ARGS);
429 gsmlsign_decompress(PG_FUNCTION_ARGS)
431 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
432 SmlSign *key = (SmlSign*)DatumGetPointer(PG_DETOAST_DATUM(entry->key));
434 if (key != (SmlSign *) DatumGetPointer(entry->key))
436 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
438 gistentryinit(*retval, PointerGetDatum(key),
439 entry->rel, entry->page,
440 entry->offset, false);
442 PG_RETURN_POINTER(retval);
445 PG_RETURN_POINTER(entry);
452 unionkey(BITVECP sbase, SmlSign *add)
458 BITVECP sadd = GETSIGN(add);
468 uint32 *ptr = GETARR(add);
470 for (i = 0; i < add->size; i++)
477 PG_FUNCTION_INFO_V1(gsmlsign_union);
478 Datum gsmlsign_union(PG_FUNCTION_ARGS);
480 gsmlsign_union(PG_FUNCTION_ARGS)
482 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
483 int *size = (int *) PG_GETARG_POINTER(1);
491 MemSet((void *) base, 0, sizeof(BITVEC));
492 for (i = 0; i < entryvec->n; i++)
494 if (GETENTRY(entryvec, i)->maxrepeat > maxrepeat)
495 maxrepeat = GETENTRY(entryvec, i)->maxrepeat;
496 if (unionkey(base, GETENTRY(entryvec, i)))
504 len = CALCGTSIZE(flag, 0);
505 result = (SmlSign *) palloc(len);
507 SET_VARSIZE(result, len);
509 result->maxrepeat = maxrepeat;
511 if (!ISALLTRUE(result))
513 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
514 result->size = sizebitvec(GETSIGN(result));
517 result->size = SIGLENBIT;
519 PG_RETURN_POINTER(result);
526 PG_FUNCTION_INFO_V1(gsmlsign_same);
527 Datum gsmlsign_same(PG_FUNCTION_ARGS);
529 gsmlsign_same(PG_FUNCTION_ARGS)
531 SmlSign *a = (SmlSign*)PG_GETARG_POINTER(0);
532 SmlSign *b = (SmlSign*)PG_GETARG_POINTER(1);
533 bool *result = (bool *) PG_GETARG_POINTER(2);
535 if (a->size != b->size)
539 else if (ISSIGNKEY(a))
540 { /* then b also ISSIGNKEY */
543 /* in this case b is all true too - other cases is catched
550 BITVECP sa = GETSIGN(a),
570 uint32 *ptra = GETARR(a),
575 for (i = 0; i < a->size; i++)
577 if ( ptra[i] != ptrb[i])
585 PG_RETURN_POINTER(result);
592 hemdistsign(BITVECP a, BITVECP b)
600 diff = (unsigned char) (a[i] ^ b[i]);
601 dist += number_of_ones[diff];
607 hemdist(SmlSign *a, SmlSign *b)
614 return SIGLENBIT - b->size;
616 else if (ISALLTRUE(b))
617 return SIGLENBIT - b->size;
619 return hemdistsign(GETSIGN(a), GETSIGN(b));
622 PG_FUNCTION_INFO_V1(gsmlsign_penalty);
623 Datum gsmlsign_penalty(PG_FUNCTION_ARGS);
625 gsmlsign_penalty(PG_FUNCTION_ARGS)
627 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
628 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
629 float *penalty = (float *) PG_GETARG_POINTER(2);
630 SmlSign *origval = (SmlSign *) DatumGetPointer(origentry->key);
631 SmlSign *newval = (SmlSign *) DatumGetPointer(newentry->key);
632 BITVECP orig = GETSIGN(origval);
636 if (ISARRKEY(newval))
640 makesign(sign, newval);
642 if (ISALLTRUE(origval))
643 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
645 *penalty = hemdistsign(sign, orig);
648 *penalty = hemdist(origval, newval);
650 PG_RETURN_POINTER(penalty);
665 fillcache(CACHESIGN *item, SmlSign *key)
667 item->allistrue = false;
668 item->size = key->size;
672 makesign(item->sign, key);
673 item->size = sizebitvec( item->sign );
675 else if (ISALLTRUE(key))
676 item->allistrue = true;
678 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
681 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
690 comparecost(const void *va, const void *vb)
692 SPLITCOST *a = (SPLITCOST *) va;
693 SPLITCOST *b = (SPLITCOST *) vb;
695 if (a->cost == b->cost)
698 return (a->cost > b->cost) ? 1 : -1;
702 hemdistcache(CACHESIGN *a, CACHESIGN *b)
709 return SIGLENBIT - b->size;
711 else if (b->allistrue)
712 return SIGLENBIT - a->size;
714 return hemdistsign(a->sign, b->sign);
717 PG_FUNCTION_INFO_V1(gsmlsign_picksplit);
718 Datum gsmlsign_picksplit(PG_FUNCTION_ARGS);
720 gsmlsign_picksplit(PG_FUNCTION_ARGS)
722 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
723 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
735 OffsetNumber seed_1 = 0,
743 SPLITCOST *costvector;
745 maxoff = entryvec->n - 2;
746 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
747 v->spl_left = (OffsetNumber *) palloc(nbytes);
748 v->spl_right = (OffsetNumber *) palloc(nbytes);
750 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
751 fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
753 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
755 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
757 if (k == FirstOffsetNumber)
758 fillcache(&cache[j], GETENTRY(entryvec, j));
760 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
761 if (size_waste > waste)
772 right = v->spl_right;
775 if (seed_1 == 0 || seed_2 == 0)
781 /* form initial .. */
782 if (cache[seed_1].allistrue)
784 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
785 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
786 datum_l->flag = SIGNKEY | ALLISTRUE;
787 datum_l->size = SIGLENBIT;
791 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
792 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
793 datum_l->flag = SIGNKEY;
794 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
795 datum_l->size = cache[seed_1].size;
797 if (cache[seed_2].allistrue)
799 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
800 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
801 datum_r->flag = SIGNKEY | ALLISTRUE;
802 datum_r->size = SIGLENBIT;
806 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
807 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
808 datum_r->flag = SIGNKEY;
809 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
810 datum_r->size = cache[seed_2].size;
813 union_l = GETSIGN(datum_l);
814 union_r = GETSIGN(datum_r);
815 maxoff = OffsetNumberNext(maxoff);
816 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
817 /* sort before ... */
818 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
819 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
821 costvector[j - 1].pos = j;
822 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
823 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
824 costvector[j - 1].cost = Abs(size_alpha - size_beta);
826 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
828 datum_l->maxrepeat = datum_r->maxrepeat = 1;
830 for (k = 0; k < maxoff; k++)
832 j = costvector[k].pos;
839 else if (j == seed_2)
846 if (ISALLTRUE(datum_l) || cache[j].allistrue)
848 if (ISALLTRUE(datum_l) && cache[j].allistrue)
851 size_alpha = SIGLENBIT - (
852 (cache[j].allistrue) ? datum_l->size : cache[j].size
856 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
858 if (ISALLTRUE(datum_r) || cache[j].allistrue)
860 if (ISALLTRUE(datum_r) && cache[j].allistrue)
863 size_beta = SIGLENBIT - (
864 (cache[j].allistrue) ? datum_r->size : cache[j].size
868 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
870 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
872 if (ISALLTRUE(datum_l) || cache[j].allistrue)
874 if (!ISALLTRUE(datum_l))
875 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
876 datum_l->size = SIGLENBIT;
882 union_l[i] |= ptr[i];
883 datum_l->size = sizebitvec(union_l);
890 if (ISALLTRUE(datum_r) || cache[j].allistrue)
892 if (!ISALLTRUE(datum_r))
893 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
894 datum_r->size = SIGLENBIT;
900 union_r[i] |= ptr[i];
901 datum_r->size = sizebitvec(union_r);
907 *right = *left = FirstOffsetNumber;
908 v->spl_ldatum = PointerGetDatum(datum_l);
909 v->spl_rdatum = PointerGetDatum(datum_r);
911 Assert( datum_l->size = sizebitvec(GETSIGN(datum_l)) );
912 Assert( datum_r->size = sizebitvec(GETSIGN(datum_r)) );
914 PG_RETURN_POINTER(v);
918 getIdfMaxLimit(SmlSign *key)
920 switch( getTFMethod() )
926 return (double)(key->maxrepeat);
929 return 1.0 + log( (double)(key->maxrepeat) );
932 elog(ERROR,"Unknown TF method: %d", getTFMethod());
939 * Consistent function
941 PG_FUNCTION_INFO_V1(gsmlsign_consistent);
942 Datum gsmlsign_consistent(PG_FUNCTION_ARGS);
944 gsmlsign_consistent(PG_FUNCTION_ARGS)
946 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
947 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
948 bool *recheck = (bool *) PG_GETARG_POINTER(4);
950 SmlSign *key = (SmlSign*)DatumGetPointer(entry->key);
956 fcinfo->flinfo->fn_extra = SearchArrayCache(
957 fcinfo->flinfo->fn_extra,
958 fcinfo->flinfo->fn_mcxt,
959 PG_GETARG_DATUM(1), &a, &s, &query);
966 if ( strategy == SmlarOverlapStrategy )
972 else if (ISARRKEY(key))
974 uint32 *kptr = GETARR(key),
975 *qptr = GETARR(query);
977 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
981 else if ( *kptr > *qptr )
993 BITVECP sign = GETSIGN(key);
995 fillHashVal(fcinfo->flinfo->fn_extra, s);
997 for(i=0; i<s->nelems; i++)
999 if ( GETBIT(sign, HASHVAL(s->hash[i])) )
1008 * SmlarSimilarityStrategy
1010 else if (ISALLTRUE(key))
1012 if ( GIST_LEAF(entry) )
1015 * With TF/IDF similarity we cannot say anything useful
1017 if ( query->size < SIGLENBIT && getSmlType() != ST_TFIDF )
1019 double power = ((double)(query->size)) * ((double)(SIGLENBIT));
1021 if ( ((double)(query->size)) / sqrt(power) >= GetSmlarLimit() )
1032 else if (ISARRKEY(key))
1034 uint32 *kptr = GETARR(key),
1035 *qptr = GETARR(query);
1037 Assert( GIST_LEAF(entry) );
1039 switch(getSmlType())
1043 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1047 double maxKTF = getIdfMaxLimit(key);
1051 fillHashVal(fcinfo->flinfo->fn_extra, s);
1052 if ( stat->info != s->info )
1053 elog(ERROR,"Statistic and actual argument have different type");
1055 for(i=0;i<s->nelems;i++)
1057 sumQ += s->df[i] * s->df[i];
1059 h = getHashedElemIdf(stat, s->hash[i], NULL);
1060 if ( h && hasHashedElem(key, s->hash[i]) )
1062 sumK += h->idfMin * h->idfMin;
1063 sumU += h->idfMax * maxKTF * s->df[i];
1067 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1070 * More precisely calculate sumK
1075 for(i=0;i<key->size;i++)
1077 h = getHashedElemIdf(stat, GETARR(key)[i], h);
1079 sumK += h->idfMin * h->idfMin;
1082 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1090 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1092 if ( ((double)Min(key->size, s->nelems)) / power >= GetSmlarLimit() )
1096 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
1098 if ( *kptr < *qptr )
1100 else if ( *kptr > *qptr )
1110 if ( ((double)cnt) / power >= GetSmlarLimit() )
1116 elog(ERROR,"GiST doesn't support current formula type of similarity");
1121 BITVECP sign = GETSIGN(key);
1124 fillHashVal(fcinfo->flinfo->fn_extra, s);
1126 if ( GIST_LEAF(entry) )
1128 switch(getSmlType())
1132 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1136 double maxKTF = getIdfMaxLimit(key);
1139 if ( stat->info != s->info )
1140 elog(ERROR,"Statistic and actual argument have different type");
1142 for(i=0;i<s->nelems;i++)
1144 int32 hbit = HASHVAL(s->hash[i]);
1146 sumQ += s->df[i] * s->df[i];
1147 if ( GETBIT(sign, hbit) )
1149 sumK += stat->selems[ hbit ].idfMin * stat->selems[ hbit ].idfMin;
1150 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1154 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1157 * More precisely calculate sumK
1161 for(i=0;i<SIGLENBIT;i++)
1162 if ( GETBIT(sign,i) )
1163 sumK += stat->selems[ i ].idfMin * stat->selems[ i ].idfMin;
1165 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1174 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1176 for(i=0; i<s->nelems; i++)
1177 count += GETBIT(sign, HASHVAL(s->hash[i]));
1179 if ( ((double)count) / power >= GetSmlarLimit() )
1184 elog(ERROR,"GiST doesn't support current formula type of similarity");
1187 else /* non-leaf page */
1189 switch(getSmlType())
1193 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1197 double maxKTF = getIdfMaxLimit(key);
1200 if ( stat->info != s->info )
1201 elog(ERROR,"Statistic and actual argument have different type");
1203 for(i=0;i<s->nelems;i++)
1205 int32 hbit = HASHVAL(s->hash[i]);
1207 sumQ += s->df[i] * s->df[i];
1208 if ( GETBIT(sign, hbit) )
1210 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1211 if ( minK > stat->selems[ hbit ].idfMin || minK < 0.0 )
1212 minK = stat->selems[ hbit ].idfMin;
1216 if ( sumQ > 0.0 && minK > 0.0 && sumU / sqrt( sumQ * minK ) >= GetSmlarLimit() )
1222 for(i=0; i<s->nelems; i++)
1223 count += GETBIT(sign, HASHVAL(s->hash[i]));
1225 if ( s->nelems == count || sqrt(((double)count) / ((double)(s->nelems))) >= GetSmlarLimit() )
1230 elog(ERROR,"GiST doesn't support current formula type of similarity");
1237 static int nnres = 0;
1238 static int nres = 0;
1239 if ( GIST_LEAF(entry) ) {
1244 elog(NOTICE,"%s nn:%d n:%d", (ISARRKEY(key)) ? "ARR" : ( (ISALLTRUE(key)) ? "TRUE" : "SIGN" ), nnres, nres );
1249 PG_RETURN_BOOL(res);