4 #include "access/gist.h"
5 #include "access/skey.h"
6 #include "access/tuptoaster.h"
7 #include "utils/memutils.h"
9 typedef struct SmlSign {
10 int32 vl_len_; /* varlena header (do not touch directly!) */
17 #define SMLSIGNHDRSZ (offsetof(SmlSign, data))
21 #define SIGLEN ( sizeof(int)*SIGLENINT )
22 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
23 typedef char BITVEC[SIGLEN];
24 typedef char *BITVECP;
28 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
29 #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
30 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
31 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
32 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
34 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
35 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
39 #define ALLISTRUE 0x04
41 #define ISARRKEY(x) ( ((SmlSign*)x)->flag & ARRKEY )
42 #define ISSIGNKEY(x) ( ((SmlSign*)x)->flag & SIGNKEY )
43 #define ISALLTRUE(x) ( ((SmlSign*)x)->flag & ALLISTRUE )
45 #define CALCGTSIZE(flag, len) ( SMLSIGNHDRSZ + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(uint32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
46 #define GETSIGN(x) ( (BITVECP)( (char*)x+SMLSIGNHDRSZ ) )
47 #define GETARR(x) ( (uint32*)( (char*)x+SMLSIGNHDRSZ ) )
49 #define GETENTRY(vec,pos) ((SmlSign *) DatumGetPointer((vec)->vector[(pos)].key))
54 PG_FUNCTION_INFO_V1(gsmlsign_in);
55 Datum gsmlsign_in(PG_FUNCTION_ARGS);
57 gsmlsign_in(PG_FUNCTION_ARGS)
59 elog(ERROR, "not implemented");
63 PG_FUNCTION_INFO_V1(gsmlsign_out);
64 Datum gsmlsign_out(PG_FUNCTION_ARGS);
66 gsmlsign_out(PG_FUNCTION_ARGS)
68 elog(ERROR, "not implemented");
76 /* Number of one-bits in an unsigned byte */
77 static const uint8 number_of_ones[256] = {
78 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
79 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
80 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
81 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
82 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
83 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
84 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
85 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
86 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
88 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
90 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
92 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
93 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
97 compareint(const void *va, const void *vb)
99 uint32 a = *((uint32 *) va);
100 uint32 b = *((uint32 *) vb);
104 return (a > b) ? 1 : -1;
108 * Removes duplicates from an array of int32. 'l' is
109 * size of the input array. Returns the new size of the array.
112 uniqueint(uint32 *a, int32 l, int32 *max)
125 qsort((void *) a, l, sizeof(uint32), compareint);
148 Array2HashedArray(ProcTypeInfo info, ArrayType *a)
150 SimpleArray *s = Array2SimpleArray(info, a);
155 len = CALCGTSIZE( ARRKEY, s->nelems );
157 getFmgrInfoHash(s->info);
158 if (s->info->tupDesc)
159 elog(ERROR, "GiST doesn't support composite (weighted) type");
161 sign = palloc( len );
163 sign->size = s->nelems;
166 for(i=0;i<s->nelems;i++)
167 ptr[i] = DatumGetUInt32(FunctionCall1Coll(&s->info->hashFunc,
168 DEFAULT_COLLATION_OID,
172 * there is a collision of hash-function; len is always equal or less than
175 sign->size = uniqueint( GETARR(sign), sign->size, &sign->maxrepeat );
176 len = CALCGTSIZE( ARRKEY, sign->size );
177 SET_VARSIZE(sign, len);
183 HashedElemCmp(const void *va, const void *vb)
185 uint32 a = ((HashedElem *) va)->hash;
186 uint32 b = ((HashedElem *) vb)->hash;
190 double ma = ((HashedElem *) va)->idfMin;
191 double mb = ((HashedElem *) va)->idfMin;
196 return ( ma > mb ) ? 1 : -1;
199 return (a > b) ? 1 : -1;
203 uniqueHashedElem(HashedElem *a, int32 l)
213 qsort(a, l, sizeof(HashedElem), HashedElemCmp);
216 if (ptr->hash != res->hash)
220 res->idfMax = ptr->idfMax;
228 getHashedCache(void *cache)
230 StatCache *stat = getStat(cache, SIGLENBIT);
232 if ( stat->nhelems < 0 )
239 if (stat->info->tupDesc)
240 elog(ERROR, "GiST doesn't support composite (weighted) type");
241 getFmgrInfoHash(stat->info);
242 for(i=0;i<stat->nelems;i++)
246 hash = DatumGetUInt32(FunctionCall1Coll(&stat->info->hashFunc,
247 DEFAULT_COLLATION_OID,
248 stat->elems[i].datum));
249 int index = HASHVAL(hash);
251 stat->helems[i].hash = hash;
252 stat->helems[i].idfMin = stat->helems[i].idfMax = stat->elems[i].idf;
253 if ( stat->selems[index].idfMin == 0.0 )
254 stat->selems[index].idfMin = stat->selems[index].idfMax = stat->elems[i].idf;
255 else if ( stat->selems[index].idfMin > stat->elems[i].idf )
256 stat->selems[index].idfMin = stat->elems[i].idf;
257 else if ( stat->selems[index].idfMax < stat->elems[i].idf )
258 stat->selems[index].idfMax = stat->elems[i].idf;
261 stat->nhelems = uniqueHashedElem( stat->helems, stat->nelems);
268 getHashedElemIdf(StatCache *stat, uint32 hash, HashedElem *StopLow)
270 HashedElem *StopMiddle,
271 *StopHigh = stat->helems + stat->nhelems;
274 StopLow = stat->helems;
276 while (StopLow < StopHigh) {
277 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
279 if ( StopMiddle->hash == hash )
281 else if ( StopMiddle->hash < hash )
282 StopLow = StopMiddle + 1;
284 StopHigh = StopMiddle;
291 fillHashVal(void *cache, SimpleArray *a)
298 allocateHash(cache, a);
300 if (a->info->tupDesc)
301 elog(ERROR, "GiST doesn't support composite (weighted) type");
302 getFmgrInfoHash(a->info);
304 for(i=0;i<a->nelems;i++)
305 a->hash[i] = DatumGetUInt32(FunctionCall1Coll(&a->info->hashFunc,
306 DEFAULT_COLLATION_OID,
312 hasHashedElem(SmlSign *a, uint32 h)
314 uint32 *StopLow = GETARR(a),
315 *StopHigh = GETARR(a) + a->size,
318 while (StopLow < StopHigh) {
319 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
321 if ( *StopMiddle == h )
323 else if ( *StopMiddle < h )
324 StopLow = StopMiddle + 1;
326 StopHigh = StopMiddle;
333 makesign(BITVECP sign, SmlSign *a)
336 uint32 *ptr = GETARR(a);
338 MemSet((void *) sign, 0, sizeof(BITVEC));
339 SETBIT(sign, SIGLENBIT); /* set last unused bit */
341 for (i = 0; i < a->size; i++)
346 sizebitvec(BITVECP sign)
352 size += number_of_ones[(unsigned char) sign[i]];
357 PG_FUNCTION_INFO_V1(gsmlsign_compress);
358 Datum gsmlsign_compress(PG_FUNCTION_ARGS);
360 gsmlsign_compress(PG_FUNCTION_ARGS)
362 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
363 GISTENTRY *retval = entry;
365 if (entry->leafkey) /* new key */
368 ArrayType *a = DatumGetArrayTypeP(entry->key);
370 sign = Array2HashedArray(NULL, a);
372 if ( VARSIZE(sign) > TOAST_INDEX_TARGET )
373 { /* make signature due to its big size */
377 len = CALCGTSIZE( SIGNKEY, sign->size );
378 tmpsign = palloc( len );
379 tmpsign->flag = SIGNKEY;
380 SET_VARSIZE(tmpsign, len);
382 makesign(GETSIGN(tmpsign), sign);
383 tmpsign->size = sizebitvec(GETSIGN(tmpsign));
384 tmpsign->maxrepeat = sign->maxrepeat;
388 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
389 gistentryinit(*retval, PointerGetDatum(sign),
390 entry->rel, entry->page,
391 entry->offset, false);
393 else if ( ISSIGNKEY(DatumGetPointer(entry->key)) &&
394 !ISALLTRUE(DatumGetPointer(entry->key)) )
396 SmlSign *sign = (SmlSign*)DatumGetPointer(entry->key);
398 Assert( sign->size == sizebitvec(GETSIGN(sign)) );
400 if ( sign->size == SIGLENBIT )
402 int32 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
403 int32 maxrepeat = sign->maxrepeat;
405 sign = (SmlSign *) palloc(len);
406 SET_VARSIZE(sign, len);
407 sign->flag = SIGNKEY | ALLISTRUE;
408 sign->size = SIGLENBIT;
409 sign->maxrepeat = maxrepeat;
411 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
413 gistentryinit(*retval, PointerGetDatum(sign),
414 entry->rel, entry->page,
415 entry->offset, false);
419 PG_RETURN_POINTER(retval);
422 PG_FUNCTION_INFO_V1(gsmlsign_decompress);
423 Datum gsmlsign_decompress(PG_FUNCTION_ARGS);
425 gsmlsign_decompress(PG_FUNCTION_ARGS)
427 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
428 SmlSign *key = (SmlSign*)DatumGetPointer(PG_DETOAST_DATUM(entry->key));
430 if (key != (SmlSign *) DatumGetPointer(entry->key))
432 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
434 gistentryinit(*retval, PointerGetDatum(key),
435 entry->rel, entry->page,
436 entry->offset, false);
438 PG_RETURN_POINTER(retval);
441 PG_RETURN_POINTER(entry);
448 unionkey(BITVECP sbase, SmlSign *add)
454 BITVECP sadd = GETSIGN(add);
464 uint32 *ptr = GETARR(add);
466 for (i = 0; i < add->size; i++)
473 PG_FUNCTION_INFO_V1(gsmlsign_union);
474 Datum gsmlsign_union(PG_FUNCTION_ARGS);
476 gsmlsign_union(PG_FUNCTION_ARGS)
478 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
479 int *size = (int *) PG_GETARG_POINTER(1);
487 MemSet((void *) base, 0, sizeof(BITVEC));
488 for (i = 0; i < entryvec->n; i++)
490 if (GETENTRY(entryvec, i)->maxrepeat > maxrepeat)
491 maxrepeat = GETENTRY(entryvec, i)->maxrepeat;
492 if (unionkey(base, GETENTRY(entryvec, i)))
500 len = CALCGTSIZE(flag, 0);
501 result = (SmlSign *) palloc(len);
503 SET_VARSIZE(result, len);
505 result->maxrepeat = maxrepeat;
507 if (!ISALLTRUE(result))
509 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
510 result->size = sizebitvec(GETSIGN(result));
513 result->size = SIGLENBIT;
515 PG_RETURN_POINTER(result);
522 PG_FUNCTION_INFO_V1(gsmlsign_same);
523 Datum gsmlsign_same(PG_FUNCTION_ARGS);
525 gsmlsign_same(PG_FUNCTION_ARGS)
527 SmlSign *a = (SmlSign*)PG_GETARG_POINTER(0);
528 SmlSign *b = (SmlSign*)PG_GETARG_POINTER(1);
529 bool *result = (bool *) PG_GETARG_POINTER(2);
531 if (a->size != b->size)
535 else if (ISSIGNKEY(a))
536 { /* then b also ISSIGNKEY */
539 /* in this case b is all true too - other cases is catched
546 BITVECP sa = GETSIGN(a),
566 uint32 *ptra = GETARR(a),
571 for (i = 0; i < a->size; i++)
573 if ( ptra[i] != ptrb[i])
581 PG_RETURN_POINTER(result);
588 hemdistsign(BITVECP a, BITVECP b)
596 diff = (unsigned char) (a[i] ^ b[i]);
597 dist += number_of_ones[diff];
603 hemdist(SmlSign *a, SmlSign *b)
610 return SIGLENBIT - b->size;
612 else if (ISALLTRUE(b))
613 return SIGLENBIT - b->size;
615 return hemdistsign(GETSIGN(a), GETSIGN(b));
618 PG_FUNCTION_INFO_V1(gsmlsign_penalty);
619 Datum gsmlsign_penalty(PG_FUNCTION_ARGS);
621 gsmlsign_penalty(PG_FUNCTION_ARGS)
623 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
624 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
625 float *penalty = (float *) PG_GETARG_POINTER(2);
626 SmlSign *origval = (SmlSign *) DatumGetPointer(origentry->key);
627 SmlSign *newval = (SmlSign *) DatumGetPointer(newentry->key);
628 BITVECP orig = GETSIGN(origval);
632 if (ISARRKEY(newval))
636 makesign(sign, newval);
638 if (ISALLTRUE(origval))
639 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
641 *penalty = hemdistsign(sign, orig);
644 *penalty = hemdist(origval, newval);
646 PG_RETURN_POINTER(penalty);
661 fillcache(CACHESIGN *item, SmlSign *key)
663 item->allistrue = false;
664 item->size = key->size;
668 makesign(item->sign, key);
669 item->size = sizebitvec( item->sign );
671 else if (ISALLTRUE(key))
672 item->allistrue = true;
674 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
677 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
686 comparecost(const void *va, const void *vb)
688 SPLITCOST *a = (SPLITCOST *) va;
689 SPLITCOST *b = (SPLITCOST *) vb;
691 if (a->cost == b->cost)
694 return (a->cost > b->cost) ? 1 : -1;
698 hemdistcache(CACHESIGN *a, CACHESIGN *b)
705 return SIGLENBIT - b->size;
707 else if (b->allistrue)
708 return SIGLENBIT - a->size;
710 return hemdistsign(a->sign, b->sign);
713 PG_FUNCTION_INFO_V1(gsmlsign_picksplit);
714 Datum gsmlsign_picksplit(PG_FUNCTION_ARGS);
716 gsmlsign_picksplit(PG_FUNCTION_ARGS)
718 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
719 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
731 OffsetNumber seed_1 = 0,
739 SPLITCOST *costvector;
741 maxoff = entryvec->n - 2;
742 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
743 v->spl_left = (OffsetNumber *) palloc(nbytes);
744 v->spl_right = (OffsetNumber *) palloc(nbytes);
746 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
747 fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
749 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
751 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
753 if (k == FirstOffsetNumber)
754 fillcache(&cache[j], GETENTRY(entryvec, j));
756 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
757 if (size_waste > waste)
768 right = v->spl_right;
771 if (seed_1 == 0 || seed_2 == 0)
777 /* form initial .. */
778 if (cache[seed_1].allistrue)
780 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
781 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
782 datum_l->flag = SIGNKEY | ALLISTRUE;
783 datum_l->size = SIGLENBIT;
787 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
788 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
789 datum_l->flag = SIGNKEY;
790 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
791 datum_l->size = cache[seed_1].size;
793 if (cache[seed_2].allistrue)
795 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
796 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
797 datum_r->flag = SIGNKEY | ALLISTRUE;
798 datum_r->size = SIGLENBIT;
802 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
803 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
804 datum_r->flag = SIGNKEY;
805 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
806 datum_r->size = cache[seed_2].size;
809 union_l = GETSIGN(datum_l);
810 union_r = GETSIGN(datum_r);
811 maxoff = OffsetNumberNext(maxoff);
812 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
813 /* sort before ... */
814 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
815 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
817 costvector[j - 1].pos = j;
818 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
819 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
820 costvector[j - 1].cost = Abs(size_alpha - size_beta);
822 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
824 datum_l->maxrepeat = datum_r->maxrepeat = 1;
826 for (k = 0; k < maxoff; k++)
828 j = costvector[k].pos;
835 else if (j == seed_2)
842 if (ISALLTRUE(datum_l) || cache[j].allistrue)
844 if (ISALLTRUE(datum_l) && cache[j].allistrue)
847 size_alpha = SIGLENBIT - (
848 (cache[j].allistrue) ? datum_l->size : cache[j].size
852 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
854 if (ISALLTRUE(datum_r) || cache[j].allistrue)
856 if (ISALLTRUE(datum_r) && cache[j].allistrue)
859 size_beta = SIGLENBIT - (
860 (cache[j].allistrue) ? datum_r->size : cache[j].size
864 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
866 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
868 if (ISALLTRUE(datum_l) || cache[j].allistrue)
870 if (!ISALLTRUE(datum_l))
871 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
872 datum_l->size = SIGLENBIT;
878 union_l[i] |= ptr[i];
879 datum_l->size = sizebitvec(union_l);
886 if (ISALLTRUE(datum_r) || cache[j].allistrue)
888 if (!ISALLTRUE(datum_r))
889 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
890 datum_r->size = SIGLENBIT;
896 union_r[i] |= ptr[i];
897 datum_r->size = sizebitvec(union_r);
903 *right = *left = FirstOffsetNumber;
904 v->spl_ldatum = PointerGetDatum(datum_l);
905 v->spl_rdatum = PointerGetDatum(datum_r);
907 Assert( datum_l->size = sizebitvec(GETSIGN(datum_l)) );
908 Assert( datum_r->size = sizebitvec(GETSIGN(datum_r)) );
910 PG_RETURN_POINTER(v);
914 getIdfMaxLimit(SmlSign *key)
916 switch( getTFMethod() )
922 return (double)(key->maxrepeat);
925 return 1.0 + log( (double)(key->maxrepeat) );
928 elog(ERROR,"Unknown TF method: %d", getTFMethod());
935 * Consistent function
937 PG_FUNCTION_INFO_V1(gsmlsign_consistent);
938 Datum gsmlsign_consistent(PG_FUNCTION_ARGS);
940 gsmlsign_consistent(PG_FUNCTION_ARGS)
942 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
943 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
944 bool *recheck = (bool *) PG_GETARG_POINTER(4);
946 SmlSign *key = (SmlSign*)DatumGetPointer(entry->key);
952 fcinfo->flinfo->fn_extra = SearchArrayCache(
953 fcinfo->flinfo->fn_extra,
954 fcinfo->flinfo->fn_mcxt,
955 PG_GETARG_DATUM(1), &a, &s, &query);
962 if ( strategy == SmlarOverlapStrategy )
968 else if (ISARRKEY(key))
970 uint32 *kptr = GETARR(key),
971 *qptr = GETARR(query);
973 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
977 else if ( *kptr > *qptr )
989 BITVECP sign = GETSIGN(key);
991 fillHashVal(fcinfo->flinfo->fn_extra, s);
993 for(i=0; i<s->nelems; i++)
995 if ( GETBIT(sign, HASHVAL(s->hash[i])) )
1004 * SmlarSimilarityStrategy
1006 else if (ISALLTRUE(key))
1008 if ( GIST_LEAF(entry) )
1011 * With TF/IDF similarity we cannot say anything useful
1013 if ( query->size < SIGLENBIT && getSmlType() != ST_TFIDF )
1015 double power = ((double)(query->size)) * ((double)(SIGLENBIT));
1017 if ( ((double)(query->size)) / sqrt(power) >= GetSmlarLimit() )
1028 else if (ISARRKEY(key))
1030 uint32 *kptr = GETARR(key),
1031 *qptr = GETARR(query);
1033 Assert( GIST_LEAF(entry) );
1035 switch(getSmlType())
1039 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1043 double maxKTF = getIdfMaxLimit(key);
1047 fillHashVal(fcinfo->flinfo->fn_extra, s);
1048 if ( stat->info != s->info )
1049 elog(ERROR,"Statistic and actual argument have different type");
1051 for(i=0;i<s->nelems;i++)
1053 sumQ += s->df[i] * s->df[i];
1055 h = getHashedElemIdf(stat, s->hash[i], NULL);
1056 if ( h && hasHashedElem(key, s->hash[i]) )
1058 sumK += h->idfMin * h->idfMin;
1059 sumU += h->idfMax * maxKTF * s->df[i];
1063 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1066 * More precisely calculate sumK
1071 for(i=0;i<key->size;i++)
1073 h = getHashedElemIdf(stat, GETARR(key)[i], h);
1075 sumK += h->idfMin * h->idfMin;
1078 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1086 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1088 if ( ((double)Min(key->size, s->nelems)) / power >= GetSmlarLimit() )
1092 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
1094 if ( *kptr < *qptr )
1096 else if ( *kptr > *qptr )
1106 if ( ((double)cnt) / power >= GetSmlarLimit() )
1112 elog(ERROR,"GiST doesn't support current formula type of similarity");
1117 BITVECP sign = GETSIGN(key);
1120 fillHashVal(fcinfo->flinfo->fn_extra, s);
1122 if ( GIST_LEAF(entry) )
1124 switch(getSmlType())
1128 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1132 double maxKTF = getIdfMaxLimit(key);
1135 if ( stat->info != s->info )
1136 elog(ERROR,"Statistic and actual argument have different type");
1138 for(i=0;i<s->nelems;i++)
1140 int32 hbit = HASHVAL(s->hash[i]);
1142 sumQ += s->df[i] * s->df[i];
1143 if ( GETBIT(sign, hbit) )
1145 sumK += stat->selems[ hbit ].idfMin * stat->selems[ hbit ].idfMin;
1146 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1150 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1153 * More precisely calculate sumK
1157 for(i=0;i<SIGLENBIT;i++)
1158 if ( GETBIT(sign,i) )
1159 sumK += stat->selems[ i ].idfMin * stat->selems[ i ].idfMin;
1161 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1170 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1172 for(i=0; i<s->nelems; i++)
1173 count += GETBIT(sign, HASHVAL(s->hash[i]));
1175 if ( ((double)count) / power >= GetSmlarLimit() )
1180 elog(ERROR,"GiST doesn't support current formula type of similarity");
1183 else /* non-leaf page */
1185 switch(getSmlType())
1189 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1193 double maxKTF = getIdfMaxLimit(key);
1196 if ( stat->info != s->info )
1197 elog(ERROR,"Statistic and actual argument have different type");
1199 for(i=0;i<s->nelems;i++)
1201 int32 hbit = HASHVAL(s->hash[i]);
1203 sumQ += s->df[i] * s->df[i];
1204 if ( GETBIT(sign, hbit) )
1206 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1207 if ( minK > stat->selems[ hbit ].idfMin || minK < 0.0 )
1208 minK = stat->selems[ hbit ].idfMin;
1212 if ( sumQ > 0.0 && minK > 0.0 && sumU / sqrt( sumQ * minK ) >= GetSmlarLimit() )
1218 for(i=0; i<s->nelems; i++)
1219 count += GETBIT(sign, HASHVAL(s->hash[i]));
1221 if ( s->nelems == count || sqrt(((double)count) / ((double)(s->nelems))) >= GetSmlarLimit() )
1226 elog(ERROR,"GiST doesn't support current formula type of similarity");
1233 static int nnres = 0;
1234 static int nres = 0;
1235 if ( GIST_LEAF(entry) ) {
1240 elog(NOTICE,"%s nn:%d n:%d", (ISARRKEY(key)) ? "ARR" : ( (ISALLTRUE(key)) ? "TRUE" : "SIGN" ), nnres, nres );
1245 PG_RETURN_BOOL(res);