fix GiST estimation
[smlar.git] / smlar_gin.c
1 #include "smlar.h"
2
3 #include "fmgr.h"
4 #include "access/gin.h"
5 #include "access/skey.h"
6 #include "access/tuptoaster.h"
7
8 PG_FUNCTION_INFO_V1(smlararrayextract);
9 Datum smlararrayextract(PG_FUNCTION_ARGS);
10 Datum
11 smlararrayextract(PG_FUNCTION_ARGS)
12 {
13         ArrayType       *array;
14         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
15         SimpleArray     *sa;
16
17         /*
18          * we should guarantee that array will not be destroyed during all
19          * operation
20          */
21         array = PG_GETARG_ARRAYTYPE_P_COPY(0);
22
23         CHECKARRVALID(array);
24
25         sa = Array2SimpleArrayU(NULL, array, NULL);
26
27         *nentries = sa->nelems;
28
29         if (sa->nelems == 0 && PG_NARGS() == 3)
30         {
31                 switch (PG_GETARG_UINT16(2))    /* StrategyNumber */
32                 {
33                         case    SmlarOverlapStrategy:
34                         case    SmlarSimilarityStrategy:
35                                 *nentries = -1; /* nobody can be found */
36                                 break;
37                         default:
38                                 break;
39                 }
40         }
41
42         PG_RETURN_POINTER( sa->elems );
43 }
44
45 PG_FUNCTION_INFO_V1(smlarqueryarrayextract);
46 Datum smlarqueryarrayextract(PG_FUNCTION_ARGS);
47 Datum
48 smlarqueryarrayextract(PG_FUNCTION_ARGS)
49 {
50         PG_RETURN_DATUM(DirectFunctionCall3(smlararrayextract,
51                                                                                 PG_GETARG_DATUM(0),
52                                                                                 PG_GETARG_DATUM(1),
53                                                                                 PG_GETARG_DATUM(2)));
54 }
55
56 PG_FUNCTION_INFO_V1(smlararrayconsistent);
57 Datum smlararrayconsistent(PG_FUNCTION_ARGS);
58 Datum
59 smlararrayconsistent(PG_FUNCTION_ARGS)
60 {
61         bool                    *check = (bool *) PG_GETARG_POINTER(0);
62         StrategyNumber  strategy = PG_GETARG_UINT16(1);
63         SimpleArray             *sa;
64         bool                    res = false;
65         int                             i,
66                                         cnt = 0;
67         bool                    *recheck = (bool *) PG_GETARG_POINTER(5);
68
69         *recheck = true;
70
71         switch (strategy)
72         {
73                 case SmlarOverlapStrategy:
74                         /* at least one element in check[] is true, so result = true */
75                         res = true;
76                         *recheck = false;
77                         break;
78                 case SmlarSimilarityStrategy:
79
80                         fcinfo->flinfo->fn_extra = SearchArrayCache(
81                                                                                                 fcinfo->flinfo->fn_extra,
82                                                                                                 fcinfo->flinfo->fn_mcxt,
83                                                                                                 PG_GETARG_DATUM(2), NULL, &sa, NULL );
84
85                         for(i=0; i<sa->nelems; i++)
86                                 cnt += check[i];
87
88                         /*
89                          * cnt is a lower limit of elements's number in indexed array;
90                          */
91
92                         switch(getSmlType())
93                         {
94                                 case    ST_TFIDF:
95                                                 {
96                                                         double  weight = 0.0, /* exact weight of union */
97                                                                         saSum = 0.0,  /* exact length of query */
98                                                                         siSum = 0.0;  /* lower limit of length of indexed value */ 
99
100                                                         if ( getTFMethod() != TF_CONST )
101                                                                 elog(ERROR,"GIN supports only smlar.tf_method = \"const\"" );
102
103                                                         Assert(sa->df);
104
105                                                         for(i=0; i<sa->nelems; i++)
106                                                         {
107                                                                 /*
108                                                                  * With smlar.tf_method = "const"   sa->df[i] is 
109                                                                  * equal to its idf, so lookup of StatElem is not needed
110                                                                  */
111                                                                 if ( check[i] )
112                                                                 {
113                                                                         weight += sa->df[i] * sa->df[i];
114                                                                         siSum += sa->df[i] * sa->df[i];
115                                                                 }
116                                                                 saSum += sa->df[i] * sa->df[i];
117                                                         }
118
119                                                         if ( saSum > 0.0 && siSum > 0.0 && weight / sqrt(saSum * siSum ) > GetSmlarLimit() )
120                                                                 res = true;
121                                                 }
122                                                 break;
123                                 case    ST_COSINE:
124                                                 {
125                                                         double                  power;
126
127                                                         power = sqrt( ((double)(sa->nelems)) * ((double)(cnt)) );
128
129                                                         if (  ((double)cnt) / power >= GetSmlarLimit()  )
130                                                                 res = true;
131                                                 }
132                                                 break;
133                                 case    ST_OVERLAP:
134                                                 if (cnt >= GetSmlarLimit())
135                                                         res = true;
136                                                 break;
137                                 default:
138                                         elog(ERROR,"GIN doesn't support current formula type of similarity");
139                         }
140                         break;
141                 default:
142                         elog(ERROR, "smlararrayconsistent: unknown strategy number: %d", strategy);
143         }
144
145         PG_RETURN_BOOL(res);
146 }