798b9be9d1c0756895bbafd13102afa0da411af2
[smlar.git] / smlar_gin.c
1 #include <math.h>
2
3 #include "smlar.h"
4
5 #include "fmgr.h"
6 #include "access/gin.h"
7 #include "access/skey.h"
8 #include "access/heaptoast.h"
9
10 PG_FUNCTION_INFO_V1(smlararrayextract);
11 Datum smlararrayextract(PG_FUNCTION_ARGS);
12 Datum
13 smlararrayextract(PG_FUNCTION_ARGS)
14 {
15         ArrayType       *array;
16         int32           *nentries = (int32 *) PG_GETARG_POINTER(1);
17         SimpleArray     *sa;
18
19         /*
20          * we should guarantee that array will not be destroyed during all
21          * operation
22          */
23         array = PG_GETARG_ARRAYTYPE_P_COPY(0);
24
25         CHECKARRVALID(array);
26
27         sa = Array2SimpleArrayU(NULL, array, NULL);
28
29         *nentries = sa->nelems;
30
31         if (sa->nelems == 0 && PG_NARGS() == 3)
32         {
33                 switch (PG_GETARG_UINT16(2))    /* StrategyNumber */
34                 {
35                         case    SmlarOverlapStrategy:
36                         case    SmlarSimilarityStrategy:
37                                 *nentries = -1; /* nobody can be found */
38                                 break;
39                         default:
40                                 break;
41                 }
42         }
43
44         PG_RETURN_POINTER( sa->elems );
45 }
46
47 PG_FUNCTION_INFO_V1(smlarqueryarrayextract);
48 Datum smlarqueryarrayextract(PG_FUNCTION_ARGS);
49 Datum
50 smlarqueryarrayextract(PG_FUNCTION_ARGS)
51 {
52         PG_RETURN_DATUM(DirectFunctionCall3(smlararrayextract,
53                                                                                 PG_GETARG_DATUM(0),
54                                                                                 PG_GETARG_DATUM(1),
55                                                                                 PG_GETARG_DATUM(2)));
56 }
57
58 PG_FUNCTION_INFO_V1(smlararrayconsistent);
59 Datum smlararrayconsistent(PG_FUNCTION_ARGS);
60 Datum
61 smlararrayconsistent(PG_FUNCTION_ARGS)
62 {
63         bool                    *check = (bool *) PG_GETARG_POINTER(0);
64         StrategyNumber  strategy = PG_GETARG_UINT16(1);
65         SimpleArray             *sa;
66         bool                    res = false;
67         int                             i,
68                                         cnt = 0;
69         bool                    *recheck = (bool *) PG_GETARG_POINTER(5);
70
71         *recheck = true;
72
73         switch (strategy)
74         {
75                 case SmlarOverlapStrategy:
76                         /* at least one element in check[] is true, so result = true */
77                         res = true;
78                         *recheck = false;
79                         break;
80                 case SmlarSimilarityStrategy:
81
82                         fcinfo->flinfo->fn_extra = SearchArrayCache(
83                                                                                                 fcinfo->flinfo->fn_extra,
84                                                                                                 fcinfo->flinfo->fn_mcxt,
85                                                                                                 PG_GETARG_DATUM(2), NULL, &sa, NULL );
86
87                         for(i=0; i<sa->nelems; i++)
88                                 cnt += check[i];
89
90                         /*
91                          * cnt is a lower limit of elements's number in indexed array;
92                          */
93
94                         switch(getSmlType())
95                         {
96                                 case ST_TFIDF:
97                                                 {
98                                                         double  weight = 0.0, /* exact weight of union */
99                                                                         saSum = 0.0,  /* exact length of query */
100                                                                         siSum = 0.0;  /* lower limit of length of indexed value */ 
101
102                                                         if ( getTFMethod() != TF_CONST )
103                                                                 elog(ERROR,"GIN supports only smlar.tf_method = \"const\"" );
104
105                                                         Assert(sa->df);
106
107                                                         for(i=0; i<sa->nelems; i++)
108                                                         {
109                                                                 /*
110                                                                  * With smlar.tf_method = "const"   sa->df[i] is 
111                                                                  * equal to its idf, so lookup of StatElem is not needed
112                                                                  */
113                                                                 if ( check[i] )
114                                                                 {
115                                                                         weight += sa->df[i] * sa->df[i];
116                                                                         siSum += sa->df[i] * sa->df[i];
117                                                                 }
118                                                                 saSum += sa->df[i] * sa->df[i];
119                                                         }
120
121                                                         if ( saSum > 0.0 && siSum > 0.0 && weight / sqrt(saSum * siSum ) > GetSmlarLimit() )
122                                                                 res = true;
123                                                 }
124                                                 break;
125                                 case ST_COSINE:
126                                                 {
127                                                         double                  power;
128
129                                                         power = sqrt( ((double)(sa->nelems)) * ((double)(cnt)) );
130
131                                                         if (  ((double)cnt) / power >= GetSmlarLimit()  )
132                                                                 res = true;
133                                                 }
134                                                 break;
135                                 case ST_OVERLAP:
136                                                 if (cnt >= GetSmlarLimit())
137                                                         res = true;
138                                                 break;
139                                 default:
140                                         elog(ERROR,"GIN doesn't support current formula type of similarity");
141                         }
142                         break;
143                 default:
144                         elog(ERROR, "smlararrayconsistent: unknown strategy number: %d", strategy);
145         }
146
147         PG_RETURN_BOOL(res);
148 }