Support PostgreSQL versions up to 11
[smlar.git] / smlar.h
1 #ifndef _SMLAR_H_
2 #define _SMLAR_H_
3
4 #include "postgres.h"
5 #include "utils/array.h"
6 #include "access/tupdesc.h"
7 #include "catalog/pg_collation.h"
8
9 #include <math.h>
10
11 typedef struct ProcTypeInfoData *ProcTypeInfo;  /* pointer alias; this is what findProcs() returns */
12
13 typedef struct ProcTypeInfoData {  /* per-type descriptor: type properties plus hash/compare function handles */
14         Oid                             typid;  /* type OID (same name as findProcs()'s argument) */
15         Oid                             hashFuncOid;  /* presumably OID of the type's hash function -- TODO confirm */
16         Oid                             cmpFuncOid;  /* presumably OID of the type's comparison function -- TODO confirm */
17         int16                   typlen;  /* NOTE(review): typlen, typbyval, typalign look like the pg_type storage attributes -- confirm */
18         bool                    typbyval;
19         char                    typalign;
20
21         /* support of composite types */
22         char                    typtype;
23         TupleDesc               tupDesc;  /* presumably only meaningful for composite types -- confirm */
24
25         /*
26          * The following members can become invalid,
27          * so fill them just before use
28          */
29         bool                    hashFuncInited;  /* presumably set once hashFunc has been filled -- confirm */
30         FmgrInfo                hashFunc;  /* presumably filled by getFmgrInfoHash() -- confirm */
31         bool                    cmpFuncInited;  /* presumably set once cmpFunc has been filled -- confirm */
32         FmgrInfo                cmpFunc;  /* presumably filled by getFmgrInfoCmp() -- confirm */
33 } ProcTypeInfoData;
34
35 ProcTypeInfo findProcs(Oid typid);  /* returns the ProcTypeInfo for type OID typid */
36 void getFmgrInfoHash(ProcTypeInfo info);  /* presumably fills info->hashFunc -- TODO confirm in the .c file */
37 void getFmgrInfoCmp(ProcTypeInfo info);  /* presumably fills info->cmpFunc -- TODO confirm in the .c file */
38
39 #define NDIM 1  /* the only non-zero dimensionality CHECKARRVALID accepts */
40 /* reject arrays we can't handle (ereport(ERROR) if ARR_NDIM is neither NDIM nor 0, or if ARR_HASNULL is true); but allow a NULL pointer or an empty array. x is expanded several times, so pass an expression without side effects */
41 #define CHECKARRVALID(x) \
42         do { \
43                 if (x) { \
44                         if (ARR_NDIM(x) != NDIM && ARR_NDIM(x) != 0) \
45                                 ereport(ERROR, \
46                                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), \
47                                                  errmsg("array must be one-dimensional"))); \
48                         if (ARR_HASNULL(x)) \
49                                 ereport(ERROR, \
50                                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
51                                                  errmsg("array must not contain nulls"))); \
52                 } \
53         } while(0)
54
55 #define ARRISVOID(x)  ((x) == NULL || ARRNELEMS(x) == 0)  /* true for a NULL pointer or an array with zero items */
56 #define ARRNELEMS(x)  ArrayGetNItems(ARR_NDIM(x), ARR_DIMS(x))  /* item count computed from the array's dimension info */
57
58
59 typedef struct SimpleArray {  /* array representation returned by the Array2SimpleArray*() functions */
60         Datum              *elems;  /* element values; presumably nelems entries -- confirm */
61         double             *df;  /* frequency in current doc */
62         uint32             *hash;  /* presumably per-element hash values, see allocateHash() -- TODO confirm */
63         int                             nelems;  /* element count */
64         ProcTypeInfo    info;  /* type descriptor; presumably the info given to Array2SimpleArray*() -- confirm */
65 } SimpleArray;
66
67 SimpleArray     * Array2SimpleArray(ProcTypeInfo info, ArrayType *a);  /* builds a SimpleArray from array a using type descriptor info */
68 SimpleArray     * Array2SimpleArrayS(ProcTypeInfo info, ArrayType *a);  /* NOTE(review): "S" variant, possibly sorted -- confirm in the .c file */
69 SimpleArray     * Array2SimpleArrayU(ProcTypeInfo info, ArrayType *a, void *cache);  /* NOTE(review): "U" variant, possibly sorted and uniqued; also takes the opaque cache pointer -- confirm */
70 void allocateHash(void *cache, SimpleArray *a);  /* presumably allocates and fills a->hash -- TODO confirm */
71
72 /*
73  * GUC vars: accessor functions (each takes no arguments and returns one setting)
74  */
75 double GetSmlarLimit(void);  /* presumably the similarity threshold -- TODO confirm */
76 const char* GetSmlarTable(void);  /* presumably the name of a statistics table -- TODO confirm */
77 bool GetSmlarUsePersistent(void);
78 double getOneAdd(void);
79 int getTFMethod(void);  /* presumably one of the TF_* constants defined in this header -- confirm */
80 int getSmlType(void);  /* presumably one of the ST_* constants defined in this header -- confirm */
81 /*
82  * GiST support: strategy numbers and the signature type
83  */
84
85 #define SmlarOverlapStrategy            1  /* NOTE(review): presumably must match the operator class definition in the SQL script -- confirm */
86 #define SmlarSimilarityStrategy         2
87
88 struct SmlSign;  /* forward declaration only; not defined in this header */
89 struct SmlSign* Array2HashedArray(ProcTypeInfo info, ArrayType *a);  /* builds an SmlSign from array a -- details in the .c file */
90 /*
91  * Cache subsystem: the cache is passed around as an opaque void pointer
92  */
93 void*   SearchArrayCache( void *cache, MemoryContext ctx, Datum a, ArrayType **da, SimpleArray **sa,  struct SmlSign  **ss );  /* NOTE(review): da, sa, ss look like out-parameters for different forms of a; the returned pointer is presumably the cache -- confirm */
94
95 typedef struct StatElem {  /* statistics entry for one element value */
96         Datum           datum;  /* the element value */
97         double          idf; /*  log(d/df); NOTE(review): presumably d = total docs, df = docs containing datum -- confirm */
98 } StatElem;
99
100 typedef struct HashedElem {  /* statistics entry identified by a hash value instead of a datum */
101         uint32          hash;
102         double          idfMin;  /* presumably the smallest idf among elements sharing this hash -- TODO confirm */
103         double          idfMax;  /* presumably the largest idf among elements sharing this hash -- TODO confirm */
104 } HashedElem;
105
106 typedef struct SignedElem {  /* idf range only; NOTE(review): presumably one entry per signature position -- confirm */
107         double          idfMin;
108         double          idfMax;
109 } SignedElem;
110
111 typedef struct StatCache {  /* statistics container: per-datum, per-hash and signature entries */
112         StatElem                *elems;
113         int                             nelems;  /* presumably the length of elems -- confirm */
114         int64_t                 ndoc;  /* presumably the total document count (the "d" in log(d/df)) -- confirm */
115         HashedElem              *helems;  /* presumably built by getHashStatCache() -- confirm */
116         int                             nhelems;  /* presumably the length of helems -- confirm */
117         SignedElem              *selems;
118         ProcTypeInfo    info;  /* type descriptor for the element datums */
119 } StatCache;
120
121 StatCache *initStatCache(MemoryContext ctx);  /* creates a StatCache; presumably allocated in ctx -- confirm */
122 void getHashStatCache(StatCache *stat, MemoryContext ctx, size_t n);  /* presumably fills the hashed entries of stat; meaning of n not visible here -- confirm */
123
124 void    resetStatCache(void);
125 StatElem  *findStat(StatCache *stat, Datum query, StatElem *low);  /* looks up query in stat; NOTE(review): low is presumably a lower bound for the search -- confirm */
126 StatElem  *fingArrayStat(void *cache, Oid typoid, Datum query, StatElem *low);  /* "fing" is the actual exported spelling (sic), do not rename; like findStat() but takes the opaque cache and a type OID */
127 StatCache *getStat(void *cache, size_t n);  /* returns the StatCache associated with the opaque cache -- details in the .c file */
128
129 /*
130  * Similarity formula types (presumably the values returned by getSmlType() -- confirm)
131  */
132 #define ST_COSINE       1
133 #define ST_TFIDF        2
134 #define ST_OVERLAP      3
135 /*
136  * TF methods (presumably the values returned by getTFMethod() -- confirm)
137  */
138 #define TF_N            1
139 #define TF_LOG          2
140 #define TF_CONST        3
141
142 #define FCall2(f, x1, x2)   FunctionCall2Coll((f), C_COLLATION_OID, (x1), (x2))  /* calls two-argument function f on x1, x2, passing C_COLLATION_OID as the collation */
143
144 #endif