3d69a1865ed14e2e8f89cca179d160ada5cf1ad1
[ftsbench.git] / ftsbench.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <sys/time.h>

#include "ftsbench.h"
9
/*
 * Supported database backends.
 *
 * The numeric values are also used as indexes into DBDesc[], so the order
 * here has to match the order of the rows in that table.  NULLSQL is the
 * "nothing selected" value and marks the terminating row of DBDesc[].
 */
typedef enum RDBMS {
	PostgreSQL = 0,
	MySQL,
	NULLSQL
} RDBMS;
15
16 typedef struct RDBMSDesc {
17         RDBMS   rdbms;
18         char    *shortname;
19         char    *longname;
20         ftsDB*  (*init)(char *);
21 } RDBMSDesc;
22
23 static RDBMSDesc DBDesc[] = {
24         { PostgreSQL, "pgsql", "PostgreSQL", PGInit }, 
25         { MySQL,          "mysql", "MySQL",      MYInit },
26         { NULLSQL,        NULL,    NULL,         NULL   }
27 };
28
29 static void
30 usage() {
31         char buf[1024];
32         int i, first=0;
33
34         *buf = '\0';
35         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
36                 if ( DBDesc[i].init == NULL )
37                         continue;
38                 if ( first != 0 )
39                         strcat(buf, ", ");
40                 strcat(buf, DBDesc[i].shortname);
41                 if ( first == 0 ) 
42                         strcat(buf, "(default)");
43                 first++;
44         }
45
46         fputs(
47                 "ftsbench - full text search benchmark ofr RDBMS\n"
48                 "Initialization of DB:\n"
49                 "\tftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n"
50                 "FLAGS are comma-separate list of:\n"
51                 "       gin  - use GIN index\n"
52                 "       gist - use GiST index\n"
53                 "       func - use functional index\n",
54                 stdout
55         );
56         fputs(
57                 "Run tests:\n"
58                 "\tftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n"
59                 "FLAGS are comma-separate list of:\n"
60                 "       and  - AND'ing lexemes in query (default)\n"
61                 "       or   - OR'ing lexemes in query\n"
62                 "       sort - sort result of query\n"
63                 "Options are:\n"
64                 "       -b RDBMS\t- type of DB: ",
65                 stdout
66         );
67         fputs( buf, stdout );
68         fputs(
69                 "\n"
70                 "       -l LEXFILE\t- file with words and its frequents\n"
71                 "       -g GAMMAFILE\t- file with doc's length distribution\n",
72                 stdout
73         );
74         exit(1);
75 }
76
77 static RDBMS
78 getRDBMS(char *name) {
79         int     i;
80
81         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
82                 if ( name == NULL ) {
83                         if ( DBDesc[i].init )
84                                 return DBDesc[i].rdbms; 
85                 } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) {
86                         if ( DBDesc[i].init == NULL ) {
87                                 fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
88                                 exit(1);
89                         }
90                         return DBDesc[i].rdbms;
91                 }
92         }
93
94         fprintf(stderr,"Can't find a RDBMS\n");
95         exit(1);
96         
97         return NULLSQL;
98 }
99
100 static int
101 getFLAGS(char *flg) {
102         int flags = 0;
103
104         if ( strcasestr(flg,"gist") )
105                 flags |= FLG_GIST;
106         if ( strcasestr(flg,"gin") )
107                 flags |= FLG_GIN;
108         if ( strcasestr(flg,"func") )
109                 flags |= FLG_FUNC;
110         if ( strcasestr(flg,"and") )
111                 flags |= FLG_AND;
112         if ( strcasestr(flg,"or") )
113                 flags |= FLG_OR;
114         if ( strcasestr(flg,"sort") )
115                 flags |= FLG_SORT;
116
117         if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) {
118                 fprintf(stderr,"GIN and GiST flags are mutually exclusive\n");
119                 exit(1);
120         }
121         if ( (flags & FLG_AND) && (flags & FLG_OR) ) {
122                 fprintf(stderr,"AND and OR flags are mutually exclusive\n");
123                 exit(1);
124         }
125
126         return flags;
127 }
128
129 static ftsDB **
130 initConnections(RDBMS rdbms, int n, char *connstr) {
131         ftsDB   **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n);
132         int i;
133
134         if (!dbs) {
135                 fprintf(stderr,"Not enough mwmory\n");
136                 exit(1);
137         }
138
139         for(i=0;i<n;i++) { 
140                 dbs[i] = DBDesc[rdbms].init(connstr);
141                 pthread_mutex_init(&dbs[i]->nqueryMutex, NULL);
142         }
143
144         return dbs;
145 }
146
/*
 * Difference end - begin in seconds, as a double.
 *
 * The seconds and microseconds fields are subtracted separately, so the
 * microsecond part may be negative; the sum is still the right value.
 */
static double
timediff(struct timeval *begin, struct timeval *end) {
	double	whole = (double)( end->tv_sec - begin->tv_sec );
	double	micro = (double)( end->tv_usec - begin->tv_usec );

	return whole + micro / 1.0e+6;
}
151
/*
 * Seconds passed between *begin and the moment of the call, measured
 * with gettimeofday().
 */
static double
elapsedtime(struct timeval *begin) {
	struct timeval	now;

	gettimeofday(&now, NULL);

	return timediff(begin, &now);
}
158
/*
 * State shared between main() and the execBench() worker threads.
 */

/* serializes calls of generate_querywords() made by the workers */
static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;

/*
 * condFinish is broadcast by a worker when it is done; main() waits on it
 * with mutexFinish held.
 */
static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;

/* query flags and number of queries per worker, set by main() before
 * the workers are started */
static int benchFlags = 0;
static int benchCount = 0;
164
165 static void*
166 execBench(void *in) {
167         ftsDB *db = (ftsDB*)in;
168         int i;
169         char **words;
170
171         for(i=0;i<benchCount;i++) {
172                 /*
173                  * generate_querywords() isn't a thread safe
174                  */
175                 pthread_mutex_lock( &mutexWordGen );
176                 words = generate_querywords();
177                 pthread_mutex_unlock( &mutexWordGen );
178
179                 db->execQuery(db, words, benchFlags);
180                 free(words);
181         }
182
183         /*
184          * send message about exitting
185          */
186     pthread_mutex_lock( &mutexFinish );
187         pthread_cond_broadcast( &condFinish );
188         pthread_mutex_unlock( &mutexFinish );
189
190         return NULL;    
191 }
192
193 extern char *optarg;
194
195 int
196 main(int argn, char *argv[]) {
197         int             initMode = 0;
198         int             n = 0, nclients = 1;
199         char    *lex = NULL;
200         char    *doc = NULL;
201         char    *dbname = NULL;
202         RDBMS   rdbms = NULLSQL;
203         int             flags = 0;
204         int i;
205         StringBuf       b = {NULL,0,0};
206
207         while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:")) != EOF) {
208                 switch(i) {
209                         case 'i': initMode = 1; break;
210                         case 'b': rdbms = getRDBMS(optarg); break;
211                         case 'n': n=atoi(optarg); break;
212                         case 'c': nclients=atoi(optarg); break;
213                         case 'l': lex = strdup(optarg); break;
214                         case 'g': doc = strdup(optarg); break;
215                         case 'd': dbname = strdup(optarg); break;
216                         case 'f': flags = getFLAGS(optarg); break;
217                         case 'h':
218                         default:
219                                 usage();
220                 }
221         }
222
223         if (rdbms == NULLSQL)
224                 rdbms = getRDBMS(NULL);
225
226         if ( dbname == NULL || n<0 || nclients<1 )
227                 usage();
228
229         printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); 
230
231         if ( initMode ) {
232                 ftsDB   *db = *initConnections(rdbms, 1, dbname);
233                 time_t  prev;
234
235                 if (!lex)  lex = "gendata/lex";
236                 if (!doc)  doc = "gendata/gamma-lens";
237                 finnegan_init(lex, doc);
238
239                 db->startCreateScheme(db, flags);
240                 prev = time(NULL);
241                 for(i=0;i<n;i++) {
242                         generate_doc(&b);
243                         db->InsertRow(db, i+1, b.str);
244                         if ( prev!=time(NULL) ) {
245                                 printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
246                                 fflush(stdout);
247                                 prev = time(NULL);
248                         }
249                 }
250                 printf("\r%d(100.00%%) rows inserted. Finalyze insertion... ", i);
251                 fflush(stdout);
252                 db->finishCreateScheme(db);
253                 printf("done\n");
254         } else {
255                 ftsDB   **dbs = initConnections(rdbms, nclients, dbname);
256                 pthread_t       *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients);
257                 struct  timeval begin;
258                 double  elapsed;
259                 int     total=0;
260                 struct      timespec  sleepTo = { 0, 0 };
261
262                 /*
263                  * startup generator
264                  */
265                 if (!lex)  lex = "gendata/query-lex";
266                 if (!doc)  doc = "gendata/query-lens";
267                 finnegan_init(lex, doc);
268
269                 /*
270                  * Initial query
271                  */
272                 printf("\r0(0.00%%) queries proceed");
273                 fflush(stdout);
274                 benchFlags = flags;
275                 benchCount = n;
276
277                 gettimeofday(&begin,NULL);
278
279         pthread_mutex_lock( &mutexFinish );
280                 for(i=0;i<nclients;i++) {
281                         if ( pthread_create(tid+i, NULL, execBench, (void*)dbs[i]) != 0 ) {
282                                 fprintf(stderr,"pthread_create failed: %s\n", strerror(errno));
283                                 exit(1);
284                         }
285                 }
286
287                 printf("\r%d(%.02f%%) queries proceed", 0, 0.0);
288                 fflush(stdout);
289
290                 for(;;) {
291                         int res, ntogo = 0;
292
293                         total = 0;
294                         for(i=0;i<nclients;i++) {
295                                 pthread_mutex_lock(&dbs[i]->nqueryMutex);
296                                 total +=dbs[i]->nquery;
297                                 if ( dbs[i]->nquery < n )
298                                         ntogo++;
299                                 pthread_mutex_unlock(&dbs[i]->nqueryMutex);
300                         }
301
302                         if ( ntogo == 0 ) 
303                                 break;
304
305                         printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n));
306                         fflush(stdout);
307                         
308                         sleepTo.tv_sec = time(NULL) + 1;
309                         res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo );
310
311                         if ( !(res == ETIMEDOUT || res == 0) ) {
312                                 fprintf(stderr,"pthread_cond_timedwait failed: %s", strerror(errno));
313                                 exit(1);
314                         }
315                 }
316                 elapsed = elapsedtime(&begin);
317                 pthread_mutex_unlock( &mutexFinish );
318
319                 for(i=0;i<nclients;i++)
320                         pthread_join(tid[i], NULL);
321
322                 printf("\r%d(%.02f%%) queries proceed\n", total, (100.0*(float)total)/(nclients * n));
323                 printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
324                 fflush(stdout);
325         }
326
327         return 0;
328 }