Fix examples
[ftsbench.git] / ftsbench.c
1 /*
2  * Copyright (c) 2006 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *        notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *        notice, this list of conditions and the following disclaimer in the
12  *        documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *        may be used to endorse or promote products derived from this software
15  *        without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <sys/time.h>
36
37 #include "ftsbench.h"
38
/*
 * Database backends known to the benchmark.
 *
 * The values are used directly as indexes into DBDesc[], so their order must
 * match the rows of that table.  NULLSQL means "no backend selected" and also
 * terminates DBDesc[].
 */
typedef enum RDBMS {
	PostgreSQL = 0,
	MySQL,
	NULLSQL		/* sentinel, keep last */
} RDBMS;
44
45 typedef struct RDBMSDesc {
46         RDBMS   rdbms;
47         char    *shortname;
48         char    *longname;
49         ftsDB*  (*init)(char *);
50 } RDBMSDesc;
51
52 static RDBMSDesc DBDesc[] = {
53         { PostgreSQL, "pgsql", "PostgreSQL", PGInit }, 
54         { MySQL,          "mysql", "MySQL",      MYInit },
55         { NULLSQL,        NULL,    NULL,         NULL   }
56 };
57
58 static void
59 usage() {
60         char buf[1024];
61         int i, first=0;
62
63         *buf = '\0';
64         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
65                 if ( DBDesc[i].init == NULL )
66                         continue;
67                 if ( first != 0 )
68                         strcat(buf, ", ");
69                 strcat(buf, DBDesc[i].shortname);
70                 if ( first == 0 ) 
71                         strcat(buf, "(default)");
72                 first++;
73         }
74
75         fputs(
76                 "ftsbench - full text search benchmark ofr RDBMS\n"
77                 "Initialization of DB:\n"
78                 "\tftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n"
79                 "FLAGS are comma-separate list of:\n"
80                 "       gin  - use GIN index\n"
81                 "       gist - use GiST index\n"
82                 "       func - use functional index\n",
83                 stdout
84         );
85         fputs(
86                 "Run tests:\n"
87                 "\tftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n"
88                 "FLAGS are comma-separate list of:\n"
89                 "       and  - AND'ing lexemes in query (default)\n"
90                 "       or   - OR'ing lexemes in query\n"
91                 "       sort - sort result of query\n"
92                 "Options are:\n"
93                 "       -b RDBMS\t- type of DB: ",
94                 stdout
95         );
96         fputs( buf, stdout );
97         fputs(
98                 "\n"
99                 "       -l LEXFILE\t- file with words and its frequents\n"
100                 "       -g GAMMAFILE\t- file with doc's length distribution\n",
101                 stdout
102         );
103         exit(1);
104 }
105
106 static RDBMS
107 getRDBMS(char *name) {
108         int     i;
109
110         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
111                 if ( name == NULL ) {
112                         if ( DBDesc[i].init )
113                                 return DBDesc[i].rdbms; 
114                 } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) {
115                         if ( DBDesc[i].init == NULL ) {
116                                 fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
117                                 exit(1);
118                         }
119                         return DBDesc[i].rdbms;
120                 }
121         }
122
123         fprintf(stderr,"Can't find a RDBMS\n");
124         exit(1);
125         
126         return NULLSQL;
127 }
128
129 static int
130 getFLAGS(char *flg) {
131         int flags = 0;
132
133         if ( strcasestr(flg,"gist") )
134                 flags |= FLG_GIST;
135         if ( strcasestr(flg,"gin") )
136                 flags |= FLG_GIN;
137         if ( strcasestr(flg,"func") )
138                 flags |= FLG_FUNC;
139         if ( strcasestr(flg,"and") )
140                 flags |= FLG_AND;
141         if ( strcasestr(flg,"or") )
142                 flags |= FLG_OR;
143         if ( strcasestr(flg,"sort") )
144                 flags |= FLG_SORT;
145
146         if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) {
147                 fprintf(stderr,"GIN and GiST flags are mutually exclusive\n");
148                 exit(1);
149         }
150         if ( (flags & FLG_AND) && (flags & FLG_OR) ) {
151                 fprintf(stderr,"AND and OR flags are mutually exclusive\n");
152                 exit(1);
153         }
154
155         return flags;
156 }
157
158 static ftsDB **
159 initConnections(RDBMS rdbms, int n, char *connstr) {
160         ftsDB   **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n);
161         int i;
162
163         if (!dbs) {
164                 fprintf(stderr,"Not enough mwmory\n");
165                 exit(1);
166         }
167
168         for(i=0;i<n;i++) { 
169                 dbs[i] = DBDesc[rdbms].init(connstr);
170                 pthread_mutex_init(&dbs[i]->nqueryMutex, NULL);
171         }
172
173         return dbs;
174 }
175
/*
 * Time from *begin to *end in seconds, as a double.
 * The result is negative when *end is earlier than *begin.
 */
static double
timediff(struct timeval *begin, struct timeval *end) {
	double	sec  = (double)( end->tv_sec - begin->tv_sec );
	double	usec = (double)( end->tv_usec - begin->tv_usec );

	return sec + usec / 1.0e+6;
}
180
181 static double
182 elapsedtime(struct timeval *begin) {
183     struct timeval end;
184         gettimeofday(&end,NULL);
185         return timediff(begin,&end);
186 }
187
/*
 * State shared between main() and the execBench() worker threads.
 */

/* query flags and per-client query count; set by main() before workers start */
static int benchFlags  = 0;
static int benchCount  = 0;

/* a worker broadcasts condFinish (under mutexFinish) when it is done */
static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;

/* serializes calls to generate_querywords() across workers */
static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;
193
194 static void*
195 execBench(void *in) {
196         ftsDB *db = (ftsDB*)in;
197         int i;
198         char **words;
199
200         for(i=0;i<benchCount;i++) {
201                 /*
202                  * generate_querywords() isn't a thread safe
203                  */
204                 pthread_mutex_lock( &mutexWordGen );
205                 words = generate_querywords();
206                 pthread_mutex_unlock( &mutexWordGen );
207
208                 db->execQuery(db, words, benchFlags);
209                 free(words);
210         }
211
212         /*
213          * send message about exitting
214          */
215     pthread_mutex_lock( &mutexFinish );
216         pthread_cond_broadcast( &condFinish );
217         pthread_mutex_unlock( &mutexFinish );
218
219         return NULL;    
220 }
221
222 extern char *optarg;
223
224 int
225 main(int argn, char *argv[]) {
226         int             initMode = 0;
227         int             n = 0, nclients = 1;
228         char    *lex = NULL;
229         char    *doc = NULL;
230         char    *dbname = NULL;
231         RDBMS   rdbms = NULLSQL;
232         int             flags = 0;
233         int i;
234         StringBuf       b = {NULL,0,0};
235
236         while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:")) != EOF) {
237                 switch(i) {
238                         case 'i': initMode = 1; break;
239                         case 'b': rdbms = getRDBMS(optarg); break;
240                         case 'n': n=atoi(optarg); break;
241                         case 'c': nclients=atoi(optarg); break;
242                         case 'l': lex = strdup(optarg); break;
243                         case 'g': doc = strdup(optarg); break;
244                         case 'd': dbname = strdup(optarg); break;
245                         case 'f': flags = getFLAGS(optarg); break;
246                         case 'h':
247                         default:
248                                 usage();
249                 }
250         }
251
252         if (rdbms == NULLSQL)
253                 rdbms = getRDBMS(NULL);
254
255         if ( dbname == NULL || n<0 || nclients<1 )
256                 usage();
257
258         printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); 
259
260         if ( initMode ) {
261                 ftsDB   *db = *initConnections(rdbms, 1, dbname);
262                 time_t  prev;
263
264                 if (!lex)  lex = "gendata/lex";
265                 if (!doc)  doc = "gendata/gamma-lens";
266                 finnegan_init(lex, doc);
267
268                 db->startCreateScheme(db, flags);
269                 prev = time(NULL);
270                 for(i=0;i<n;i++) {
271                         generate_doc(&b);
272                         db->InsertRow(db, i+1, b.str);
273                         if ( prev!=time(NULL) ) {
274                                 printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
275                                 fflush(stdout);
276                                 prev = time(NULL);
277                         }
278                 }
279                 printf("\r%d(100.00%%) rows inserted. Finalyze insertion... ", i);
280                 fflush(stdout);
281                 db->finishCreateScheme(db);
282                 printf("done\n");
283         } else {
284                 ftsDB   **dbs = initConnections(rdbms, nclients, dbname);
285                 pthread_t       *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients);
286                 struct  timeval begin;
287                 double  elapsed;
288                 int     total=0;
289                 struct      timespec  sleepTo = { 0, 0 };
290
291                 /*
292                  * startup generator
293                  */
294                 if (!lex)  lex = "gendata/query-lex";
295                 if (!doc)  doc = "gendata/query-lens";
296                 finnegan_init(lex, doc);
297
298                 /*
299                  * Initial query
300                  */
301                 printf("\r0(0.00%%) queries proceed");
302                 fflush(stdout);
303                 benchFlags = flags;
304                 benchCount = n;
305
306                 gettimeofday(&begin,NULL);
307
308         pthread_mutex_lock( &mutexFinish );
309                 for(i=0;i<nclients;i++) {
310                         if ( pthread_create(tid+i, NULL, execBench, (void*)dbs[i]) != 0 ) {
311                                 fprintf(stderr,"pthread_create failed: %s\n", strerror(errno));
312                                 exit(1);
313                         }
314                 }
315
316                 printf("\r%d(%.02f%%) queries proceed", 0, 0.0);
317                 fflush(stdout);
318
319                 for(;;) {
320                         int res, ntogo = 0;
321
322                         total = 0;
323                         for(i=0;i<nclients;i++) {
324                                 pthread_mutex_lock(&dbs[i]->nqueryMutex);
325                                 total +=dbs[i]->nquery;
326                                 if ( dbs[i]->nquery < n )
327                                         ntogo++;
328                                 pthread_mutex_unlock(&dbs[i]->nqueryMutex);
329                         }
330
331                         if ( ntogo == 0 ) 
332                                 break;
333
334                         printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n));
335                         fflush(stdout);
336                         
337                         sleepTo.tv_sec = time(NULL) + 1;
338                         res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo );
339
340                         if ( !(res == ETIMEDOUT || res == 0) ) {
341                                 fprintf(stderr,"pthread_cond_timedwait failed: %s", strerror(errno));
342                                 exit(1);
343                         }
344                 }
345                 elapsed = elapsedtime(&begin);
346                 pthread_mutex_unlock( &mutexFinish );
347
348                 for(i=0;i<nclients;i++)
349                         pthread_join(tid[i], NULL);
350
351                 printf("\r%d(%.02f%%) queries proceed\n", total, (100.0*(float)total)/(nclients * n));
352                 printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
353                 fflush(stdout);
354         }
355
356         return 0;
357 }