Add quiet mode
[ftsbench.git] / ftsbench.c
1 /*
2  * Copyright (c) 2006 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *        notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *        notice, this list of conditions and the following disclaimer in the
12  *        documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *        may be used to endorse or promote products derived from this software
15  *        without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <sys/time.h>

#include "ftsbench.h"
38
/*
 * Database backends known to the benchmark.
 * The numeric value of each member is also its position in DBDesc[];
 * NULLSQL marks "no backend" and terminates that table.
 */
typedef enum RDBMS {
	PostgreSQL = 0,
	MySQL      = 1,
	NULLSQL    = 2
} RDBMS;
44
45 typedef struct RDBMSDesc {
46         RDBMS   rdbms;
47         char    *shortname;
48         char    *longname;
49         ftsDB*  (*init)(char *);
50 } RDBMSDesc;
51
52 static RDBMSDesc DBDesc[] = {
53         { PostgreSQL, "pgsql", "PostgreSQL", PGInit }, 
54         { MySQL,          "mysql", "MySQL",      MYInit },
55         { NULLSQL,        NULL,    NULL,         NULL   }
56 };
57
58 static void
59 usage() {
60         char buf[1024];
61         int i, first=0;
62
63         *buf = '\0';
64         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
65                 if ( DBDesc[i].init == NULL )
66                         continue;
67                 if ( first != 0 )
68                         strcat(buf, ", ");
69                 strcat(buf, DBDesc[i].shortname);
70                 if ( first == 0 ) 
71                         strcat(buf, "(default)");
72                 first++;
73         }
74
75         fputs(
76                 "ftsbench - full text search benchmark ofr RDBMS\n"
77                 "Initialization of DB:\n"
78                 "\tftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n"
79                 "FLAGS are comma-separate list of:\n"
80                 "       gin  - use GIN index\n"
81                 "       gist - use GiST index\n"
82                 "       func - use functional index\n",
83                 stdout
84         );
85         fputs(
86                 "Run tests:\n"
87                 "\tftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n"
88                 "FLAGS are comma-separate list of:\n"
89                 "       and  - AND'ing lexemes in query (default)\n"
90                 "       or   - OR'ing lexemes in query\n"
91                 "       sort - sort result of query\n"
92                 "Options are:\n"
93                 "       -b RDBMS\t- type of DB: ",
94                 stdout
95         );
96         fputs( buf, stdout );
97         fputs(
98                 "\n"
99                 "       -l LEXFILE\t- file with words and its frequents\n"
100                 "       -g GAMMAFILE\t- file with doc's length distribution\n",
101                 stdout
102         );
103         exit(1);
104 }
105
106 static RDBMS
107 getRDBMS(char *name) {
108         int     i;
109
110         for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
111                 if ( name == NULL ) {
112                         if ( DBDesc[i].init )
113                                 return DBDesc[i].rdbms; 
114                 } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) {
115                         if ( DBDesc[i].init == NULL ) {
116                                 fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
117                                 exit(1);
118                         }
119                         return DBDesc[i].rdbms;
120                 }
121         }
122
123         fprintf(stderr,"Can't find a RDBMS\n");
124         exit(1);
125         
126         return NULLSQL;
127 }
128
129 static int
130 getFLAGS(char *flg) {
131         int flags = 0;
132
133         if ( strcasestr(flg,"gist") )
134                 flags |= FLG_GIST;
135         if ( strcasestr(flg,"gin") )
136                 flags |= FLG_GIN;
137         if ( strcasestr(flg,"func") )
138                 flags |= FLG_FUNC;
139         if ( strcasestr(flg,"and") )
140                 flags |= FLG_AND;
141         if ( strcasestr(flg,"or") )
142                 flags |= FLG_OR;
143         if ( strcasestr(flg,"sort") )
144                 flags |= FLG_SORT;
145
146         if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) {
147                 fprintf(stderr,"GIN and GiST flags are mutually exclusive\n");
148                 exit(1);
149         }
150         if ( (flags & FLG_AND) && (flags & FLG_OR) ) {
151                 fprintf(stderr,"AND and OR flags are mutually exclusive\n");
152                 exit(1);
153         }
154
155         return flags;
156 }
157
158 static ftsDB **
159 initConnections(RDBMS rdbms, int n, char *connstr) {
160         ftsDB   **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n);
161         int i;
162
163         if (!dbs) {
164                 fprintf(stderr,"Not enough mwmory\n");
165                 exit(1);
166         }
167
168         for(i=0;i<n;i++) { 
169                 dbs[i] = DBDesc[rdbms].init(connstr);
170                 pthread_mutex_init(&dbs[i]->nqueryMutex, NULL);
171         }
172
173         return dbs;
174 }
175
/*
 * Interval between two timestamps, in seconds (end minus begin).
 * The result is negative when end is earlier than begin.
 */
static double
timediff(struct timeval *begin, struct timeval *end) {
	double	secs  = (double)( end->tv_sec - begin->tv_sec );
	double	usecs = (double)( end->tv_usec - begin->tv_usec );

	return secs + usecs / 1.0e+6;
}
180
/*
 * Seconds passed between *begin and the moment of the call, measured
 * with gettimeofday().
 */
static double
elapsedtime(struct timeval *begin) {
	struct timeval	now;

	gettimeofday(&now, NULL);

	return timediff(begin, &now);
}
187
/*
 * State shared between main() and the worker threads.
 */

/* query flags and number of queries per worker, read by execBench() */
static int benchFlags = 0;
static int benchCount = 0;

/* serializes calls of generate_querywords() */
static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;

/* a worker broadcasts condFinish under mutexFinish when it is done */
static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;
193
194 static void*
195 execBench(void *in) {
196         ftsDB *db = (ftsDB*)in;
197         int i;
198         char **words;
199
200         for(i=0;i<benchCount;i++) {
201                 /*
202                  * generate_querywords() isn't a thread safe
203                  */
204                 pthread_mutex_lock( &mutexWordGen );
205                 words = generate_querywords();
206                 pthread_mutex_unlock( &mutexWordGen );
207
208                 db->execQuery(db, words, benchFlags);
209                 free(words);
210         }
211
212         /*
213          * send message about exitting
214          */
215     pthread_mutex_lock( &mutexFinish );
216         pthread_cond_broadcast( &condFinish );
217         pthread_mutex_unlock( &mutexFinish );
218
219         return NULL;    
220 }
221
222 extern char *optarg;
223
224 int
225 main(int argn, char *argv[]) {
226         int             initMode = 0;
227         int             n = 0, nclients = 1;
228         char    *lex = NULL;
229         char    *doc = NULL;
230         char    *dbname = NULL;
231         RDBMS   rdbms = NULLSQL;
232         int             flags = 0;
233         int i;
234         int             quiet = 0;
235         StringBuf       b = {NULL,0,0};
236
237         while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:q")) != EOF) {
238                 switch(i) {
239                         case 'i': initMode = 1; break;
240                         case 'b': rdbms = getRDBMS(optarg); break;
241                         case 'n': n=atoi(optarg); break;
242                         case 'c': nclients=atoi(optarg); break;
243                         case 'l': lex = strdup(optarg); break;
244                         case 'g': doc = strdup(optarg); break;
245                         case 'd': dbname = strdup(optarg); break;
246                         case 'f': flags = getFLAGS(optarg); break;
247                         case 'q': quiet = 1; break;
248                         case 'h':
249                         default:
250                                 usage();
251                 }
252         }
253
254         if (rdbms == NULLSQL)
255                 rdbms = getRDBMS(NULL);
256
257         if ( dbname == NULL || n<0 || nclients<1 )
258                 usage();
259
260         printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); 
261
262         if ( initMode ) {
263                 ftsDB   *db = *initConnections(rdbms, 1, dbname);
264                 time_t  prev;
265
266                 if (!lex)  lex = "gendata/lex";
267                 if (!doc)  doc = "gendata/gamma-lens";
268                 finnegan_init(lex, doc);
269
270                 db->startCreateScheme(db, flags);
271                 prev = time(NULL);
272                 for(i=0;i<n;i++) {
273                         generate_doc(&b);
274                         db->InsertRow(db, i+1, b.str);
275                         if ( !quiet && prev!=time(NULL) ) {
276                                 printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
277                                 fflush(stdout);
278                                 prev = time(NULL);
279                         }
280                 }
281                 printf("%s%d(100.00%%) rows inserted. Finalyze insertion... ", 
282                         (quiet) ? "" : "\r", i);
283                 fflush(stdout);
284                 db->finishCreateScheme(db);
285                 printf("done\n");
286                 db->Close(db);
287         } else {
288                 ftsDB   **dbs = initConnections(rdbms, nclients, dbname);
289                 pthread_t       *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients);
290                 struct  timeval begin;
291                 double  elapsed;
292                 int     total=0;
293                 struct      timespec  sleepTo = { 0, 0 };
294
295                 /*
296                  * startup generator
297                  */
298                 if (!lex)  lex = "gendata/query-lex";
299                 if (!doc)  doc = "gendata/query-lens";
300                 finnegan_init(lex, doc);
301
302                 /*
303                  * Initial query
304                  */
305                 if ( !quiet ) {
306                         printf("\r0(0.00%%) queries proceed");
307                         fflush(stdout);
308                 }
309                 benchFlags = flags;
310                 benchCount = n;
311
312                 gettimeofday(&begin,NULL);
313
314         pthread_mutex_lock( &mutexFinish );
315                 for(i=0;i<nclients;i++) {
316                         if ( pthread_create(tid+i, NULL, execBench, (void*)dbs[i]) != 0 ) {
317                                 fprintf(stderr,"pthread_create failed: %s\n", strerror(errno));
318                                 exit(1);
319                         }
320                 }
321
322                 for(;;) {
323                         int res, ntogo = 0;
324
325                         total = 0;
326                         for(i=0;i<nclients;i++) {
327                                 pthread_mutex_lock(&dbs[i]->nqueryMutex);
328                                 total +=dbs[i]->nquery;
329                                 if ( dbs[i]->nquery < n )
330                                         ntogo++;
331                                 pthread_mutex_unlock(&dbs[i]->nqueryMutex);
332                         }
333
334                         if ( ntogo == 0 ) 
335                                 break;
336
337                         if ( !quiet ) {
338                                 printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n));
339                                 fflush(stdout);
340                         }
341                         
342                         sleepTo.tv_sec = time(NULL) + 1;
343                         res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo );
344
345                         if ( !(res == ETIMEDOUT || res == 0) ) {
346                                 fprintf(stderr,"pthread_cond_timedwait failed: %s\n", strerror(errno));
347                                 exit(1);
348                         }
349                 }
350                 elapsed = elapsedtime(&begin);
351                 pthread_mutex_unlock( &mutexFinish );
352
353                 for(i=0;i<nclients;i++) {
354                         pthread_join(tid[i], NULL);
355                         dbs[i]->Close(dbs[i]);
356                 }
357
358                 printf("%s%d(%.02f%%) queries proceed\n", 
359                         (quiet) ? "" : "\r", total, (100.0*(float)total)/(nclients * n));
360                 printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
361                 fflush(stdout);
362         }
363
364         return 0;
365 }