From 34dc32fe558e25dfe4c7c80b94b126e597ebac2f Mon Sep 17 00:00:00 2001 From: teodor Date: Fri, 27 Oct 2006 10:56:19 +0000 Subject: [PATCH] Add SQL-mode of result output --- README | 15 +++++ finnegan.c | 10 ++-- ftsbench.c | 158 +++++++++++++++++++++++++++++++++++++------------- ftsbench.h | 8 ++- mysqldriver.c | 22 +++---- pgdriver.c | 16 ++--- utils.c | 3 +- 7 files changed, 161 insertions(+), 71 deletions(-) diff --git a/README b/README index 8960c0b..ed1772e 100644 --- a/README +++ b/README @@ -19,6 +19,21 @@ MySQL prerequisite: tested on 5.0.24a ./configure --enable-thread-safe-client --with-pthread +SQL mode (output may be redirected into a file or to a database): +% ftsbench -S | psql postgres +% ftsbench -i -n 10 -f gin -s 17 -d contrib_regression +INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (17, 'pgsql', 't', 'f', 'f', 10, 0.528075); +% ftsbench -c 2 -n 2 -f gin -s 17 -d contrib_regression +INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (17, 't', 'f', 2, 2, 0, 0.063508); +% ftsbench -c 2 -n 2 -f gin -s 17 -r -d contrib_regression +INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (17, 't', 'f', 2, 0, 0.039024); +INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (17, 't', 'f', 2, 0, 0.009069); +INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (17, 't', 'f', 2, 0, 0.061368); +INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (17, 't', 'f', 2, 0, 0.003654); + + + +Notice: Although ftsbench is covered by BSD license redistribution of rand.c and finnegan.c is prohibited without the permission of J. Zobel (jz at cs.rmit.edu.au). 
diff --git a/finnegan.c b/finnegan.c index 24a4abd..efb0a5a 100644 --- a/finnegan.c +++ b/finnegan.c @@ -229,15 +229,17 @@ generate_querywords() { void -finnegan_init(char *lex_file, char *doc_file) { +finnegan_init(char *lex_file, char *doc_file, int quiet) { if ( isInited ) { fprintf(stderr,"finnegan is already inited\n"); exit(1); } - printf("Initialize text generator with:\n"); - printf("\tfile '%s' - lexeme's distribution\n", lex_file); - printf("\tfile '%s' - length's distribution\n", doc_file); + if (!quiet) { + printf("Initialize text generator with:\n"); + printf("\tfile '%s' - lexeme's distribution\n", lex_file); + printf("\tfile '%s' - length's distribution\n", doc_file); + } srnd(INITIAL_SEED); no_of_words = no_newline(lex_file); no_of_docs = no_newline(doc_file); diff --git a/ftsbench.c b/ftsbench.c index 4eb5c4b..0232353 100644 --- a/ftsbench.c +++ b/ftsbench.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ftsbench.h" @@ -76,7 +77,7 @@ usage() { "Copyright (c) 2006 Teodor Sigaev . 
All rights reserved.\n" "ftsbench - full text search benchmark for RDBMS\n" "Initialization of DB:\n" - "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n" + "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID] -d DBNAME\n" " -b RDBMS\t- type of DB: ", stdout ); @@ -85,14 +86,17 @@ usage() { "\n" " -n NUMROW - number of row in table\n" " -l LEXFILE - file with words and its frequents (default gendata/lex)\n" - " -g GAMMAFILE - file with doc's length distribution (default gendata/gamma-lens)\n" + " -g GAMMAFILE - file with doc's length distribution (default \n" + " gendata/gamma-lens)\n" " -l FLGAS - options for db's schema (see below)\n" + " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n" + " statement\n" " -q - do not print progress message\n", stdout ); fputs( "Run tests:\n" - "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n" + "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID [-r]] -d DBNAME\n" " -b RDBMS\t- type of DB: ", stdout ); @@ -102,8 +106,12 @@ usage() { " -c NCLIENTS - number of clients in parallel\n" " -n NUMQUERY - number of queries per client\n" " -l LEXFILE - file with words and its frequents (default gendata/query-lex)\n" - " -g GAMMAFILE - file with doc's length distribution (default gendata/query-lens)\n" + " -g GAMMAFILE - file with doc's length distribution (default \n" + " gendata/query-lens)\n" " -l FLGAS - options for db's schema (see below)\n" + " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n" + " statement\n" + " -r - row mode: timing every query\n" " -q - do not print progress message\n", stdout ); @@ -169,7 +177,8 @@ getFLAGS(char *flg) { if ( (flags & FLG_AND) && (flags & FLG_OR) ) { fprintf(stderr,"AND and OR flags are mutually exclusive\n"); exit(1); - } + } else if ( ( flags & ( FLG_AND | FLG_OR 
) ) == 0 ) + flags |= FLG_AND; return flags; } @@ -204,17 +213,26 @@ elapsedtime(struct timeval *begin) { return timediff(begin,&end); } +static int Id = 0; +static int sqlMode = 0; +static int rowMode = 0; static int benchFlags = 0; static int benchCount = 0; +static int nClients = 0; static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER; static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER; +/* + * main test function, executed in thread + */ static void* execBench(void *in) { ftsDB *db = (ftsDB*)in; - int i; + int i, nres=0; char **words; + struct timeval begin; + double elapsed; for(i=0;iexecQuery(db, words, benchFlags); + + if ( rowMode ) { + elapsed = elapsedtime(&begin); + printf("INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %g);\n", + Id, + ( benchFlags & FLG_AND ) ? 't' : 'f', + ( benchFlags & FLG_OR ) ? 't' : 'f', + nClients, + db->nres - nres, + elapsed + ); + nres = db->nres; + } free(words); } @@ -238,33 +272,51 @@ execBench(void *in) { return NULL; } +void +report(const char *format, ...) 
{ + va_list args; + + if (benchFlags & FLG_SQL) + return; + + va_start(args, format); + vfprintf(stdout, format, args); + va_end(args); + + fflush(stdout); +} + extern char *optarg; int main(int argn, char *argv[]) { int initMode = 0; - int n = 0, nclients = 1; + int n = 0; char *lex = NULL; char *doc = NULL; char *dbname = NULL; RDBMS rdbms = NULLSQL; int flags = 0; - int i; + int i; int quiet = 0, scheme=0; StringBuf b = {NULL,0,0}; + struct timeval begin; + double elapsed; - while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qS")) != EOF) { + while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qSs:r")) != EOF) { switch(i) { case 'i': initMode = 1; break; case 'b': rdbms = getRDBMS(optarg); break; case 'n': n=atoi(optarg); break; - case 'c': nclients=atoi(optarg); break; + case 'c': nClients=atoi(optarg); break; case 'l': lex = strdup(optarg); break; case 'g': doc = strdup(optarg); break; case 'd': dbname = strdup(optarg); break; case 'f': flags = getFLAGS(optarg); break; case 'q': quiet = 1; break; case 'S': scheme = 1; break; + case 's': sqlMode = 1; Id = atoi(optarg); break; + case 'r': rowMode = 1; break; case 'h': default: usage(); @@ -279,10 +331,19 @@ main(int argn, char *argv[]) { if (rdbms == NULLSQL) rdbms = getRDBMS(NULL); - if ( dbname == NULL || n<0 || nclients<1 ) + if ( dbname == NULL || n<0 || (initMode == 0 && nClients<1) ) usage(); - printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); + if ( sqlMode ) { + quiet = 1; + flags |= FLG_SQL; + } else + rowMode = 0; + + benchFlags = flags; + benchCount = n; + + report("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); if ( initMode ) { ftsDB *db = *initConnections(rdbms, 1, dbname); @@ -290,7 +351,9 @@ main(int argn, char *argv[]) { if (!lex) lex = "gendata/lex"; if (!doc) doc = "gendata/gamma-lens"; - finnegan_init(lex, doc); + finnegan_init(lex, doc, sqlMode); + + gettimeofday(&begin,NULL); db->startCreateScheme(db, flags); prev = time(NULL); @@ -298,22 +361,33 @@ main(int argn, char *argv[]) { 
generate_doc(&b); db->InsertRow(db, i+1, b.str); if ( !quiet && prev!=time(NULL) ) { - printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n); - fflush(stdout); + report("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n); prev = time(NULL); } } - printf("%s%d(100.00%%) rows inserted. Finalyze insertion... ", + + report("%s%d(100.00%%) rows inserted. Finalyze insertion... ", (quiet) ? "" : "\r", i); - fflush(stdout); db->finishCreateScheme(db); - printf("done\n"); + elapsed = elapsedtime(&begin); + + report("done\nTime: %.02f secs\n", elapsed); + if (sqlMode) { + printf("INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (%d, '%s', '%c', '%c', '%c', %d, %g);\n", + Id, + DBDesc[ rdbms ].shortname, + ( flags & FLG_GIN ) ? 't' : 'f', + ( flags & FLG_GIST ) ? 't' : 'f', + ( flags & FLG_FUNC ) ? 't' : 'f', + n, + elapsed + ); + } db->Close(db); } else { - ftsDB **dbs = initConnections(rdbms, nclients, dbname); - pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients); + ftsDB **dbs = initConnections(rdbms, nClients, dbname); + pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nClients); struct timeval begin; - double elapsed; int total=0, nres=0; struct timespec sleepTo = { 0, 0 }; @@ -322,22 +396,18 @@ main(int argn, char *argv[]) { */ if (!lex) lex = "gendata/query-lex"; if (!doc) doc = "gendata/query-lens"; - finnegan_init(lex, doc); + finnegan_init(lex, doc, sqlMode); /* * Initial query */ - if ( !quiet ) { - printf("\r0(0.00%%) queries proceed"); - fflush(stdout); - } - benchFlags = flags; - benchCount = n; + if ( !quiet ) + report("\r0(0.00%%) queries proceed"); gettimeofday(&begin,NULL); pthread_mutex_lock( &mutexFinish ); - for(i=0;inqueryMutex); total +=dbs[i]->nquery; if ( dbs[i]->nquery < n ) @@ -359,10 +429,8 @@ main(int argn, char *argv[]) { if ( ntogo == 0 ) break; - if ( !quiet ) { - printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n)); - fflush(stdout); - } + if ( !quiet ) + 
report("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nClients * n)); sleepTo.tv_sec = time(NULL) + 1; res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo ); @@ -375,17 +443,27 @@ main(int argn, char *argv[]) { elapsed = elapsedtime(&begin); pthread_mutex_unlock( &mutexFinish ); - for(i=0;inres; dbs[i]->Close(dbs[i]); } - printf("%s%d(%.02f%%) queries proceed\n", - (quiet) ? "" : "\r", total, (100.0*(float)total)/(nclients * n)); - printf("Total number of result: %d\n", nres); - printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed); - fflush(stdout); + report("%s%d(%.02f%%) queries proceed\n", + (quiet) ? "" : "\r", total, (100.0*(float)total)/(nClients * n)); + report("Total number of result: %d\n", nres); + report("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed); + if (sqlMode && !rowMode) { + printf("INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %d, %g);\n", + Id, + ( flags & FLG_AND ) ? 't' : 'f', + ( flags & FLG_OR ) ? 
't' : 'f', + nClients, + n, + nres, + elapsed + ); + } } return 0; diff --git a/ftsbench.h b/ftsbench.h index c29c5ed..ee3ae76 100644 --- a/ftsbench.h +++ b/ftsbench.h @@ -49,8 +49,11 @@ void srnd(long seed); /* finngan.c */ void generate_doc(StringBuf *b); char** generate_querywords(); -void finnegan_init(char *lex_file, char *doc_file); +void finnegan_init(char *lex_file, char *doc_file, int quiet); +/* main part */ + +void report(const char *format, ...); typedef struct ftsDB { void (*execQuery)(struct ftsDB*, char **, int); void (*startCreateScheme)(struct ftsDB*, int); @@ -83,6 +86,7 @@ ftsDB* MYInit(char * connstr); #define FLG_FUNC (0x00000004) #define FLG_AND (0x00000008) #define FLG_OR (0x00000010) -#define FLG_SORT (0x00000020) + +#define FLG_SQL (0x00000020) #endif diff --git a/mysqldriver.c b/mysqldriver.c index 68f5b40..0fddf61 100644 --- a/mysqldriver.c +++ b/mysqldriver.c @@ -149,10 +149,10 @@ startCreateScheme(ftsDB *adb, int flags) { db->flags = flags; if ( flags & FLG_FUNC ) - printf("Flag 'func' is ignored by MySQL\n"); + report("Flag 'func' is ignored by MySQL\n"); - if ( flags & (FLG_GIN | FLG_GIST) ) - printf("MySQL doesn't distinguish 'gin' and 'gist' flags\n"); + if ( (flags & (FLG_GIN | FLG_GIST)) && (db->flags & FLG_SQL) == 0 ) + report("MySQL doesn't distinguish 'gin' and 'gist' flags\n"); if ( mysql_query(db->conn, "DROP TABLE IF EXISTS ftsbench CASCADE;")!= 0 ) { fprintf(stderr,"mysql_query failed: %s\n", mysql_error(db->conn)); @@ -170,27 +170,23 @@ finishCreateScheme(ftsDB *adb) { ftsMY *db = (ftsMY*)adb; if ( db->flags & (FLG_GIN | FLG_GIST) ) { - printf("(create index, "); - fflush(stdout); + report("(create index, "); if ( mysql_query(db->conn, "CREATE FULLTEXT INDEX fts ON ftsbench (body);")!= 0 ) { fprintf(stderr,"mysql_query failed: %s\n", mysql_error(db->conn)); exit(1); } - } else { - printf("("); - fflush(stdout); - } + } else + report("("); - printf("optimize"); - fflush(stdout); + report("optimize"); + if ( 
mysql_query(db->conn, "OPTIMIZE TABLE ftsbench;")!= 0 ) { fprintf(stderr,"mysql_query failed: %s\n", mysql_error(db->conn)); exit(1); } - printf(") "); - fflush(stdout); + report(") "); } static void diff --git a/pgdriver.c b/pgdriver.c index ed15a57..adf329f 100644 --- a/pgdriver.c +++ b/pgdriver.c @@ -345,8 +345,7 @@ finishCreateScheme(ftsDB* adb) { sprintf(buf,"CREATE INDEX ftsindex ON ftsbench USING %s ( fts );", (db->flags & FLG_GIST) ? "GiST" : "GIN" ); - printf("(create index, "); - fflush(stdout); + report("(create index, "); res = PQexec(db->conn, buf); if (PQresultStatus(res) != PGRES_COMMAND_OK) { @@ -354,13 +353,10 @@ finishCreateScheme(ftsDB* adb) { exit(1); } PQclear(res); - } else { - printf("("); - fflush(stdout); - } + } else + report("("); - printf("vacuum"); - fflush(stdout); + report("vacuum"); res = PQexec(db->conn, "VACUUM ANALYZE ftsbench;"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { @@ -368,8 +364,8 @@ finishCreateScheme(ftsDB* adb) { exit(1); } PQclear(res); - printf(") "); - fflush(stdout); + + report(") "); return; } diff --git a/utils.c b/utils.c index 213f2fb..5f7c6a9 100644 --- a/utils.c +++ b/utils.c @@ -87,8 +87,8 @@ printScheme() { fputs( "--summary stats\n" "CREATE TABLE fb_search (\n" - " id integer NOT NULL,\n" "--link to fb_create.id\n" + " id integer NOT NULL,\n" " f_and boolean NOT NULL,\n" " f_or boolean NOT NULL,\n" " nclients integer NOT NULL,\n" @@ -107,7 +107,6 @@ printScheme() { " f_and boolean NOT NULL,\n" " f_or boolean NOT NULL,\n" " nclients integer NOT NULL,\n" - " query text NOT NULL,\n" " nres integer NOT NULL,\n" " elapsed double precision NOT NULL\n" ");\n", -- 2.37.3