Add output of query words in sql mode, add most frequent words to filter of query-lex
authorteodor <teodor>
Fri, 27 Oct 2006 12:58:50 +0000 (12:58 +0000)
committerteodor <teodor>
Fri, 27 Oct 2006 12:58:50 +0000 (12:58 +0000)
ftsbench.c
gendata/english.stop
utils.c

index 69cb79d..44788fd 100644 (file)
@@ -223,6 +223,27 @@ static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;
 static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;
 
+static void
+printQueryWords(StringBuf *b, char **words) { /* writes the entries of words[] into b, separated by single spaces, emitting every single quote twice; the caller in this diff prints b->str inside a single-quoted SQL literal */
+       char **wptr = words, *ptr; /* wptr: current entry of words[]; ptr: current character of that entry */
+
+       b->strlen = 0; /* reset the recorded length; presumably makes sb_add start writing at the beginning of b -- confirm against sb_add */
+       while(*wptr) { /* words[] is walked until a NULL entry is reached */
+               if ( wptr != words ) /* separator goes before every word except the first */
+                       sb_add(b, " ", 1);
+
+               ptr = *wptr;
+               while( *ptr ) { /* copy the word one character at a time, up to its terminating NUL */
+                       if ( *ptr == '\'' ) /* a single quote gets one extra quote in front of it */
+                               sb_add( b, "'", 1 );
+                       sb_add( b, ptr, 1 ); /* the character itself (length argument 1) */
+                       ptr++;
+               }
+
+               wptr++;
+       }
+} /* NOTE(review): if words[0] is NULL no sb_add call is made; whether b->str is then usable by the caller depends on StringBuf/sb_add -- confirm */
+
 /*
  * main test function, executed in thread
  */
@@ -233,6 +254,7 @@ execBench(void *in) {
        char **words;
        struct  timeval begin;
        double  elapsed;
+       StringBuf       b = {NULL,0,0};
 
        for(i=0;i<benchCount;i++) {
                /*
@@ -249,11 +271,14 @@ execBench(void *in) {
 
                if ( rowMode ) {
                        elapsed = elapsedtime(&begin);
-                       printf("INSERT INTO fb_row (id, f_and, f_or, nclients, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %g);\n",
+                       printQueryWords(&b, words);
+
+                       printf("INSERT INTO fb_row (id, f_and, f_or, nclients, query, nres, elapsed) VALUES (%d, '%c', '%c', %d, '%s', %d, %g);\n",
                                        Id,
                                        ( benchFlags & FLG_AND ) ? 't' : 'f',
                                        ( benchFlags & FLG_OR ) ? 't' : 'f',
                                        nClients,
+                                       b.str,
                                        db->nres - nres,
                                        elapsed
                        );
index a913011..9b8ecb3 100644 (file)
@@ -125,4 +125,36 @@ just
 don
 should
 now
+#mark most frequent words as stop words for queries
+act
+acting
+sections
+section
+sect
+shall
+person
+persons
+AMENDMENT
+AMENDMENTS
+State
+States
+relation
+RELATIONS
+SCHEDULE
+amended
+relating
+amount
+respect
+Goods
+paragraph
+purpose
+PURPOSES
+years
+Services
+Service
+amend
+stating
+reference
+referred
+would
 
diff --git a/utils.c b/utils.c
index 5f7c6a9..89a2b30 100644 (file)
--- a/utils.c
+++ b/utils.c
@@ -107,6 +107,7 @@ printScheme() {
                "  f_and    boolean    NOT NULL,\n"
                "  f_or     boolean    NOT NULL,\n"
                "  nclients integer    NOT NULL,\n"
+               "  query    text       NOT NULL,\n"
                "  nres     integer    NOT NULL,\n"
                "  elapsed  double precision NOT NULL\n"
                ");\n",