/*
 * Copyright (c) 2004 Teodor Sigaev
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * sfxtest: command-line driver that loads a word list from a data file and
 * answers queries read from stdin, either by binary search over a sorted
 * array or through the SFS tree API declared in sfxstr.h.
 *
 * NOTE(review): the six bare include directives below carry no header name.
 * It looks like the angle-bracketed system header names were lost when this
 * copy of the file was produced; restore them from the upstream file.
 */
#include
#include
#include
#include
#include
#include
#include "tools.h"
#include "tlog.h"
#include "tmalloc.h"
#include "sfxstr.h"

/* Argument of the option currently being parsed; set by getopt(). */
extern char *optarg;

/* Non-zero while per-query output is wanted; main() clears it for -q. */
static int verbose=1;

/*
 * Comparator used with qsort() and bsearch() on an array of char pointers.
 * Each argument points at one array element (a char pointer); the two
 * strings they refer to are ordered with strcmp().
 */
static int STRINGCMP(const void *a, const void *b) {
	return strcmp( *(char**)a, *(char**)b );
}

/*
 * Prints the command-line help to stdout and terminates the process with
 * exit status 1. Never returns.
 *
 * NOTE(review): the help lines for -P and -R carry the same description,
 * while main() stores -R in a separate flag named revert; the -R text looks
 * like a copy-paste slip. Confirm the intended wording.
 */
static void usage() {
	puts("Usage:");
	puts("./sfxtest -d datafile [-b| [-a addondatafile] [-P [-R] [-D] [-l | -p | -r | -g] [-n]] [-q]");
	puts("Detailed description:");
	puts(" Binary search");
	puts(" ./sfxtest -b -d datafile [-q]");
	puts(" -q - quiet (no output)");
	puts(" Optimal suffix search");
	puts(" ./sfxtest -d datafile [-a addondatafile] [-P [-R]] [-D] [-l | -p | -r | -g] [-n] [-q]");
	puts(" -a addondatafile - addon file data for loading");
	puts(" -g - histogramm mode");
	puts(" -l - listing mode");
	puts(" -p - prefix search mode");
	puts(" -r - range search mode");
	puts(" -n - enumerate entries");
	puts(" -P - use plain memory");
	puts(" -R - use plain memory");
	puts(" -D - dump/load tree");
	puts(" -q - quiet (no output)");
	exit(1);
}

/*
 * Recursively walks the tree rooted at node and builds a histogram in gist.
 *
 * For every node visited one counter is incremented:
 *   gist[0]            - skip node without a child,
 *   gist[1]            - skip node with a child,
 *   gist[node->nchild] - any non-skip node.
 *
 * node - subtree root; NULL is accepted and ignored.
 * gist - counter array supplied by the caller; it must be large enough to be
 *        indexed by every nchild value in the tree (the bound is not visible
 *        in this file).
 */
static void gistogramm(SFSNode *node, int *gist) {
	if ( !node ) return;
	if ( node->isskip ) {
		if ( node->haschild ) {
			gist[1]++;
			/* For a skip node with a child, node->data is read as a pointer to that child. */
			gistogramm( *(SFSNode**)(node->data), gist );
		} else gist[0]++;
	} else {
		SFSNodeData *data;
		gist[ node->nchild ]++;
		data=node->data;
		/*
		 * Visit all node->nchar entries. Note the loop is bounded by nchar
		 * while the histogram index above uses nchild.
		 */
		while(data - node->data < node->nchar) {
			/* The child pointer is stored data->child bytes past the entry itself. */
			if ( data->haschild ) gistogramm( *(SFSNode**)( ((char*)data) + data->child ), gist);
			data++;
		}
	}
}

/*
 * Program entry point.
 *
 * Parses the options, loads every non-empty line of the data file into a
 * NULL-terminated array of strings, then answers queries read line by line
 * from stdin and finally logs timing statistics through tlog().
 * Returns 0 on normal completion; usage() exits with status 1 on bad options.
 */
int main(int argn, char *argv[]) {
	struct timeval begin,end;
	char *datafile=NULL;
	int i, binary=0;
	FILE *in;
	char buf[4096];
	double sumtime=0.0;
	int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0, plain=0, revert=0, dump=0;
	char *addondatafile=NULL;
	int len=4096, curlen=0, checked=0, success=0;
	char **data;
	void *res;

	/* Option parsing: only -d and -a take an argument. */
	while ((i = getopt(argn, argv, "pd:bqha:lrngPRD")) != EOF) {
		switch(i) {
			case 'a': addondatafile=optarg; break;
			case 'd': datafile=optarg; break;
			case 'n': enumerate=1; break;
			case 'g': gmode=1; break;
			case 'r': range=1; break;
			case 'p': prefix=1; break;
			case 'l': list=1; break;
			case 'b': binary=1; break;
			case 'q': verbose=0; break;
			case 'P': plain=1; break;
			case 'R': revert=1; break;
			case 'D': dump=1; break;
			case 'h':
			default: usage();
		}
	}

	/* The data file is mandatory. */
	if (!datafile) usage();

	opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log");

	/* Presumably TL_EXIT makes tlog() terminate the process, so in is non-NULL below; confirm in tlog.h. */
	if ( (in=fopen(datafile,"r"))==NULL ) tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile);

	/* Load the data file: one entry per line, the pointer array doubles on demand. */
	data=(char**)tmalloc(len*sizeof(char*));
	while(fgets(buf,4096,in)) {
		/* Presumably clrspace() strips whitespace in place and returns the remaining length; confirm in tools.h. */
		structsize+=clrspace(buf)+1;
		if ( !*buf ) continue;
		/* Keep one spare slot so the terminating NULL always fits. */
		if (curlen+2 > len) {
			len*=2;
			data=(char**)trealloc(data, len*sizeof(char*));
		}
		data[curlen]=tstrdup(buf);
		curlen++;
	}
	fclose(in);
	data[curlen]=NULL;
	/* structsize: the summed clrspace() results plus one per line, plus the pointer array itself. */
	structsize+=sizeof(char*)*curlen;

	if ( binary == 1 ) {
		/* -b: sort the array once, then answer each query with bsearch(). */
		gettimeofday( &begin, NULL );
		qsort(data, curlen, sizeof(char**), STRINGCMP);
		tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
		tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)structsize)/(1024.0*1024.0));
		gettimeofday( &begin, NULL );
		while(fgets(buf,4096,stdin)) {
			/* bsearch() needs a pointer to a char pointer, matching the element type. */
			char *ptr=buf;
			/* From here on len holds the query length, no longer the array capacity. */
			len = clrspace(buf);
			if (!len) continue;
			res = bsearch(&ptr, data, curlen, sizeof(char**), STRINGCMP);
			if (verbose) puts( (res) ? "Y" : "N" );
			checked++;
			if (res) success++;
		}
		gettimeofday( &end, NULL );
	} else {
		/* Default: build an SFS tree from the loaded strings. */
		SFSTree info;
		SFSDataIO n = {NULL,0,NULL};
		/* Every added key takes its value from the current contents of enumerate. */
		n.data = (void*)&enumerate;
		gettimeofday( &begin, NULL );
		if (enumerate) {
			/*
			 * -n: add keys one by one. enumerate is incremented after each
			 * SFSAdd(), so n.data points at 1 for the first key, 2 for the
			 * second, and so on. Presumably SFSAdd() copies sizeof(enumerate)
			 * bytes from n.data; confirm in sfxstr.h.
			 */
			char **ptr=data;
			SFSInit_dp(&info,sizeof(enumerate),NULL);
			while(*ptr) {
				n.key = *ptr;
				n.keylen=0;
				SFSAdd(&info, &n);
				enumerate++;
				ptr++;
			}
		} else SFSInit_c(&info,data);
		tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
		tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
		tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
		/*
		 * NOTE(review): the remainder of main() is reproduced verbatim on a
		 * single line. Source text is missing right after the loop header
		 * fragment that starts it: the loop condition and everything up to
		 * the printing of out.key are gone, so the statement structure at
		 * that point cannot be reconstructed from this copy. Restore it from
		 * the upstream file before relying on it.
		 *
		 * What the surviving text shows, in order:
		 *  - the tail of a query loop that, when verbose, prints out.key
		 *    (followed by the int at out.data when enumerate is set) and
		 *    counts success and checked;
		 *  - an else-if (range) branch: for each non-empty stdin line it
		 *    calls SFSRange(&info,buf,&f,&l) and, when that returns non-zero
		 *    and verbose is set, prints the keys of f and l (with the int at
		 *    their data when enumerate is set);
		 *  - a final else branch: lookup through SFSFindData(&info,buf,0),
		 *    printing the stored int when enumerate is set and the lookup
		 *    succeeded, otherwise Y or N;
		 *  - SFSFree(&info,NULL), the timing summary logged through tlog(),
		 *    closetlog() and return 0.
		 *
		 * The flags list, prefix, gmode, plain, revert, dump and the
		 * addondatafile path, as well as gistogramm(), are not referenced in
		 * the surviving text; presumably they are used in the missing span.
		 */
for(i=0;i'); fputs(out.key, stdout); printf(" %d\n", *(int*)(out.data)); } else { putchar('>'); puts(out.key); } } success++; } checked++; } gettimeofday( &end, NULL ); } else if ( range ) { SFSDataIO f,l; gettimeofday( &begin, NULL ); while(fgets(buf,4096,stdin)) { len = clrspace(buf); if (!len) continue; if ( SFSRange(&info,buf,&f,&l) ) { if (verbose) { if (enumerate) { putchar('>'); fputs(f.key, stdout); printf(" %d\n", *(int*)(f.data)); putchar('>'); fputs(l.key, stdout); printf(" %d\n", *(int*)(l.data)); } else { putchar('>'); puts(f.key); putchar('>'); puts(l.key); } } success++; } checked++; } gettimeofday( &end, NULL ); } else { gettimeofday( &begin, NULL ); while(fgets(buf,4096,stdin)) { len = clrspace(buf); if (!len) continue; res = SFSFindData(&info,buf,0); if (verbose) { if (enumerate && res) printf("%d\n", *(int*)(res)); else puts( (res) ? "Y" : "N" ); } checked++; if (res) success++; } gettimeofday( &end, NULL ); } SFSFree(&info,NULL); } sumtime = timediff(&begin,&end); tlog(TL_INFO,"Total execution time: %.03f secs", sumtime); tlog(TL_INFO,"Total words in data: %d; Checked: %d; Success: %d", curlen, checked, success); tlog(TL_INFO,"%.2f words per second", ((double)checked)/sumtime); closetlog(); return 0; }