2 * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 STRINGCMP(const void *a, const void *b) {
47 return strcmp( *(char**)a, *(char**)b );
53 puts("./sfxtest -d datafile [-b| [-a addondatafile] [-P [-R] [-D] [-l | -p | -r | -g] [-n]] [-q]");
54 puts("Detailed description:");
56 puts(" Binary search");
57 puts(" ./sfxtest -b -d datafile [-q]");
58 puts(" -q - quiet (no output)");
60 puts(" Optimal suffix search");
61 puts(" ./sfxtest -d datafile [-a addondatafile] [-P [-R]] [-D] [-l | -p | -r | -g] [-n] [-q]");
62 puts(" -a addondatafile - addon file data for loading");
63 puts(" -g - histogramm mode");
64 puts(" -l - listing mode");
65 puts(" -p - prefix search mode");
66 puts(" -r - range search mode");
67 puts(" -n - enumerate entries");
68 puts(" -P - use plain memory");
69 puts(" -R - use plain memory");
70 puts(" -D - dump/load tree");
71 puts(" -q - quiet (no output)");
77 gistogramm(SFSNode *node, int *gist) {
82 if ( node->haschild ) {
84 gistogramm( *(SFSNode**)(node->data), gist );
90 gist[ node->nchild ]++;
93 while(data - node->data < node->nchar) {
95 gistogramm( *(SFSNode**)( ((char*)data) + data->child ), gist);
102 main(int argn, char *argv[]) {
103 struct timeval begin,end;
109 int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0, plain=0, revert=0, dump=0;
110 char *addondatafile=NULL;
112 int len=4096, curlen=0, checked=0, success=0;
116 while ((i = getopt(argn, argv, "pd:bqha:lrngPRD")) != EOF) {
119 addondatafile=optarg;
160 if (!datafile) usage();
162 opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log");
163 if ( (in=fopen(datafile,"r"))==NULL )
164 tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile);
166 data=(char**)tmalloc(len*sizeof(char*));
167 while(fgets(buf,4096,in)) {
168 structsize+=clrspace(buf)+1;
169 if ( !*buf ) continue;
170 if (curlen+2 > len) {
172 data=(char**)trealloc(data, len*sizeof(char*));
174 data[curlen]=tstrdup(buf);
179 structsize+=sizeof(char*)*curlen;
182 gettimeofday( &begin, NULL );
183 qsort(data, curlen, sizeof(char**), STRINGCMP);
184 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
185 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)structsize)/(1024.0*1024.0));
187 gettimeofday( &begin, NULL );
188 while(fgets(buf,4096,stdin)) {
192 res = bsearch(&ptr, data, curlen, sizeof(char**), STRINGCMP);
195 puts( (res) ? "Y" : "N" );
200 gettimeofday( &end, NULL );
203 SFSDataIO n = {NULL,0,NULL};
204 n.data = (void*)&enumerate;
206 gettimeofday( &begin, NULL );
210 SFSInit_dp(&info,sizeof(enumerate),NULL);
219 SFSInit_c(&info,data);
220 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
221 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
222 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
224 for(i=0;i<curlen;i++)
228 if ( addondatafile ) {
231 if ( (in=fopen(addondatafile,"r"))==NULL )
232 tlog(TL_CRIT|TL_EXIT,"Beda with %s", addondatafile);
235 gettimeofday( &begin, NULL );
236 while(fgets(buf,4096,in)) {
237 if ( !clrspace(buf) ) continue;
240 if (enumerate) enumerate++;
243 gettimeofday( &end, NULL );
245 sumtime = timediff(&begin,&end);
246 tlog(TL_INFO,"Add time: %.03f secs, %.03f per second", sumtime, ((double)cntadd)/sumtime);
247 tlog(TL_INFO,"Added item: %d", cntadd);
248 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
249 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
253 if ( plain && !gmode ) {
254 SFSMakePlain(&info, NULL);
256 tfree( SFSRevertPlain(&info) );
264 SFSWriteDump(&info, "./sfxtest.dump", &n, sizeof(n) );
266 SFSReadDump(&info, "./sfxtest.dump", &pn, &nz);
268 tassert(*(int*)pn == n);
269 tassert(sizeof(n) == nz);
275 gettimeofday( &begin, NULL );
276 SFSIteratorStart(&info);
277 while( SFSIterate(&info, &out) ) {
280 fputs(out.key, stdout);
281 printf(" %d\n", *(int*)(out.data));
285 checked++; success++;
287 gettimeofday( &end, NULL );
288 } else if ( gmode ) {
291 memset(Gist,0,sizeof(int)*256);
292 gettimeofday( &begin, NULL );
293 gistogramm(info.node, Gist);
294 gettimeofday( &end, NULL );
296 printf("%d\t%d\n",i,Gist[i]);
297 } else if ( prefix ) {
300 gettimeofday( &begin, NULL );
301 while(fgets(buf,4096,stdin)) {
305 SFSPrefixIteratorStart(&info,buf);
306 while( SFSIterate(&info, &out)) {
310 fputs(out.key, stdout);
311 printf(" %d\n", *(int*)(out.data));
321 gettimeofday( &end, NULL );
322 } else if ( range ) {
325 gettimeofday( &begin, NULL );
326 while(fgets(buf,4096,stdin)) {
330 if ( SFSRange(&info,buf,&f,&l) ) {
334 fputs(f.key, stdout);
335 printf(" %d\n", *(int*)(f.data));
337 fputs(l.key, stdout);
338 printf(" %d\n", *(int*)(l.data));
340 putchar('>'); puts(f.key);
341 putchar('>'); puts(l.key);
348 gettimeofday( &end, NULL );
350 gettimeofday( &begin, NULL );
351 while(fgets(buf,4096,stdin)) {
355 res = SFSFindData(&info,buf,0);
357 if (enumerate && res)
358 printf("%d\n", *(int*)(res));
360 puts( (res) ? "Y" : "N" );
366 gettimeofday( &end, NULL );
371 sumtime = timediff(&begin,&end);
372 tlog(TL_INFO,"Total execution time: %.03f secs", sumtime);
373 tlog(TL_INFO,"Total words in data: %d; Checked: %d; Success: %d", curlen, checked, success);
374 tlog(TL_INFO,"%.2f words per second", ((double)checked)/sumtime);