2 * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 STRINGCMP(const void *a, const void *b) {
47 return strcmp( *(char**)a, *(char**)b );
53 puts("./sfxtest -d datafile [-b| [-a addondatafile] [-l | -p | -r | -g] [-n]] [-q]");
54 puts("Detailed description:");
56 puts(" Binary search");
57 puts(" ./sfxtest -b -d datafile [-q]");
58 puts(" -q - quiet (no output)");
60 puts(" Optimal suffix search");
61 puts(" ./sfxtest -d datafile [-a addondatafile] [-l | -p | -r | -g] [-n] [-q]");
62 puts(" -a addondatafile - addon file data for loading");
63 puts(" -g - histogramm mode");
64 puts(" -l - listing mode");
65 puts(" -p - prefix search mode");
66 puts(" -r - range search mode");
67 puts(" -n - enumerate entries");
68 puts(" -q - quiet (no output)");
74 gistogramm(SFSNode *node, int *gist) {
79 if ( node->haschild ) {
81 gistogramm( *(SFSNode**)(node->data), gist );
87 gist[ node->nchild ]++;
90 while(data - node->data < node->nchar) {
92 gistogramm( *(SFSNode**)( ((char*)data) + data->child ), gist);
99 main(int argn, char *argv[]) {
100 struct timeval begin,end;
106 int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0;
107 char *addondatafile=NULL;
109 int len=4096, curlen=0, checked=0, success=0;
113 while ((i = getopt(argn, argv, "pd:bqha:lrng")) != EOF) {
116 addondatafile=optarg;
148 if (!datafile) usage();
150 opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log");
151 if ( (in=fopen(datafile,"r"))==NULL )
152 tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile);
154 data=(char**)tmalloc(len*sizeof(char*));
155 while(fgets(buf,4096,in)) {
156 structsize+=clrspace(buf)+1;
157 if ( !*buf ) continue;
158 if (curlen+2 > len) {
160 data=(char**)trealloc(data, len*sizeof(char*));
162 data[curlen]=tstrdup(buf);
167 structsize+=sizeof(char*)*curlen;
170 gettimeofday( &begin, NULL );
171 qsort(data, curlen, sizeof(char**), STRINGCMP);
172 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
173 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)structsize)/(1024.0*1024.0));
175 gettimeofday( &begin, NULL );
176 while(fgets(buf,4096,stdin)) {
180 res = bsearch(&ptr, data, curlen, sizeof(char**), STRINGCMP);
183 puts( (res) ? "Y" : "N" );
188 gettimeofday( &end, NULL );
191 SFSDataIO n = {NULL,0,NULL};
192 n.data = (void*)&enumerate;
194 gettimeofday( &begin, NULL );
198 SFSInit_dp(&info,sizeof(enumerate),NULL);
207 SFSInit_c(&info,data);
208 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
209 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
210 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
212 for(i=0;i<curlen;i++)
216 if ( addondatafile ) {
219 if ( (in=fopen(addondatafile,"r"))==NULL )
220 tlog(TL_CRIT|TL_EXIT,"Beda with %s", addondatafile);
223 gettimeofday( &begin, NULL );
224 while(fgets(buf,4096,in)) {
225 if ( !clrspace(buf) ) continue;
228 if (enumerate) enumerate++;
231 gettimeofday( &end, NULL );
233 sumtime = timediff(&begin,&end);
234 tlog(TL_INFO,"Add time: %.03f secs, %.03f per second", sumtime, ((double)cntadd)/sumtime);
235 tlog(TL_INFO,"Added item: %d", cntadd);
236 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
237 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
244 gettimeofday( &begin, NULL );
245 SFSIteratorStart(&info);
246 while( SFSIterate(&info, &out) ) {
249 fputs(out.key, stdout);
250 printf(" %d\n", *(int*)(out.data));
254 checked++; success++;
256 gettimeofday( &end, NULL );
257 } else if ( gmode ) {
260 memset(Gist,0,sizeof(int)*256);
261 gettimeofday( &begin, NULL );
262 gistogramm(info.node, Gist);
263 gettimeofday( &end, NULL );
265 printf("%d\t%d\n",i,Gist[i]);
266 } else if ( prefix ) {
269 gettimeofday( &begin, NULL );
270 while(fgets(buf,4096,stdin)) {
274 SFSPrefixIteratorStart(&info,buf);
275 while( SFSIterate(&info, &out)) {
279 fputs(out.key, stdout);
280 printf(" %d\n", *(int*)(out.data));
290 gettimeofday( &end, NULL );
291 } else if ( range ) {
294 gettimeofday( &begin, NULL );
295 while(fgets(buf,4096,stdin)) {
299 if ( SFSRange(&info,buf,&f,&l) ) {
303 fputs(f.key, stdout);
304 printf(" %d\n", *(int*)(f.data));
306 fputs(l.key, stdout);
307 printf(" %d\n", *(int*)(l.data));
309 putchar('>'); puts(f.key);
310 putchar('>'); puts(l.key);
317 gettimeofday( &end, NULL );
319 gettimeofday( &begin, NULL );
320 while(fgets(buf,4096,stdin)) {
324 res = SFSFindData(&info,buf);
326 if (enumerate && res)
327 printf("%d\n", *(int*)(res));
329 puts( (res) ? "Y" : "N" );
335 gettimeofday( &end, NULL );
340 sumtime = timediff(&begin,&end);
341 tlog(TL_INFO,"Total execution time: %.03f secs", sumtime);
342 tlog(TL_INFO,"Total words in data: %d; Checked: %d; Success: %d", curlen, checked, success);
343 tlog(TL_INFO,"%.2f words per second", ((double)checked)/sumtime);