2 * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 STRINGCMP(const void *a, const void *b) {
47 return strcmp( *(char**)a, *(char**)b );
53 puts("./sfxtest -d datafile [-b| [-a addondatafile] [-l | -p | -r | -g] [-n]] [-q]");
54 puts("Detailed description:");
56 puts(" Binary search");
57 puts(" ./sfxtest -b -d datafile [-q]");
58 puts(" -q - quiet (no output)");
60 puts(" Optimal suffix search");
61 puts(" ./sfxtest -d datafile [-a addondatafile] [-l | -p | -r | -g] [-n] [-q]");
62 puts(" -a addondatafile - addon file data for loading");
63 puts(" -g - histogramm mode");
64 puts(" -l - listing mode");
65 puts(" -p - prefix search mode");
66 puts(" -r - range search mode");
67 puts(" -n - enumerate entries");
68 puts(" -q - quiet (no output)");
74 gistogramm(SFSNode *node, int *gist) {
79 if ( node->haschild ) {
81 gistogramm( *(SFSNode**)(node->data), gist );
87 gist[ node->nchild ]++;
90 while(data - node->data < node->nchar) {
92 gistogramm( *(SFSNode**)( ((char*)data) + data->child ), gist);
99 main(int argn, char *argv[]) {
100 struct timeval begin,end;
106 int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0;
107 char *addondatafile=NULL;
109 int len=4096, curlen=0, checked=0, success=0;
113 while ((i = getopt(argn, argv, "pd:bqha:lrng")) != EOF) {
116 addondatafile=optarg;
148 if (!datafile) usage();
150 opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log");
151 if ( (in=fopen(datafile,"r"))==NULL )
152 tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile);
154 data=(char**)tmalloc(len*sizeof(char*));
155 while(fgets(buf,4096,in)) {
156 structsize+=clrspace(buf)+1;
157 if ( !*buf ) continue;
158 if (curlen+2 > len) {
160 data=(char**)trealloc(data, len*sizeof(char*));
162 data[curlen]=tstrdup(buf);
167 structsize+=sizeof(char*)*curlen;
170 gettimeofday( &begin, NULL );
171 qsort(data, curlen, sizeof(char**), STRINGCMP);
172 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
173 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)structsize)/(1024.0*1024.0));
175 gettimeofday( &begin, NULL );
176 while(fgets(buf,4096,stdin)) {
180 res = bsearch(&ptr, data, curlen, sizeof(char**), STRINGCMP);
183 puts( (res) ? "Y" : "N" );
188 gettimeofday( &end, NULL );
191 SFSDataIO n = {NULL,0,NULL};
192 n.data = (void*)&enumerate;
194 gettimeofday( &begin, NULL );
198 SFSInit_dp(&info,sizeof(enumerate),NULL);
206 SFSInit_c(&info,data);
207 tlog(TL_INFO,"Init time: %.03f secs", elapsedtime(&begin) );
208 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
209 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
211 for(i=0;i<curlen;i++)
215 if ( addondatafile ) {
218 if ( (in=fopen(addondatafile,"r"))==NULL )
219 tlog(TL_CRIT|TL_EXIT,"Beda with %s", addondatafile);
222 gettimeofday( &begin, NULL );
223 while(fgets(buf,4096,in)) {
224 if ( !clrspace(buf) ) continue;
227 if (enumerate) enumerate++;
230 gettimeofday( &end, NULL );
232 sumtime = timediff(&begin,&end);
233 tlog(TL_INFO,"Add time: %.03f secs, %.03f per second", sumtime, ((double)cntadd)/sumtime);
234 tlog(TL_INFO,"Added item: %d", cntadd);
235 tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
236 tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
243 gettimeofday( &begin, NULL );
244 SFSIteratorStart(&info);
245 while( SFSIterate(&info, &out) ) {
248 fputs(out.key, stdout);
249 printf(" %d\n", *(int*)(out.data));
253 checked++; success++;
255 gettimeofday( &end, NULL );
256 } else if ( gmode ) {
259 memset(Gist,0,sizeof(int)*256);
260 gettimeofday( &begin, NULL );
261 gistogramm(info.node, Gist);
262 gettimeofday( &end, NULL );
264 printf("%d\t%d\n",i,Gist[i]);
265 } else if ( prefix ) {
268 gettimeofday( &begin, NULL );
269 while(fgets(buf,4096,stdin)) {
273 SFSPrefixIteratorStart(&info,buf);
274 while( SFSIterate(&info, &out)) {
278 fputs(out.key, stdout);
279 printf(" %d\n", *(int*)(out.data));
289 gettimeofday( &end, NULL );
290 } else if ( range ) {
293 gettimeofday( &begin, NULL );
294 while(fgets(buf,4096,stdin)) {
298 if ( SFSRange(&info,buf,&f,&l) ) {
302 fputs(f.key, stdout);
303 printf(" %d\n", *(int*)(f.data));
305 fputs(l.key, stdout);
306 printf(" %d\n", *(int*)(l.data));
308 putchar('>'); puts(f.key);
309 putchar('>'); puts(l.key);
316 gettimeofday( &end, NULL );
318 gettimeofday( &begin, NULL );
319 while(fgets(buf,4096,stdin)) {
323 res = SFSFindData(&info,buf);
325 if (enumerate && res)
326 printf("%d\n", *(int*)(res));
328 puts( (res) ? "Y" : "N" );
334 gettimeofday( &end, NULL );
339 sumtime = timediff(&begin,&end);
340 tlog(TL_INFO,"Total execution time: %.03f secs", sumtime);
341 tlog(TL_INFO,"Total words in data: %d; Checked: %d; Success: %d", curlen, checked, success);
342 tlog(TL_INFO,"%.2f words per second", ((double)checked)/sumtime);