From: teodor Date: Mon, 7 Jul 2008 13:53:06 +0000 (+0000) Subject: make suffix dumpable and some minors cleanup X-Git-Url: http://sigaev.ru/git/gitweb.cgi?a=commitdiff_plain;h=5b73fa667ec282c3421d587db58e572d7f5828c2;p=tedtools.git make suffix dumpable and some minors cleanup --- diff --git a/Makefile b/Makefile index 6307b40..da86c8b 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ include $(topbuilddir)/Makefile.global clean: clean-test clean-test: - rm -rf sfxtest.log BTREE + rm -rf sfxtest.log sfxtest.dump BTREE rm -rf results diffs temp test: all diff --git a/expected/sfxmem b/expected/sfxmem index 9b74d2c..7fbc15b 100644 --- a/expected/sfxmem +++ b/expected/sfxmem @@ -17930,3 +17930,88 @@ Exact search Range search >g 10044 >gzwkwsu8b 1211 +Prefix search with plain tree +>zz 10024 +>zz1sifk 6450 +>zzbms 9163 +>zzkfgm2pg 8526 +>zzn7vtfxw 9315 +>zzoc6we 4909 +>zzupylnv 7483 +>zzuq 7330 +>zzyikntah 5462 +Exact search with plain tree +384 +453 +622 +985 +1287 +1348 +1367 +1614 +1783 +1816 +1876 +2007 +2172 +2637 +2723 +2787 +2927 +3075 +3124 +3189 +3205 +3335 +3431 +3726 +4925 +4974 +5021 +5493 +5538 +5608 +5704 +5705 +6030 +7954 +6542 +7460 +7954 +8095 +8673 +8675 +8712 +8977 +9036 +9211 +9362 +9609 +9623 +9657 +9851 +9869 +9979 +Range search with plain tree +>g 10044 +>gzwkwsu8b 1211 +Prefix search reload dump +>zz 10024 +>zz1sifk 6450 +>zzbms 9163 +>zzkfgm2pg 8526 +>zzn7vtfxw 9315 +>zzoc6we 4909 +>zzupylnv 7483 +>zzuq 7330 +>zzyikntah 5462 +Prefix search with plain tree and reload dump +>zz 10024 +>zz1sifk 6450 +>zzbms 9163 +>zzkfgm2pg 8526 +>zzn7vtfxw 9315 +>zzoc6we 4909 +>zzupylnv 7483 +>zzuq 7330 +>zzyikntah 5462 diff --git a/sfxstr.c b/sfxstr.c index 895b053..36d1f31 100644 --- a/sfxstr.c +++ b/sfxstr.c @@ -27,16 +27,74 @@ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include +#include +#include +#include +#include #include "tlog.h" #include "tmalloc.h" #include "sfxstr.h" +#include "tools.h" #define EN_VAL 0x01 #define EN_CHLD 0x02 #define EN_DATA 0x04 +#define SFSTREE_VERSION 0x0100 + +typedef unsigned long Opaque; /* XXX sizeof(Opaque) == sizeof(void *) */ + +#define CHECK_MEMORY(tree) ( ( (tree)->plainmemory ) ? \ + tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) + +static __inline__ int +getNodeSize(SFSTree *info, SFSNode *node) { + int size = SFSNHRDSZ; + + if ( node->isskip ) { + if ( node->isword ) + size += info->datasize; + if ( node->haschild ) + size += sizeof(SFSNode*); + size += node->nchar; + } else { + u_int32_t i; + u_int32_t nfound = 0; + SFSNodeData *data = node->data; + + size += sizeof(SFSNodeData) * node->nchar; + size += sizeof(SFSNode*) * node->nchild; + + for(i=0;inchar;i++) + nfound += data[i].isword; + + size += nfound*info->datasize; + } + + return PTRALIGN(size); +} + +static __inline__ SFSNode* +getChildPointer(SFSTree *info, SFSNodeData *nodedata) { + char *p = ((char*)nodedata) + nodedata->child; + + if ( info->plainmemory ) + return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)p ); + + return *(SFSNode**)p; +} + +static __inline__ SFSNode* +getSkipChildPointer(SFSTree *info, SFSNode *node) { + if ( info->plainmemory ) + return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)node->data ); + + return *(SFSNode**)( (char*)(node->data) ); +} + static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd); static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level); @@ -93,7 +151,7 @@ SFSFindData(SFSTree *info, char *word) { if ( *ptr=='\0' && node->isword) { return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) ); } else if ( node->haschild ) { - node = *(SFSNode**)(node->data); + node = getSkipChildPointer(info, node); } else { return NULL; } @@ -103,13 +161,13 @@ SFSFindData(SFSTree *info, char *word) { StopLow = node->data; StopHigh = StopLow + node->nchar; while (StopLow < StopHigh) { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if ( StopMiddle->val == *ptr ) { - ptr++; - if ( *ptr=='\0' && StopMiddle->isword ) { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + if ( StopMiddle->val == *ptr ) { + ptr++; + if ( *ptr=='\0' && StopMiddle->isword ) { return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); } else if ( StopMiddle->haschild ) { - node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ); + node = getChildPointer(info, StopMiddle); } else { return NULL; } @@ -158,7 +216,7 @@ void SFSFree(SFSTree *info, void (*freefunc)(void*)) { SFSNodeStack *s=info->stack; - if (info->node) freeFSFNode(info, info->node, freefunc); + if (info->node && !info->plainmemory) freeFSFNode(info, info->node, freefunc); if (info->buf) tfree(info->buf); info->buf = NULL; info->tlen=0; @@ -182,9 +240,9 @@ makeSkipNode(SFSTree *info, SFSDataIO *io, int level) { len = (io->keylen > 255) ? 255 : io->keylen; size = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize) + len; + size = PTRALIGN(size); res = (SFSNode*)tmalloc(size); - info->nnodes++; info->totalen+=size; @@ -250,18 +308,24 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { } if ( node->haschild ) *(SFSNode**)( ((char*)ndata) + ndata->child ) = *(SFSNode**)( node->data ); - info->totalen -= SFSNHRDSZ + ((node->isword) ? info->datasize : 0) + - ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar; + info->totalen -= getNodeSize(info, node); info->nnodes--; tfree(node); } else { + int size; + res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), EN_VAL|EN_CHLD, &ndata); + + size = getNodeSize(info,node); + info->totalen-=size; + node->nchar--; memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + 1, node->nchar); - info->totalen--; - *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node, - SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNode*) : 0) + - ((node->isword) ? info->datasize : 0 )+ node->nchar); + + size = getNodeSize(info,node); + info->totalen+=size; + + *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node, size); } res = addRecord(info, res, io, 0); } else if (prefixlen==io->keylen) { @@ -274,10 +338,12 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { node->isword=1; res=node; } else { - int size = SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar; + int osize = PTRALIGN(SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar); + int nsize = PTRALIGN(SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar); - info->totalen+=info->datasize; - res=(SFSNode*)trealloc(node,size); + info->totalen += nsize - osize; + + res=(SFSNode*)trealloc(node,nsize); res->dataptr=SFSNHRDSZ + info->datasize + ((res->haschild) ? sizeof(SFSNodeData*) : 0); res->isword=1; memmove(((char*)res) + res->dataptr, @@ -286,6 +352,8 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { } } else { int size = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*) + prefixlen; + size = PTRALIGN(size); + info->totalen+=size; info->nnodes++; res = (SFSNode*)tmalloc(size); @@ -297,11 +365,12 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { memcpy(((char*)res)+res->dataptr, io->key, prefixlen); if (info->datasize) memcpy(((char*)(res->data)) + sizeof(SFSNodeData*), io->data, info->datasize); + info->totalen-=getNodeSize(info,node); node->nchar-=prefixlen; memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar); - info->totalen-=prefixlen; - *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, SFSNHRDSZ + ((node->isword) ? info->datasize : 0) + - ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar); + size = getNodeSize(info,node); + info->totalen+=size; + *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size); } } else if ( prefixlen==node->nchar ) { int size = SFSNHRDSZ + info->datasize + sizeof(SFSNode*) + node->nchar; @@ -313,6 +382,7 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { *(SFSNode**)(res->data) = makeSkipNode(info, io, prefixlen); } else { int size = SFSNHRDSZ + sizeof(SFSNodeData*) + prefixlen; + size = PTRALIGN(size); info->totalen+=size; info->nnodes++; res = (SFSNode*)tmalloc(size); @@ -323,16 +393,16 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { res->dataptr = SFSNHRDSZ + sizeof(SFSNodeData*); memcpy(((char*)res)+res->dataptr, io->key, prefixlen); + info->totalen-= getNodeSize(info,node); node->nchar-=prefixlen; + memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar); - info->totalen-=prefixlen; - *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, - SFSNHRDSZ + ((node->isword) ? info->datasize : 0) + - ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar - ); - *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); + size = getNodeSize(info,node); + info->totalen+= size; + *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size); + *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); } io->key-=level; @@ -362,7 +432,8 @@ enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData ** } if ( node ) - info->totalen -= SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize; + /*info->totalen -= PTRALIGN(SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize);*/ + info->totalen -= getNodeSize(info, node); if ( flag & EN_VAL ) nchar++; if ( flag & EN_CHLD ) nchild++; @@ -370,6 +441,7 @@ enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData ** sizenode = SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize; + sizenode = PTRALIGN(sizenode); info->totalen+=sizenode; if ( !node ) info->nnodes++; @@ -503,7 +575,6 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) { SFSNodeData *StopLow, *StopHigh, *StopMiddle; u_int8_t *ptr = ((u_int8_t*)in->key) + level; - if ( node ) { if ( node->isskip ) { if ( node->haschild && in->keylen-level > node->nchar && @@ -559,8 +630,7 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) { } } } else - node = makeSkipNode(info, in, level); - + node = makeSkipNode(info, in, level); return node; @@ -568,6 +638,7 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) { void SFSAdd(SFSTree *info, SFSDataIO *in) { + CHECK_MEMORY(info); if (in->keylen<=0) in->keylen=strlen(in->key); info->node = addRecord(info, info->node, in, 0); @@ -623,7 +694,7 @@ SFSPrefixIteratorStart(SFSTree *info, char *word) { return; } else if ( node->haschild ) { ptr+=node->nchar; - node = *(SFSNode**)(node->data); + node = getSkipChildPointer(info, node); } else { return; } @@ -643,10 +714,10 @@ SFSPrefixIteratorStart(SFSTree *info, char *word) { info->wdata = (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); } if ( StopMiddle->haschild ) - info->stack = pushStack(NULL, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), len); + info->stack = pushStack(NULL, getChildPointer( info, StopMiddle ), len); return; } else if ( StopMiddle->haschild ) { - node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ); + node = getChildPointer( info, StopMiddle ); } else { return; } @@ -696,7 +767,7 @@ SFSIterate(SFSTree *info, SFSDataIO *out) { return 1; } if ( s->node->haschild && !s->checkedchild) { - info->stack = pushStack(s, *(SFSNode**)( (char*)(s->node->data) ), s->level+s->node->nchar); + info->stack = pushStack(s, getSkipChildPointer(info, s->node), s->level+s->node->nchar); s->checkedchild=1; if ( SFSIterate(info, out) ) return 1; @@ -713,7 +784,8 @@ SFSIterate(SFSTree *info, SFSDataIO *out) { return 1; } if ( s->checkedchild==0 && s->data->haschild ) { - info->stack = pushStack(s, *(SFSNode**)( ((char*)s->data) + s->data->child ), s->level+1); + info->stack = pushStack(s, + getChildPointer( info, s->data ), s->level+1); s->checkedchild=1; if ( SFSIterate(info, out) ) return 1; @@ -771,7 +843,7 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) { ((char*)(s->node))+s->node->dataptr, s->node->nchar); s->level+=s->node->nchar; if (s->node->haschild) { - s->node=*(SFSNode**)( s->node->data ); + s->node=getSkipChildPointer(info, s->node); } else { /* if (s->node->isword) */ info->buf[ f->keylen + 1 + s->level ] = '\0'; l->data = (void*)(s->node->data); @@ -783,7 +855,7 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) { while( s->data - s->node->data >= 0 ) { info->buf[ f->keylen + 1 + s->level ] = (char)s->data->val; if ( s->data->haschild ) { - s->node = *(SFSNode**)( ((char*)s->data) + s->data->child ); + s->node = getChildPointer( info, s->data ); s->level++; break; } @@ -804,3 +876,281 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) { return 1; } + +typedef struct WorkPlain { + SFSTree *info; + char *node; + off_t offset; +} WorkPlain; + +static Opaque +plainNode(WorkPlain *wp, SFSNode *node) { + int size = getNodeSize(wp->info, node); + off_t myoffset = wp->offset; + + memcpy( wp->node + wp->offset, node, size ); + wp->offset += size; + + tassert( wp->offset <= wp->info->totalen ); + + if ( node->isskip ) { + if (node->haschild) + *(Opaque*)(wp->node + myoffset + SFSNHRDSZ) = + plainNode(wp, getSkipChildPointer(wp->info, node)); + } else { + SFSNodeData *nd = node->data; + u_int32_t i; + + for(i=0;inchar;i++) { + if (nd->haschild) + *(Opaque*)(wp->node + myoffset + ( ((char*)nd) - ((char*)node) ) + nd->child) = + plainNode(wp, getChildPointer( wp->info, nd ) ); + nd++; + } + } + + tfree(node); + + return myoffset; +} + +void +SFSMakePlain(SFSTree *info, void *pointer) { + WorkPlain wp; + + if ( info->plainmemory ) + return; + + wp.info = info; + wp.offset = 0; + if ( pointer ) + wp.node = (char*)pointer; + else + wp.node = (char*)tmalloc(sizeof(char*) * info->totalen); + + plainNode(&wp, info->node); + tassert( wp.offset == info->totalen ); + + info->node = (SFSNode*)wp.node; + info->plainmemory = 1; +} + +static SFSNode* +revertNode(SFSTree *info, SFSNode* node) { + int size = getNodeSize(info, node); + SFSNode *newnode; + + newnode = (SFSNode*)tmalloc( size ); + memcpy(newnode, node, size); + + if ( node->isskip ) { + if (node->haschild) + *(SFSNode**)( (char*)(newnode->data) ) = + revertNode(info, getSkipChildPointer(info, node)); + } else { + SFSNodeData *nd = node->data; + SFSNodeData *nnd = newnode->data; + u_int32_t i; + + for(i=0;inchar;i++) { + if (nd->haschild) + *(SFSNode**) (((char*)nnd) + nnd->child ) = + revertNode(info, getChildPointer( info, nd )); + nd++; nnd++; + } + } + + return newnode; +} + +void * +SFSRevertPlain(SFSTree *info) { + void *pointer = info->node; + + if (! info->plainmemory ) + return NULL; + + info->node = revertNode(info, info->node); + info->plainmemory = 0; + + return pointer; +} + +static Opaque +writeNode(WorkPlain *wp, int fd, SFSNode *node) { + int size = getNodeSize(wp->info, node); + SFSNode *wnode = (SFSNode*)tmalloc(size); + off_t myoffset = wp->offset; + + memcpy( wnode, node, size ); + wp->offset += size; + + tassert( wp->offset <= wp->info->totalen ); + + if ( node->isskip ) { + if (node->haschild) + *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = + writeNode(wp, fd, getSkipChildPointer(wp->info, node)); + } else { + SFSNodeData *nd = node->data; + u_int32_t i; + + for(i=0;inchar;i++) { + if (nd->haschild) + *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) = + writeNode(wp, fd, getChildPointer( wp->info, nd ) ); + nd++; + } + } + + if ( lseek(fd, myoffset + SFSTDHSZ, SEEK_SET) < 0 ) + tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); + if ( write(fd, wnode, size) != size ) + tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + + tfree(wnode); + + return myoffset; +} + +void +SFSWriteDump(SFSTree *info, char *filename) { + int fd; + off_t size = info->totalen + SFSTDHSZ; + SFSTreeDumpHeader dh; + + if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC|O_EXLOCK, 0666)) < 0 ) + tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno)); + + if ( lseek(fd, size, SEEK_SET) < 0 ) + tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); + + dh.version = SFSTREE_VERSION; + dh.opaquesize = sizeof(Opaque); + dh.headersize = SFSTDHSZ; + dh.datasize = info->datasize; + dh.totalen = info->totalen; + dh.nnodes = info->nnodes; + + if ( lseek(fd, 0, SEEK_SET) < 0 ) + tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); + if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + + if ( info->plainmemory ) { + if ( write(fd, info->node, info->totalen) != info->totalen ) + tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + } else { + WorkPlain wp; + + wp.info = info; + wp.offset = 0; + + writeNode(&wp, fd, info->node); + } + + close(fd); +} + +static SFSNode* +readNode(SFSTree *info, int fd, char *buf, int bufsize) { + SFSNode *node; + int size; + + size = read(fd, buf, bufsize ); + if ( size < SFSNHRDSZ + sizeof(void*) ) + tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); + + size = getNodeSize(info, (SFSNode*)buf); + tassert( size <= bufsize ); + node = (SFSNode*)tmalloc( size ); + memcpy(node, buf, size); + + if ( node->isskip ) { + if (node->haschild) { + if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ, SEEK_SET) < 0 ) + tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); + *(SFSNode**)( (char*)(node->data) ) = + readNode(info, fd, buf, bufsize); + } + } else { + SFSNodeData *nd = node->data; + u_int32_t i; + + for(i=0;inchar;i++) { + if (nd->haschild) { + if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ, SEEK_SET) < 0 ) + tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); + *(SFSNode**) (((char*)nd) + nd->child ) = + readNode(info, fd, buf, bufsize); + } + nd++; + } + } + + return node; +} + +void +SFSReadDump(SFSTree *info, char *filename) { + int fd; + SFSTreeDumpHeader dh; + char *buf; + int bufsize; + + memset(info,0,sizeof(SFSTree)); + + if ( (fd = open(filename, O_RDONLY|O_SHLOCK)) < 0 ) + tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno)); + + if ( read(fd, &dh, SFSTDHSZ) != SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); + + if ( dh.version != SFSTREE_VERSION ) + tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh.version); + if ( dh.opaquesize != sizeof(Opaque) ) + tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch"); + if ( dh.headersize != SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh.headersize); + + info->totalen = dh.totalen; + info->nnodes = dh.nnodes; + info->datasize = dh.datasize; + + /* allocate buffer with max allowed size */ + bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize); + buf = tmalloc( bufsize ); + info->node = readNode(info, fd, buf, bufsize); + tfree(buf); + + close(fd); +} + +void +SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) { + SFSTreeDumpHeader *dh; + + memset(info,0,sizeof(SFSTree)); + + if ( size && size < SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "Memsize too small"); + + dh = (SFSTreeDumpHeader*)pointer; + + if ( dh->version != SFSTREE_VERSION ) + tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh->version); + if ( dh->opaquesize != sizeof(Opaque) ) + tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch"); + if ( dh->headersize != SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize); + if ( size && size != dh->totalen + SFSTDHSZ ) + tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ, size); + + info->totalen = dh->totalen; + info->nnodes = dh->nnodes; + info->datasize = dh->datasize; + info->plainmemory = 1; + + if ( info->totalen && info->nnodes ) + info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ ); +} diff --git a/sfxstr.h b/sfxstr.h index d5a3c58..7683e99 100644 --- a/sfxstr.h +++ b/sfxstr.h @@ -124,8 +124,9 @@ typedef struct SFSNodeStack { typedef struct { /* óÔÁÔÉÓÔÉÞÅÓËÉÅ ÄÁÎÎÙÅ */ - u_int32_t totalen; /* ÏÂÝÅÅ ËÏÌ-×Ï ÍÁÌÌÏÃÉÒÏ×ÁÎÎÏÊ ÐÁÍÑÔÉ */ - u_int32_t nnodes; /* ÏÂÝÅÅ ËÏÌ-×Ï ÕÚÌÏ× ÄÅÒÅ×Á */ + u_int64_t totalen; /* ÏÂÝÅÅ ËÏÌ-×Ï ÍÁÌÌÏÃÉÒÏ×ÁÎÎÏÊ ÐÁÍÑÔÉ */ + u_int64_t nnodes; /* ÏÂÝÅÅ ËÏÌ-×Ï ÕÚÌÏ× ÄÅÒÅ×Á */ + char plainmemory; /* true ÅÓÌÉ ÄÅÒÅ×Ï × ÐÌÏÓËÏÊ ÐÁÍÑÔÉ */ u_int32_t datasize; /* ÒÁÚÍÅÒ ÚÎÁÞÅÎÉÑ (ËÒÁÔÅÎ sizeof(u_int32_t))*/ SFSNode *node; /* ËÏÒÎÅ×ÏÊ ÕÚÅÌ ÄÅÒÅ×Á */ @@ -216,5 +217,45 @@ int SFSIterate(SFSTree *info, SFSDataIO *out); */ int SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l); +/* + * ëÏÐÉÒÕÅÔ ÄÅÒÅ×Ï × "ÐÌÏÓËÕÀ" ÐÁÍÑÔØ. "ðÌÏÓËÏÅ" ÄÅÒÅ×Ï Read Only. + */ +void SFSMakePlain(SFSTree *info, void *pointer); + +/* + * ëÏÎ×ÅÒÔÉÒÕÅÔ "ÐÌÏÓËÏÅ" ÄÅÒÅ×Ï × ÏÂÙÞÎÏÅ. + */ +void * SFSRevertPlain(SFSTree *info); + +typedef struct SFSTreeDumpHeader { + u_int32_t version; + u_int32_t opaquesize; + u_int32_t headersize; + u_int32_t datasize; + u_int64_t totalen; + u_int64_t nnodes; +} SFSTreeDumpHeader; + +#define SFSTDHSZ ( MAXALIGN(sizeof(SFSTreeDumpHeader)) ) + +/* + * óÏÚÄÁÅÔ dump ÄÅÒÅ×Á, ÎÅ ÔÒÅÂÕÅÔ ÂÏÌØÛÏÇÏ ÒÁÓÈÏÄÁ ÐÁÍÑÔÉ + */ +void SFSWriteDump(SFSTree *info, char *filename); + +/* + * óÏÚÄÁÅÔ "ÏÂÙÞÎÏÅ" (ÎÅ ÐÌÏÓËÏÅ) ÄÅÒÅ×Ï ÉÚ ÄÁÍÐÁ, + * ÎÅ ÔÒÅÂÕÅÔ ÂÏÌØÛÏÇÏ ÒÁÓÈÏÄÁ ÐÁÍÑÔÉ + * ïÂÎÕÌÑÅÔ info! + */ +void SFSReadDump(SFSTree *info, char *filename); + +/* + * éÎÉÃÉÁÌÉÚÉÒÕÅÔ "ÐÌÏÓËÏÅ" ÄÅÒÅ×Ï ÉÚ ÏÂÒÁÚÁ ÄÁÍÐÁ + * ïÂÎÕÌÑÅÔ info! + */ +void SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size); + + #endif diff --git a/sfxtest.c b/sfxtest.c index 4f2a10d..f374036 100644 --- a/sfxtest.c +++ b/sfxtest.c @@ -50,7 +50,7 @@ STRINGCMP(const void *a, const void *b) { static void usage() { puts("Usage:"); - puts("./sfxtest -d datafile [-b| [-a addondatafile] [-l | -p | -r | -g] [-n]] [-q]"); + puts("./sfxtest -d datafile [-b| [-a addondatafile] [-P [-R] [-D] [-l | -p | -r | -g] [-n]] [-q]"); puts("Detailed description:"); puts(" Binary search"); @@ -58,13 +58,16 @@ usage() { puts(" -q - quiet (no output)"); puts(" Optimal suffix search"); - puts(" ./sfxtest -d datafile [-a addondatafile] [-l | -p | -r | -g] [-n] [-q]"); + puts(" ./sfxtest -d datafile [-a addondatafile] [-P [-R]] [-D] [-l | -p | -r | -g] [-n] [-q]"); puts(" -a addondatafile - addon file data for loading"); puts(" -g - histogramm mode"); puts(" -l - listing mode"); puts(" -p - prefix search mode"); puts(" -r - range search mode"); puts(" -n - enumerate entries"); + puts(" -P - use plain memory"); + puts(" -R - use plain memory"); + puts(" -D - dump/load tree"); puts(" -q - quiet (no output)"); exit(1); @@ -103,14 +106,14 @@ main(int argn, char *argv[]) { FILE *in; char buf[4096]; double sumtime=0.0; - int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0; + int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0, plain=0, revert=0, dump=0; char *addondatafile=NULL; int len=4096, curlen=0, checked=0, success=0; char **data; void *res; - while ((i = getopt(argn, argv, "pd:bqha:lrng")) != EOF) { + while ((i = getopt(argn, argv, "pd:bqha:lrngPRD")) != EOF) { switch(i) { case 'a': addondatafile=optarg; @@ -139,6 +142,15 @@ main(int argn, char *argv[]) { case 'q': verbose=0; break; + case 'P': + plain=1; + break; + case 'R': + revert=1; + break; + case 'D': + dump=1; + break; case 'h': default: usage(); @@ -146,7 +158,7 @@ main(int argn, char *argv[]) { } if (!datafile) usage(); - + opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log"); if ( (in=fopen(datafile,"r"))==NULL ) tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile); @@ -236,6 +248,18 @@ main(int argn, char *argv[]) { tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0)); tlog(TL_INFO,"Number of nodes: %d", info.nnodes); curlen+=cntadd; + } + + if ( plain && !gmode ) { + SFSMakePlain(&info, NULL); + if ( revert ) + tfree( SFSRevertPlain(&info) ); + } + + if ( dump ) { + SFSWriteDump(&info, "./sfxtest.dump"); + SFSFree(&info,NULL); + SFSReadDump(&info, "./sfxtest.dump"); } if (list) { diff --git a/tests/sfxmem b/tests/sfxmem index 283f358..5e16dae 100644 --- a/tests/sfxmem +++ b/tests/sfxmem @@ -10,3 +10,13 @@ echo Exact search cat data/sfxmem.search | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n || exit 1 echo Range search echo g | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -r || exit 1 +echo Prefix search with plain tree +echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -P || exit 1 +echo Exact search with plain tree +cat data/sfxmem.search | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -P || exit 1 +echo Range search with plain tree +echo g | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -r -P || exit 1 +echo Prefix search reload dump +echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -D || exit 1 +echo Prefix search with plain tree and reload dump +echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -P -D || exit 1 diff --git a/tools.h b/tools.h index 6db856d..473e21b 100644 --- a/tools.h +++ b/tools.h @@ -48,9 +48,9 @@ double elapsedtime(struct timeval *begin); #define TYPEALIGN(ALIGNVAL,LEN) \ (((long) (LEN) + ((ALIGNVAL) - 1)) & ~((long) ((ALIGNVAL) - 1))) -#define SHORTALIGN(LEN) TYPEALIGN(sizeof(int16), (LEN)) -#define INTALIGN(LEN) TYPEALIGN(sizeof(int32), (LEN)) -#define MAXALIGN(LEN) TYPEALIGN(sizeof(int64), (LEN)) +#define SHORTALIGN(LEN) TYPEALIGN(sizeof(int16_t), (LEN)) +#define INTALIGN(LEN) TYPEALIGN(sizeof(int32_t), (LEN)) +#define MAXALIGN(LEN) TYPEALIGN(sizeof(int64_t), (LEN)) #define PTRALIGN(LEN) TYPEALIGN(sizeof(void*), (LEN)) #endif