make suffix dumpable and some minors cleanup
authorteodor <teodor>
Mon, 7 Jul 2008 13:53:06 +0000 (13:53 +0000)
committerteodor <teodor>
Mon, 7 Jul 2008 13:53:06 +0000 (13:53 +0000)
Makefile
expected/sfxmem
sfxstr.c
sfxstr.h
sfxtest.c
tests/sfxmem
tools.h

index 6307b40..da86c8b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ include $(topbuilddir)/Makefile.global
 clean: clean-test
 
 clean-test:
-       rm -rf sfxtest.log BTREE
+       rm -rf sfxtest.log sfxtest.dump BTREE
        rm -rf results diffs temp
 
 test: all
index 9b74d2c..7fbc15b 100644 (file)
@@ -17930,3 +17930,88 @@ Exact search
 Range search
 >g 10044
 >gzwkwsu8b 1211
+Prefix search with plain tree
+>zz 10024
+>zz1sifk 6450
+>zzbms 9163
+>zzkfgm2pg 8526
+>zzn7vtfxw 9315
+>zzoc6we 4909
+>zzupylnv 7483
+>zzuq 7330
+>zzyikntah 5462
+Exact search with plain tree
+384
+453
+622
+985
+1287
+1348
+1367
+1614
+1783
+1816
+1876
+2007
+2172
+2637
+2723
+2787
+2927
+3075
+3124
+3189
+3205
+3335
+3431
+3726
+4925
+4974
+5021
+5493
+5538
+5608
+5704
+5705
+6030
+7954
+6542
+7460
+7954
+8095
+8673
+8675
+8712
+8977
+9036
+9211
+9362
+9609
+9623
+9657
+9851
+9869
+9979
+Range search with plain tree
+>g 10044
+>gzwkwsu8b 1211
+Prefix search reload dump
+>zz 10024
+>zz1sifk 6450
+>zzbms 9163
+>zzkfgm2pg 8526
+>zzn7vtfxw 9315
+>zzoc6we 4909
+>zzupylnv 7483
+>zzuq 7330
+>zzyikntah 5462
+Prefix search with plain tree and reload dump
+>zz 10024
+>zz1sifk 6450
+>zzbms 9163
+>zzkfgm2pg 8526
+>zzn7vtfxw 9315
+>zzoc6we 4909
+>zzupylnv 7483
+>zzuq 7330
+>zzyikntah 5462
index 895b053..36d1f31 100644 (file)
--- a/sfxstr.c
+++ b/sfxstr.c
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <errno.h>
 #include <string.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <fcntl.h>
 
 #include "tlog.h"
 #include "tmalloc.h"
 #include "sfxstr.h"
+#include "tools.h"
 
 #define EN_VAL 0x01
 #define EN_CHLD        0x02
 #define EN_DATA        0x04
 
+#define SFSTREE_VERSION                0x0100
+
+typedef unsigned long Opaque;  /* XXX sizeof(Opaque) == sizeof(void *) */
+
+#define CHECK_MEMORY(tree)     ( ( (tree)->plainmemory ) ? \
+       tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) 
+
+static __inline__ int
+getNodeSize(SFSTree *info, SFSNode *node) {
+       int size =      SFSNHRDSZ;
+
+       if ( node->isskip ) {
+               if ( node->isword )
+                       size += info->datasize;
+               if ( node->haschild )
+                       size += sizeof(SFSNode*);
+               size += node->nchar;
+       } else {
+               u_int32_t       i;
+               u_int32_t       nfound = 0;
+               SFSNodeData *data = node->data;
+
+               size += sizeof(SFSNodeData) * node->nchar;
+               size += sizeof(SFSNode*) * node->nchild;
+
+               for(i=0;i<node->nchar;i++) 
+                       nfound += data[i].isword;
+
+               size += nfound*info->datasize;
+       }
+
+       return PTRALIGN(size);
+}
+
+static __inline__ SFSNode*
+getChildPointer(SFSTree *info, SFSNodeData *nodedata) {
+       char *p = ((char*)nodedata) + nodedata->child; 
+
+       if ( info->plainmemory ) 
+               return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)p );
+
+       return *(SFSNode**)p;
+}
+
+static __inline__ SFSNode*
+getSkipChildPointer(SFSTree *info, SFSNode *node) {
+       if ( info->plainmemory )
+               return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)node->data );
+
+       return *(SFSNode**)( (char*)(node->data) );
+}
+
 static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd);
 static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level);
 
@@ -93,7 +151,7 @@ SFSFindData(SFSTree *info, char *word) {
                                if ( *ptr=='\0' && node->isword) {
                                        return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) );
                                } else if ( node->haschild ) {
-                                       node = *(SFSNode**)(node->data);
+                                       node = getSkipChildPointer(info, node);
                                } else {
                                        return NULL;
                                }
@@ -103,13 +161,13 @@ SFSFindData(SFSTree *info, char *word) {
                        StopLow = node->data;
                        StopHigh = StopLow + node->nchar;
                        while (StopLow < StopHigh) {
-                              StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
-                              if ( StopMiddle->val == *ptr ) {
-                                      ptr++;
-                                      if ( *ptr=='\0' && StopMiddle->isword ) {
+                               StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+                               if ( StopMiddle->val == *ptr ) {
+                                       ptr++;
+                                       if ( *ptr=='\0' && StopMiddle->isword ) {
                                                return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
                                        } else if ( StopMiddle->haschild ) {
-                                               node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child );
+                                               node = getChildPointer(info, StopMiddle);
                                        } else {
                                                return NULL;
                                        }
@@ -158,7 +216,7 @@ void
 SFSFree(SFSTree *info, void (*freefunc)(void*)) {
        SFSNodeStack *s=info->stack;
 
-       if (info->node) freeFSFNode(info, info->node, freefunc);
+       if (info->node && !info->plainmemory) freeFSFNode(info, info->node, freefunc);
        if (info->buf) tfree(info->buf);
        info->buf = NULL;
        info->tlen=0;
@@ -182,9 +240,9 @@ makeSkipNode(SFSTree *info, SFSDataIO *io, int level) {
 
        len = (io->keylen > 255) ? 255 : io->keylen;
        size = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize) + len;
+       size = PTRALIGN(size);
 
        res = (SFSNode*)tmalloc(size);
-
        info->nnodes++;
        info->totalen+=size;
 
@@ -250,18 +308,24 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                        }
                        if ( node->haschild ) 
                                *(SFSNode**)( ((char*)ndata) + ndata->child ) = *(SFSNode**)( node->data );
-                       info->totalen -= SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
-                               ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar;
+                       info->totalen -= getNodeSize(info, node);
                        info->nnodes--;
                        tfree(node);
                } else {
+                       int size;
+
                        res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), EN_VAL|EN_CHLD, &ndata);
+
+                       size = getNodeSize(info,node);
+                       info->totalen-=size;
+
                        node->nchar--;
                        memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + 1, node->nchar);
-                       info->totalen--;
-                       *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node,
-                               SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNode*) : 0) + 
-                               ((node->isword) ? info->datasize : 0 )+ node->nchar);
+
+                       size = getNodeSize(info,node);
+                       info->totalen+=size;
+
+                       *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node, size);
                }
                res = addRecord(info, res, io, 0);
        } else if (prefixlen==io->keylen) {
@@ -274,10 +338,12 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                                node->isword=1;
                                res=node;
                        } else {
-                               int size = SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar;
+                               int osize = PTRALIGN(SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
+                               int nsize = PTRALIGN(SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
 
-                               info->totalen+=info->datasize;
-                               res=(SFSNode*)trealloc(node,size);
+                               info->totalen += nsize - osize;
+
+                               res=(SFSNode*)trealloc(node,nsize);
                                res->dataptr=SFSNHRDSZ + info->datasize + ((res->haschild) ? sizeof(SFSNodeData*) : 0);
                                res->isword=1;
                                memmove(((char*)res) + res->dataptr,
@@ -286,6 +352,8 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                        }
                } else {
                        int size = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*) + prefixlen;
+                       size = PTRALIGN(size);
+
                        info->totalen+=size;
                        info->nnodes++;
                        res = (SFSNode*)tmalloc(size);
@@ -297,11 +365,12 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                        memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
                        if (info->datasize)
                                memcpy(((char*)(res->data)) + sizeof(SFSNodeData*), io->data, info->datasize);
+                       info->totalen-=getNodeSize(info,node);
                        node->nchar-=prefixlen;
                        memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
-                       info->totalen-=prefixlen;
-                       *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
-                               ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
+                       size = getNodeSize(info,node);
+                       info->totalen+=size;
+                       *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size);
                }
        } else if ( prefixlen==node->nchar ) {
                int size = SFSNHRDSZ + info->datasize +  sizeof(SFSNode*) + node->nchar;
@@ -313,6 +382,7 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                *(SFSNode**)(res->data) = makeSkipNode(info, io, prefixlen); 
        } else { 
                int size = SFSNHRDSZ + sizeof(SFSNodeData*) + prefixlen;
+               size = PTRALIGN(size);
                info->totalen+=size;
                info->nnodes++;
                res = (SFSNode*)tmalloc(size);
@@ -323,16 +393,16 @@ splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
                res->dataptr = SFSNHRDSZ + sizeof(SFSNodeData*);
                memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
 
+               info->totalen-= getNodeSize(info,node);
                node->nchar-=prefixlen;
+
                memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
-               info->totalen-=prefixlen;
 
-               *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, 
-                       SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
-                               ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar
-               );
-               *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); 
+               size = getNodeSize(info,node);
+               info->totalen+= size;
 
+               *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size);
+               *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); 
        }
 
        io->key-=level;
@@ -362,7 +432,8 @@ enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **
        }
 
        if ( node )
-               info->totalen -= SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize;
+               /*info->totalen -= PTRALIGN(SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize);*/
+               info->totalen -= getNodeSize(info, node);
 
        if ( flag & EN_VAL )   nchar++; 
        if ( flag & EN_CHLD ) nchild++; 
@@ -370,6 +441,7 @@ enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **
 
 
        sizenode = SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize;
+       sizenode = PTRALIGN(sizenode);
 
        info->totalen+=sizenode;
        if ( !node ) info->nnodes++;
@@ -503,7 +575,6 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
        SFSNodeData *StopLow, *StopHigh, *StopMiddle;
        u_int8_t *ptr = ((u_int8_t*)in->key) + level;
 
-
        if ( node ) {
                if ( node->isskip ) {
                        if ( node->haschild && in->keylen-level > node->nchar && 
@@ -559,8 +630,7 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
                        }
                }
        } else 
-               node = makeSkipNode(info, in, level); 
-
+               node = makeSkipNode(info, in, level);
 
        return node;
 
@@ -568,6 +638,7 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
 
 void
 SFSAdd(SFSTree *info, SFSDataIO *in) {
+       CHECK_MEMORY(info);
        if (in->keylen<=0)
                in->keylen=strlen(in->key);
        info->node = addRecord(info, info->node, in, 0);
@@ -623,7 +694,7 @@ SFSPrefixIteratorStart(SFSTree *info, char *word) {
                                        return;
                                } else if ( node->haschild ) {
                                        ptr+=node->nchar;
-                                       node = *(SFSNode**)(node->data);
+                                       node = getSkipChildPointer(info, node);
                                } else {
                                        return;
                                }
@@ -643,10 +714,10 @@ SFSPrefixIteratorStart(SFSTree *info, char *word) {
                                                        info->wdata = (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
                                                }
                                                if ( StopMiddle->haschild )
-                                                       info->stack = pushStack(NULL, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), len);
+                                                       info->stack = pushStack(NULL, getChildPointer( info, StopMiddle ), len);
                                                return;
                                        } else if ( StopMiddle->haschild ) {
-                                               node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child );
+                                               node = getChildPointer( info, StopMiddle ); 
                                        } else {
                                                return;
                                        }
@@ -696,7 +767,7 @@ SFSIterate(SFSTree *info, SFSDataIO *out) {
                        return 1;
                }
                if ( s->node->haschild && !s->checkedchild) {
-                       info->stack = pushStack(s, *(SFSNode**)( (char*)(s->node->data) ), s->level+s->node->nchar);
+                       info->stack = pushStack(s, getSkipChildPointer(info, s->node), s->level+s->node->nchar);
                        s->checkedchild=1;
                        if ( SFSIterate(info, out) )
                                return 1;
@@ -713,7 +784,8 @@ SFSIterate(SFSTree *info, SFSDataIO *out) {
                                return 1;
                        }
                        if ( s->checkedchild==0 && s->data->haschild ) {
-                               info->stack = pushStack(s, *(SFSNode**)( ((char*)s->data) + s->data->child ), s->level+1);
+                               info->stack = pushStack(s,
+                                       getChildPointer( info, s->data ), s->level+1);
                                s->checkedchild=1;
                                if ( SFSIterate(info, out) )
                                        return 1;
@@ -771,7 +843,7 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) {
                                ((char*)(s->node))+s->node->dataptr, s->node->nchar);
                        s->level+=s->node->nchar;
                        if (s->node->haschild) {
-                               s->node=*(SFSNode**)( s->node->data );  
+                               s->node=getSkipChildPointer(info, s->node);
                        } else { /* if (s->node->isword) */
                                info->buf[ f->keylen + 1 + s->level  ] = '\0';
                                l->data = (void*)(s->node->data);
@@ -783,7 +855,7 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) {
                        while( s->data - s->node->data >= 0 ) {
                                info->buf[ f->keylen + 1 + s->level ] = (char)s->data->val;
                                if ( s->data->haschild ) {
-                                       s->node = *(SFSNode**)( ((char*)s->data) + s->data->child );
+                                       s->node = getChildPointer( info, s->data ); 
                                        s->level++;
                                        break;
                                }
@@ -804,3 +876,281 @@ SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) {
 
        return 1;
 }
+
+typedef struct WorkPlain {
+       SFSTree                 *info;
+       char                    *node;
+       off_t                   offset;
+} WorkPlain;
+
+static Opaque
+plainNode(WorkPlain *wp, SFSNode *node) {
+       int     size = getNodeSize(wp->info, node);
+       off_t   myoffset = wp->offset;
+
+       memcpy( wp->node + wp->offset, node, size );
+       wp->offset += size;
+
+       tassert( wp->offset <= wp->info->totalen );
+
+       if ( node->isskip ) {
+               if (node->haschild) 
+                       *(Opaque*)(wp->node + myoffset + SFSNHRDSZ) = 
+                                       plainNode(wp, getSkipChildPointer(wp->info, node));
+       } else {
+               SFSNodeData *nd = node->data;
+               u_int32_t       i;
+
+               for(i=0;i<node->nchar;i++) {
+                       if (nd->haschild)
+                               *(Opaque*)(wp->node + myoffset + ( ((char*)nd) - ((char*)node) ) + nd->child) =
+                                               plainNode(wp, getChildPointer( wp->info, nd ) );        
+                       nd++;
+               }
+       }
+       tfree(node);
+
+       return myoffset;
+}
+
+void
+SFSMakePlain(SFSTree *info, void *pointer) {
+       WorkPlain       wp;
+
+       if ( info->plainmemory )
+               return;
+
+       wp.info = info;
+       wp.offset = 0;
+       if ( pointer )
+               wp.node = (char*)pointer;
+       else
+               wp.node = (char*)tmalloc(sizeof(char*) * info->totalen);
+
+       plainNode(&wp, info->node);
+       tassert( wp.offset == info->totalen );
+
+       info->node = (SFSNode*)wp.node; 
+       info->plainmemory = 1;
+}
+
+static SFSNode*
+revertNode(SFSTree *info, SFSNode* node) {
+       int size = getNodeSize(info, node);
+       SFSNode *newnode;
+
+       newnode = (SFSNode*)tmalloc( size );
+       memcpy(newnode, node, size);
+
+       if ( node->isskip ) {
+               if (node->haschild)
+                       *(SFSNode**)( (char*)(newnode->data) ) = 
+                               revertNode(info, getSkipChildPointer(info, node));
+       } else {
+               SFSNodeData *nd = node->data;
+               SFSNodeData *nnd = newnode->data;
+               u_int32_t   i;
+
+               for(i=0;i<node->nchar;i++) {
+                       if (nd->haschild)
+                               *(SFSNode**) (((char*)nnd) + nnd->child ) = 
+                                       revertNode(info, getChildPointer( info, nd ));
+                       nd++; nnd++;
+               }
+       }
+
+       return newnode;
+}
+
+void *
+SFSRevertPlain(SFSTree *info) {
+       void *pointer = info->node;
+
+       if (! info->plainmemory )
+               return NULL;
+
+       info->node = revertNode(info, info->node);
+       info->plainmemory = 0;
+
+       return pointer;
+}
+
+static Opaque
+writeNode(WorkPlain *wp, int fd, SFSNode *node) {
+       int     size = getNodeSize(wp->info, node);
+       SFSNode  *wnode = (SFSNode*)tmalloc(size);
+       off_t   myoffset = wp->offset;
+
+       memcpy( wnode, node, size );
+       wp->offset += size;
+
+       tassert( wp->offset <= wp->info->totalen );
+
+       if ( node->isskip ) {
+               if (node->haschild) 
+                       *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = 
+                                       writeNode(wp, fd, getSkipChildPointer(wp->info, node));
+       } else {
+               SFSNodeData *nd = node->data;
+               u_int32_t       i;
+
+               for(i=0;i<node->nchar;i++) {
+                       if (nd->haschild)
+                               *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) =
+                                               writeNode(wp, fd, getChildPointer( wp->info, nd ) );    
+                       nd++;
+               }
+       }
+
+       if ( lseek(fd, myoffset + SFSTDHSZ, SEEK_SET) < 0 )
+               tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
+       if ( write(fd, wnode, size) != size )
+               tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
+       
+       tfree(wnode);
+
+       return myoffset;
+}
+
+void 
+SFSWriteDump(SFSTree *info, char *filename) {
+       int                             fd;
+       off_t                           size = info->totalen + SFSTDHSZ;
+       SFSTreeDumpHeader       dh;
+
+       if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC|O_EXLOCK, 0666)) < 0 )
+               tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno));
+
+       if ( lseek(fd, size, SEEK_SET) < 0 )
+               tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
+
+       dh.version = SFSTREE_VERSION;
+       dh.opaquesize = sizeof(Opaque);
+       dh.headersize = SFSTDHSZ;
+       dh.datasize = info->datasize;
+       dh.totalen = info->totalen;
+       dh.nnodes = info->nnodes;
+
+       if ( lseek(fd, 0, SEEK_SET) < 0 )
+               tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
+       if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
+
+       if ( info->plainmemory ) {
+               if ( write(fd, info->node, info->totalen) != info->totalen )
+                       tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
+       } else {
+               WorkPlain       wp;
+
+               wp.info = info;
+               wp.offset = 0;
+               
+               writeNode(&wp, fd, info->node);
+       }
+       
+       close(fd);
+}
+
+static SFSNode*
+readNode(SFSTree *info, int fd, char *buf, int bufsize) {
+       SFSNode *node;
+       int             size;
+
+       size = read(fd, buf, bufsize );
+       if ( size < SFSNHRDSZ + sizeof(void*) )
+               tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno));
+
+       size = getNodeSize(info, (SFSNode*)buf);
+       tassert( size <= bufsize );
+       node = (SFSNode*)tmalloc( size );
+       memcpy(node, buf, size);
+
+       if ( node->isskip ) {
+               if (node->haschild) {
+                       if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ, SEEK_SET) < 0 )
+                               tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
+                       *(SFSNode**)( (char*)(node->data) ) = 
+                               readNode(info, fd, buf, bufsize);
+               }
+       } else {
+               SFSNodeData *nd = node->data;
+               u_int32_t   i;
+
+               for(i=0;i<node->nchar;i++) {
+                       if (nd->haschild) {
+                               if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ, SEEK_SET) < 0 )
+                                       tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
+                               *(SFSNode**) (((char*)nd) + nd->child ) = 
+                                       readNode(info, fd, buf, bufsize);
+                       }
+                       nd++;
+               }
+       }
+
+       return node;
+}
+
+void
+SFSReadDump(SFSTree *info, char *filename) {
+       int                 fd;
+       SFSTreeDumpHeader   dh;
+       char                            *buf;
+       int                                     bufsize; 
+       
+       memset(info,0,sizeof(SFSTree));
+
+       if ( (fd = open(filename, O_RDONLY|O_SHLOCK)) < 0 )
+               tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno));
+
+       if ( read(fd, &dh, SFSTDHSZ) != SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno));
+
+       if ( dh.version != SFSTREE_VERSION )
+               tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh.version);
+       if ( dh.opaquesize != sizeof(Opaque) )
+               tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch");
+       if ( dh.headersize != SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh.headersize);
+
+       info->totalen = dh.totalen;
+       info->nnodes = dh.nnodes;
+       info->datasize = dh.datasize;
+
+       /* allocate buffer with max allowed size */
+       bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize);
+       buf = tmalloc( bufsize );
+       info->node = readNode(info, fd, buf, bufsize);
+       tfree(buf);
+
+       close(fd);
+}
+
+void
+SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) {
+       SFSTreeDumpHeader       *dh;
+
+       memset(info,0,sizeof(SFSTree));
+
+       if ( size && size < SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "Memsize too small");
+
+       dh = (SFSTreeDumpHeader*)pointer;
+       
+       if ( dh->version != SFSTREE_VERSION )
+               tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh->version);
+       if ( dh->opaquesize != sizeof(Opaque) )
+               tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch");
+       if ( dh->headersize != SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize);
+       if ( size && size != dh->totalen + SFSTDHSZ )
+               tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ, size);
+
+       info->totalen = dh->totalen;
+       info->nnodes = dh->nnodes;
+       info->datasize = dh->datasize;
+       info->plainmemory = 1;
+
+       if ( info->totalen && info->nnodes ) 
+               info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ );
+}
index d5a3c58..7683e99 100644 (file)
--- a/sfxstr.h
+++ b/sfxstr.h
@@ -124,8 +124,9 @@ typedef struct SFSNodeStack {
 
 typedef struct {
        /* óÔÁÔÉÓÔÉÞÅÓËÉÅ ÄÁÎÎÙÅ */
-       u_int32_t       totalen; /* ÏÂÝÅÅ ËÏÌ-×Ï ÍÁÌÌÏÃÉÒÏ×ÁÎÎÏÊ ÐÁÍÑÔÉ */
-       u_int32_t       nnodes;  /* ÏÂÝÅÅ ËÏÌ-×Ï ÕÚÌÏ× ÄÅÒÅ×Á */
+       u_int64_t       totalen; /* ÏÂÝÅÅ ËÏÌ-×Ï ÍÁÌÌÏÃÉÒÏ×ÁÎÎÏÊ ÐÁÍÑÔÉ */
+       u_int64_t       nnodes;  /* ÏÂÝÅÅ ËÏÌ-×Ï ÕÚÌÏ× ÄÅÒÅ×Á */
+       char            plainmemory;  /* true ÅÓÌÉ ÄÅÒÅ×Ï × ÐÌÏÓËÏÊ ÐÁÍÑÔÉ */ 
 
        u_int32_t       datasize; /* ÒÁÚÍÅÒ ÚÎÁÞÅÎÉÑ (ËÒÁÔÅΠsizeof(u_int32_t))*/
         SFSNode        *node;    /* ËÏÒÎÅ×ÏÊ ÕÚÅÌ ÄÅÒÅ×Á */
@@ -216,5 +217,45 @@ int SFSIterate(SFSTree *info, SFSDataIO *out);
  */
 int SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l); 
 
+/*
+ * ëÏÐÉÒÕÅÔ ÄÅÒÅ×Ï × "ÐÌÏÓËÕÀ" ÐÁÍÑÔØ. "ðÌÏÓËÏÅ" ÄÅÒÅ×Ï Read Only.
+ */
+void SFSMakePlain(SFSTree *info, void *pointer);
+
+/*
+ * ëÏÎ×ÅÒÔÉÒÕÅÔ "ÐÌÏÓËÏÅ" ÄÅÒÅ×Ï × ÏÂÙÞÎÏÅ. 
+ */
+void * SFSRevertPlain(SFSTree *info);
+
+typedef struct SFSTreeDumpHeader {
+       u_int32_t               version;
+       u_int32_t               opaquesize;
+       u_int32_t               headersize;
+       u_int32_t               datasize;
+       u_int64_t               totalen;
+       u_int64_t               nnodes;
+} SFSTreeDumpHeader;
+
+#define  SFSTDHSZ      ( MAXALIGN(sizeof(SFSTreeDumpHeader))  )
+
+/*
+ * óÏÚÄÁÅÔ dump ÄÅÒÅ×Á, ÎÅ ÔÒÅÂÕÅÔ ÂÏÌØÛÏÇÏ ÒÁÓÈÏÄÁ ÐÁÍÑÔÉ
+ */
+void SFSWriteDump(SFSTree *info, char *filename);
+
+/*
+ * óÏÚÄÁÅÔ "ÏÂÙÞÎÏÅ" (ÎÅ ÐÌÏÓËÏÅ) ÄÅÒÅ×Ï ÉÚ ÄÁÍÐÁ, 
+ * ÎÅ ÔÒÅÂÕÅÔ ÂÏÌØÛÏÇÏ ÒÁÓÈÏÄÁ ÐÁÍÑÔÉ
+ * ïÂÎÕÌÑÅÔ info!
+ */
+void SFSReadDump(SFSTree *info, char *filename);
+
+/*
+ * éÎÉÃÉÁÌÉÚÉÒÕÅÔ "ÐÌÏÓËÏÅ" ÄÅÒÅ×Ï ÉÚ ÏÂÒÁÚÁ ÄÁÍÐÁ
+ * ïÂÎÕÌÑÅÔ info!
+ */
+void SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size);
+
+
 #endif
 
index 4f2a10d..f374036 100644 (file)
--- a/sfxtest.c
+++ b/sfxtest.c
@@ -50,7 +50,7 @@ STRINGCMP(const void *a, const void *b) {
 static void
 usage() {
        puts("Usage:");
-       puts("./sfxtest -d datafile [-b| [-a addondatafile] [-l | -p | -r | -g] [-n]] [-q]");
+       puts("./sfxtest -d datafile [-b| [-a addondatafile] [-P [-R] [-D] [-l | -p | -r | -g] [-n]] [-q]");
        puts("Detailed description:");
 
        puts("  Binary search");
@@ -58,13 +58,16 @@ usage() {
        puts("       -q - quiet (no output)");
 
        puts("  Optimal suffix search");
-       puts("    ./sfxtest -d datafile [-a addondatafile] [-l | -p | -r | -g]  [-n] [-q]");
+       puts("    ./sfxtest -d datafile [-a addondatafile] [-P [-R]] [-D] [-l | -p | -r | -g]  [-n] [-q]");
        puts("       -a addondatafile - addon file data for loading");
        puts("       -g - histogramm mode");
        puts("       -l - listing mode");
        puts("       -p - prefix search mode");
        puts("       -r - range search mode");
        puts("       -n - enumerate entries");
+       puts("       -P - use plain memory");
+       puts("       -R - use plain memory");
+       puts("       -D - dump/load tree");
        puts("       -q - quiet (no output)");
        
        exit(1);
@@ -103,14 +106,14 @@ main(int argn, char *argv[]) {
        FILE *in;
        char buf[4096];
        double sumtime=0.0;
-       int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0;
+       int structsize=0, list=0, prefix=0, range=0, enumerate=0,gmode=0, plain=0, revert=0, dump=0;
        char *addondatafile=NULL;
 
        int len=4096, curlen=0, checked=0, success=0;
        char **data;
        void *res;
        
-       while ((i = getopt(argn, argv, "pd:bqha:lrng")) != EOF) {
+       while ((i = getopt(argn, argv, "pd:bqha:lrngPRD")) != EOF) {
                switch(i) {
                        case 'a':
                                addondatafile=optarg;
@@ -139,6 +142,15 @@ main(int argn, char *argv[]) {
                        case 'q':
                                verbose=0;
                                break;
+                       case 'P':
+                               plain=1;
+                               break;
+                       case 'R':
+                               revert=1;
+                               break;
+                       case 'D':
+                               dump=1;
+                               break;
                        case 'h':
                        default: 
                                usage();
@@ -146,7 +158,7 @@ main(int argn, char *argv[]) {
        }
 
        if (!datafile) usage();
-
+               
        opentlog(TL_OPEN_STDERR|TL_OPEN_SYSLOG|TL_OPEN_FILE, TL_INFO, "./sfxtest.log");
        if ( (in=fopen(datafile,"r"))==NULL ) 
                tlog(TL_CRIT|TL_EXIT,"Beda with %s", datafile);
@@ -236,6 +248,18 @@ main(int argn, char *argv[]) {
                        tlog(TL_INFO,"Memory allocated: %.2fMb", ((float)info.totalen)/(1024.0*1024.0));
                        tlog(TL_INFO,"Number of nodes: %d", info.nnodes);
                        curlen+=cntadd;
+               } 
+
+               if ( plain && !gmode ) { 
+                       SFSMakePlain(&info, NULL);
+                       if ( revert )
+                               tfree( SFSRevertPlain(&info) );
+               }
+
+               if ( dump ) {
+                       SFSWriteDump(&info, "./sfxtest.dump");
+                       SFSFree(&info,NULL);
+                       SFSReadDump(&info, "./sfxtest.dump");
                }
 
                if (list) {
index 283f358..5e16dae 100644 (file)
@@ -10,3 +10,13 @@ echo Exact search
 cat data/sfxmem.search | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n || exit 1
 echo Range search
 echo g | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -r || exit 1 
+echo Prefix search with plain tree
+echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -P || exit 1
+echo Exact search with plain tree
+cat data/sfxmem.search | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -P || exit 1
+echo Range search with plain tree
+echo g | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -r -P || exit 1 
+echo Prefix search reload dump
+echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -D || exit 1
+echo Prefix search with plain tree and reload dump
+echo zz | ./sfxtest -d data/sfxmem.data -a data/sfxmem.addon -n -p -P -D || exit 1
diff --git a/tools.h b/tools.h
index 6db856d..473e21b 100644 (file)
--- a/tools.h
+++ b/tools.h
@@ -48,9 +48,9 @@ double elapsedtime(struct timeval *begin);
 #define TYPEALIGN(ALIGNVAL,LEN)  \
         (((long) (LEN) + ((ALIGNVAL) - 1)) & ~((long) ((ALIGNVAL) - 1)))
 
-#define SHORTALIGN(LEN)                 TYPEALIGN(sizeof(int16), (LEN))
-#define INTALIGN(LEN)                   TYPEALIGN(sizeof(int32), (LEN))
-#define MAXALIGN(LEN)                   TYPEALIGN(sizeof(int64), (LEN))
+#define SHORTALIGN(LEN)                 TYPEALIGN(sizeof(int16_t), (LEN))
+#define INTALIGN(LEN)                   TYPEALIGN(sizeof(int32_t), (LEN))
+#define MAXALIGN(LEN)                   TYPEALIGN(sizeof(int64_t), (LEN))
 #define PTRALIGN(LEN)                   TYPEALIGN(sizeof(void*), (LEN))
 #endif