add .gitignore
[tedtools.git] / sfxstr.c
index 92c8233..d65bf7c 100644 (file)
--- a/sfxstr.c
+++ b/sfxstr.c
@@ -29,6 +29,7 @@
 
 #include <errno.h>
 #include <string.h>
+#include <stdint.h>
 #include <sys/types.h>
 #include <sys/file.h>
 #include <sys/uio.h>
@@ -46,7 +47,7 @@
 
 #define SFSTREE_VERSION                0x0100
 
-typedef unsigned long Opaque;  /* XXX sizeof(Opaque) == sizeof(void *) */
+typedef uintptr_t Opaque;  /* XXX sizeof(Opaque) == sizeof(void *) */
 
 #define CHECK_MEMORY(tree)     ( ( (tree)->plainmemory ) ? \
        tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) 
@@ -139,19 +140,57 @@ SFSInit_c(SFSTree *info, char **in) {
        return info;
 }
 
+#define ISEND(p,w,l)   ( (l>0) ? ( ((char*)(p))-(w) >= (l) ) : ( *(p) == '\0' ) )
+
 void*
-SFSFindData(SFSTree *info, char *word) {
+SFSFindData(SFSTree *info, char *word, int len) {
+       SFSDataIO       in;
+
+       in.key = word;
+       in.keylen = len;
+
+       return SFSFindDataFromSavedOrSave(info, &in, NULL);
+}
+
+void*
+SFSFindDataOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) {
+       if ( position )
+               memset(position, 0, sizeof(SFSTreePosition));
+
+       return SFSFindDataFromSavedOrSave(info, in, position); 
+}
+
+void*
+SFSFindDataFromSavedOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) {
        SFSNode *node = info->node;
+       SFSNode **pnode = &(info->node);
        SFSNodeData *StopLow, *StopHigh, *StopMiddle;
-       u_int8_t *ptr =(u_int8_t*)word;
+       u_int8_t *ptr =(u_int8_t*)in->key;
+
+       if ( position && position->nodeptr && position->node && in->keylen > position->level ) {
+               node = position->node;
+               pnode = position->nodeptr;
+               ptr += position->level;
+       }
+
+       while( node && !ISEND(ptr, in->key, in->keylen) ) {
+               if ( position ) {
+                       position->nodeptr = pnode;
+                       position->node = node;
+                       position->level =  ((char*)ptr) - in->key;
+               }
 
-       while( node && *ptr ) {
                if ( node->isskip ) {
-                       if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) {
+                       if ( in->keylen>0 &&  in->keylen - (((char*)ptr) - in->key) < node->nchar )
+                               return NULL;
+                       else if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) {
                                ptr+=node->nchar;
-                               if ( *ptr=='\0' && node->isword) {
-                                       return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) );
+                               if ( ISEND(ptr, in->key, in->keylen) ) {
+                                       if (node->isword)
+                                               return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) );
+                                       return NULL;
                                } else if ( node->haschild ) {
+                                       pnode = (SFSNode**)( (char*)(node->data) );
                                        node = getSkipChildPointer(info, node);
                                } else {
                                        return NULL;
@@ -165,9 +204,12 @@ SFSFindData(SFSTree *info, char *word) {
                                StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
                                if ( StopMiddle->val == *ptr ) {
                                        ptr++;
-                                       if ( *ptr=='\0' && StopMiddle->isword ) {
-                                               return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
+                                       if ( ISEND(ptr, in->key, in->keylen) ) {
+                                               if ( StopMiddle->isword )
+                                                       return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
+                                               return NULL;
                                        } else if ( StopMiddle->haschild ) {
+                                               pnode = (SFSNode**)(((char*)StopMiddle) + StopMiddle->child);
                                                node = getChildPointer(info, StopMiddle);
                                        } else {
                                                return NULL;
@@ -186,6 +228,18 @@ SFSFindData(SFSTree *info, char *word) {
        return NULL;
 }
 
+void
+SFSAddSaved(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) {
+       CHECK_MEMORY(info);
+
+       if ( !(position && position->nodeptr && position->node) ) {
+               SFSAdd(info, in);
+               return;
+       }
+
+       position->node = *(position->nodeptr) = addRecord(info, position->node, in, position->level);
+}
+
 static void
 freeFSFNode(SFSTree *info, SFSNode *node, void (*freefunc)(void*)) {
        u_int32_t i;
@@ -584,11 +638,11 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
                        else 
                                node = splitSkipNode(info, node, in, level); 
                } else {
-                       StopLow = node->data;   
+               StopLow = node->data;   
                        StopHigh = StopLow + node->nchar;
                        while (StopLow < StopHigh) {
                                StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
-                               if ( StopMiddle->val == *ptr ) {
+                       if ( StopMiddle->val == *ptr ) {
                                        if ( *(ptr+1)=='\0' ) {
                                                if ( StopMiddle->isword ) {
                                                        /* already exists */
@@ -749,8 +803,13 @@ SFSIterate(SFSTree *info, SFSDataIO *out) {
                return 1;
        }
 
-       if ( s == NULL || s->node == NULL)
+       if ( s == NULL )
                return 0;
+       if ( s->node == NULL ) {
+               info->stack = s->next;
+               tfree(s);
+               return SFSIterate(info, out);
+       }
                                                         
        while ( s->level + s->node->nchar + 1 >= info->tlen ) {
                info->tlen *= 2;
@@ -978,7 +1037,7 @@ SFSRevertPlain(SFSTree *info) {
 }
 
 static Opaque
-writeNode(WorkPlain *wp, int fd, SFSNode *node) {
+writeNode(WorkPlain *wp, int fd, SFSNode *node, u_int32_t extrasize) {
        int     size = getNodeSize(wp->info, node);
        SFSNode  *wnode = (SFSNode*)tmalloc(size);
        off_t   myoffset = wp->offset;
@@ -991,7 +1050,7 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) {
        if ( node->isskip ) {
                if (node->haschild) 
                        *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = 
-                                       writeNode(wp, fd, getSkipChildPointer(wp->info, node));
+                                       writeNode(wp, fd, getSkipChildPointer(wp->info, node), extrasize);
        } else {
                SFSNodeData *nd = node->data;
                u_int32_t       i;
@@ -999,12 +1058,12 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) {
                for(i=0;i<node->nchar;i++) {
                        if (nd->haschild)
                                *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) =
-                                               writeNode(wp, fd, getChildPointer( wp->info, nd ) );    
+                                               writeNode(wp, fd, getChildPointer( wp->info, nd ), extrasize ); 
                        nd++;
                }
        }
 
-       if ( lseek(fd, myoffset + SFSTDHSZ, SEEK_SET) < 0 )
+       if ( lseek(fd, myoffset + SFSTDHSZ + extrasize, SEEK_SET) < 0 )
                tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
        if ( write(fd, wnode, size) != size )
                tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
@@ -1015,17 +1074,23 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) {
 }
 
 void 
-SFSWriteDump(SFSTree *info, char *filename) {
+SFSWriteDump(SFSTree *info, char *filename, void *extradata, u_int32_t extrasize) {
        int                             fd;
        off_t                           size = info->totalen + SFSTDHSZ;
        SFSTreeDumpHeader       dh;
 
        if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0666)) < 0 )
-               tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno));
+               tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", filename, strerror(errno));
 
        if ( flock(fd, LOCK_EX) < 0 )
                tlog(TL_CRIT|TL_EXIT, "flock failed: %s", strerror(errno));
-       if ( lseek(fd, size, SEEK_SET) < 0 )
+
+       if ( extrasize == 0 )
+               extradata = NULL;
+       else if ( extradata == NULL )
+               extrasize = 0;
+
+       if ( lseek(fd, size + MAXALIGN(extrasize) , SEEK_SET) < 0 )
                tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
 
        dh.version = SFSTREE_VERSION;
@@ -1034,12 +1099,25 @@ SFSWriteDump(SFSTree *info, char *filename) {
        dh.datasize = info->datasize;
        dh.totalen = info->totalen;
        dh.nnodes = info->nnodes;
+       dh.extrasize = extrasize;
 
        if ( lseek(fd, 0, SEEK_SET) < 0 )
                tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
        if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ )
                tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
 
+       if ( extrasize ) {
+               if ( write(fd, extradata, extrasize) != extrasize )
+                       tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
+               if ( extrasize != MAXALIGN(extrasize) ) {
+                       char dummy[8] = {0,0,0,0,0,0,0,0};
+                       if ( write(fd, dummy, MAXALIGN(extrasize) - extrasize ) != MAXALIGN(extrasize) - extrasize )
+                               tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
+
+                       extrasize = MAXALIGN(extrasize);
+               }
+       }
+
        if ( info->plainmemory ) {
                if ( write(fd, info->node, info->totalen) != info->totalen )
                        tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
@@ -1048,8 +1126,9 @@ SFSWriteDump(SFSTree *info, char *filename) {
 
                wp.info = info;
                wp.offset = 0;
-               
-               writeNode(&wp, fd, info->node);
+       
+               if ( info->node )
+                       writeNode(&wp, fd, info->node, extrasize);
        }
        
        flock(fd, LOCK_UN);
@@ -1057,7 +1136,7 @@ SFSWriteDump(SFSTree *info, char *filename) {
 }
 
 static SFSNode*
-readNode(SFSTree *info, int fd, char *buf, int bufsize) {
+readNode(SFSTree *info, int fd, char *buf, int bufsize, int extrasize) {
        SFSNode *node;
        int             size;
 
@@ -1072,10 +1151,10 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) {
 
        if ( node->isskip ) {
                if (node->haschild) {
-                       if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ, SEEK_SET) < 0 )
+                       if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ + extrasize, SEEK_SET) < 0 )
                                tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
                        *(SFSNode**)( (char*)(node->data) ) = 
-                               readNode(info, fd, buf, bufsize);
+                               readNode(info, fd, buf, bufsize, extrasize);
                }
        } else {
                SFSNodeData *nd = node->data;
@@ -1083,10 +1162,10 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) {
 
                for(i=0;i<node->nchar;i++) {
                        if (nd->haschild) {
-                               if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ, SEEK_SET) < 0 )
+                               if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ + extrasize, SEEK_SET) < 0 )
                                        tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
                                *(SFSNode**) (((char*)nd) + nd->child ) = 
-                                       readNode(info, fd, buf, bufsize);
+                                       readNode(info, fd, buf, bufsize, extrasize);
                        }
                        nd++;
                }
@@ -1096,12 +1175,17 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) {
 }
 
 void
-SFSReadDump(SFSTree *info, char *filename) {
+SFSReadDump(SFSTree *info, char *filename, void **extradata, u_int32_t *extrasize) {
        int                 fd;
        SFSTreeDumpHeader   dh;
        char                            *buf;
-       int                                     bufsize; 
-       
+       int                                     bufsize;
+
+       if ( extradata )
+               *extradata = NULL;
+       if (extrasize )
+               *extrasize = 0;
+
        memset(info,0,sizeof(SFSTree));
 
        if ( (fd = open(filename, O_RDONLY)) < 0 )
@@ -1123,10 +1207,25 @@ SFSReadDump(SFSTree *info, char *filename) {
        info->nnodes = dh.nnodes;
        info->datasize = dh.datasize;
 
+       if ( dh.extrasize ) {
+               void    *pointer = tmalloc( MAXALIGN(dh.extrasize) );
+
+               if ( read(fd, pointer, MAXALIGN(dh.extrasize)) != MAXALIGN(dh.extrasize) )
+                       tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno));
+
+               if ( extradata )
+                       *extradata = pointer;
+               else 
+                       tfree(pointer);
+
+               if ( extrasize )
+                       *extrasize = dh.extrasize;
+       }
+
        /* allocate buffer with max allowed size */
        bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize);
        buf = tmalloc( bufsize );
-       info->node = readNode(info, fd, buf, bufsize);
+       info->node = readNode(info, fd, buf, bufsize, MAXALIGN(dh.extrasize));
        tfree(buf);
 
        flock(fd, LOCK_UN);
@@ -1134,9 +1233,14 @@ SFSReadDump(SFSTree *info, char *filename) {
 }
 
 void
-SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) {
+SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size, void **extradata, u_int32_t *extrasize) {
        SFSTreeDumpHeader       *dh;
 
+       if ( extradata )
+               *extradata = NULL;
+       if (extrasize )
+               *extrasize = 0;
+
        memset(info,0,sizeof(SFSTree));
 
        if ( size && size < SFSTDHSZ )
@@ -1150,14 +1254,22 @@ SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) {
                tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch");
        if ( dh->headersize != SFSTDHSZ )
                tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize);
-       if ( size && size != dh->totalen + SFSTDHSZ )
-               tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ, size);
+       if ( size && size != dh->totalen + SFSTDHSZ  + MAXALIGN(dh->extrasize) )
+               tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ + dh->extrasize , size);
 
        info->totalen = dh->totalen;
        info->nnodes = dh->nnodes;
        info->datasize = dh->datasize;
        info->plainmemory = 1;
 
+       if ( dh->extrasize ) {
+               if ( extradata )
+                       *extradata = ((char*)pointer) + SFSTDHSZ;
+
+               if ( extrasize )
+                       *extrasize = dh->extrasize;
+       }
+
        if ( info->totalen && info->nnodes ) 
-               info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ );
+               info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ + MAXALIGN(dh->extrasize) );
 }