X-Git-Url: http://sigaev.ru/git/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=sfxstr.c;h=d65bf7c118b1e00e04fd6e6b44b8b856e9ff3b88;hb=HEAD;hp=92c8233cca3f365026c0222823fa7d71a6c1a2e6;hpb=de2723270f9a04d99137a3f6a7d6ba1e90b5d01b;p=tedtools.git diff --git a/sfxstr.c b/sfxstr.c index 92c8233..d65bf7c 100644 --- a/sfxstr.c +++ b/sfxstr.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,7 @@ #define SFSTREE_VERSION 0x0100 -typedef unsigned long Opaque; /* XXX sizeof(Opaque) == sizeof(void *) */ +typedef uintptr_t Opaque; /* XXX sizeof(Opaque) == sizeof(void *) */ #define CHECK_MEMORY(tree) ( ( (tree)->plainmemory ) ? \ tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) @@ -139,19 +140,57 @@ SFSInit_c(SFSTree *info, char **in) { return info; } +#define ISEND(p,w,l) ( (l>0) ? ( ((char*)(p))-(w) >= (l) ) : ( *(p) == '\0' ) ) + void* -SFSFindData(SFSTree *info, char *word) { +SFSFindData(SFSTree *info, char *word, int len) { + SFSDataIO in; + + in.key = word; + in.keylen = len; + + return SFSFindDataFromSavedOrSave(info, &in, NULL); +} + +void* +SFSFindDataOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { + if ( position ) + memset(position, 0, sizeof(SFSTreePosition)); + + return SFSFindDataFromSavedOrSave(info, in, position); +} + +void* +SFSFindDataFromSavedOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { SFSNode *node = info->node; + SFSNode **pnode = &(info->node); SFSNodeData *StopLow, *StopHigh, *StopMiddle; - u_int8_t *ptr =(u_int8_t*)word; + u_int8_t *ptr =(u_int8_t*)in->key; + + if ( position && position->nodeptr && position->node && in->keylen > position->level ) { + node = position->node; + pnode = position->nodeptr; + ptr += position->level; + } + + while( node && !ISEND(ptr, in->key, in->keylen) ) { + if ( position ) { + position->nodeptr = pnode; + position->node = node; + position->level = ((char*)ptr) - in->key; + } - while( node && *ptr ) { if ( node->isskip ) { - if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) { + if ( in->keylen>0 && in->keylen - (((char*)ptr) - in->key) < node->nchar ) + return NULL; + else if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) { ptr+=node->nchar; - if ( *ptr=='\0' && node->isword) { - return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) ); + if ( ISEND(ptr, in->key, in->keylen) ) { + if (node->isword) + return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) ); + return NULL; } else if ( node->haschild ) { + pnode = (SFSNode**)( (char*)(node->data) ); node = getSkipChildPointer(info, node); } else { return NULL; @@ -165,9 +204,12 @@ SFSFindData(SFSTree *info, char *word) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if ( StopMiddle->val == *ptr ) { ptr++; - if ( *ptr=='\0' && StopMiddle->isword ) { - return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); + if ( ISEND(ptr, in->key, in->keylen) ) { + if ( StopMiddle->isword ) + return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); + return NULL; } else if ( StopMiddle->haschild ) { + pnode = (SFSNode**)(((char*)StopMiddle) + StopMiddle->child); node = getChildPointer(info, StopMiddle); } else { return NULL; @@ -186,6 +228,18 @@ SFSFindData(SFSTree *info, char *word) { return NULL; } +void +SFSAddSaved(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { + CHECK_MEMORY(info); + + if ( !(position && position->nodeptr && position->node) ) { + SFSAdd(info, in); + return; + } + + position->node = *(position->nodeptr) = addRecord(info, position->node, in, position->level); +} + static void freeFSFNode(SFSTree *info, SFSNode *node, void (*freefunc)(void*)) { u_int32_t i; @@ -584,11 +638,11 @@ addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) { else node = splitSkipNode(info, node, in, level); } else { - StopLow = node->data; + StopLow = node->data; StopHigh = StopLow + node->nchar; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if ( StopMiddle->val == *ptr ) { + if ( StopMiddle->val == *ptr ) { if ( *(ptr+1)=='\0' ) { if ( StopMiddle->isword ) { /* already exists */ @@ -749,8 +803,13 @@ SFSIterate(SFSTree *info, SFSDataIO *out) { return 1; } - if ( s == NULL || s->node == NULL) + if ( s == NULL ) return 0; + if ( s->node == NULL ) { + info->stack = s->next; + tfree(s); + return SFSIterate(info, out); + } while ( s->level + s->node->nchar + 1 >= info->tlen ) { info->tlen *= 2; @@ -978,7 +1037,7 @@ SFSRevertPlain(SFSTree *info) { } static Opaque -writeNode(WorkPlain *wp, int fd, SFSNode *node) { +writeNode(WorkPlain *wp, int fd, SFSNode *node, u_int32_t extrasize) { int size = getNodeSize(wp->info, node); SFSNode *wnode = (SFSNode*)tmalloc(size); off_t myoffset = wp->offset; @@ -991,7 +1050,7 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) { if ( node->isskip ) { if (node->haschild) *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = - writeNode(wp, fd, getSkipChildPointer(wp->info, node)); + writeNode(wp, fd, getSkipChildPointer(wp->info, node), extrasize); } else { SFSNodeData *nd = node->data; u_int32_t i; @@ -999,12 +1058,12 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) { for(i=0;inchar;i++) { if (nd->haschild) *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) = - writeNode(wp, fd, getChildPointer( wp->info, nd ) ); + writeNode(wp, fd, getChildPointer( wp->info, nd ), extrasize ); nd++; } } - if ( lseek(fd, myoffset + SFSTDHSZ, SEEK_SET) < 0 ) + if ( lseek(fd, myoffset + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); if ( write(fd, wnode, size) != size ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); @@ -1015,17 +1074,23 @@ writeNode(WorkPlain *wp, int fd, SFSNode *node) { } void -SFSWriteDump(SFSTree *info, char *filename) { +SFSWriteDump(SFSTree *info, char *filename, void *extradata, u_int32_t extrasize) { int fd; off_t size = info->totalen + SFSTDHSZ; SFSTreeDumpHeader dh; if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0666)) < 0 ) - tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno)); + tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", filename, strerror(errno)); if ( flock(fd, LOCK_EX) < 0 ) tlog(TL_CRIT|TL_EXIT, "flock failed: %s", strerror(errno)); - if ( lseek(fd, size, SEEK_SET) < 0 ) + + if ( extrasize == 0 ) + extradata = NULL; + else if ( extradata == NULL ) + extrasize = 0; + + if ( lseek(fd, size + MAXALIGN(extrasize) , SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); dh.version = SFSTREE_VERSION; @@ -1034,12 +1099,25 @@ SFSWriteDump(SFSTree *info, char *filename) { dh.datasize = info->datasize; dh.totalen = info->totalen; dh.nnodes = info->nnodes; + dh.extrasize = extrasize; if ( lseek(fd, 0, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + if ( extrasize ) { + if ( write(fd, extradata, extrasize) != extrasize ) + tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + if ( extrasize != MAXALIGN(extrasize) ) { + char dummy[8] = {0,0,0,0,0,0,0,0}; + if ( write(fd, dummy, MAXALIGN(extrasize) - extrasize ) != MAXALIGN(extrasize) - extrasize ) + tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); + + extrasize = MAXALIGN(extrasize); + } + } + if ( info->plainmemory ) { if ( write(fd, info->node, info->totalen) != info->totalen ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); @@ -1048,8 +1126,9 @@ SFSWriteDump(SFSTree *info, char *filename) { wp.info = info; wp.offset = 0; - - writeNode(&wp, fd, info->node); + + if ( info->node ) + writeNode(&wp, fd, info->node, extrasize); } flock(fd, LOCK_UN); @@ -1057,7 +1136,7 @@ SFSWriteDump(SFSTree *info, char *filename) { } static SFSNode* -readNode(SFSTree *info, int fd, char *buf, int bufsize) { +readNode(SFSTree *info, int fd, char *buf, int bufsize, int extrasize) { SFSNode *node; int size; @@ -1072,10 +1151,10 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) { if ( node->isskip ) { if (node->haschild) { - if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ, SEEK_SET) < 0 ) + if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); *(SFSNode**)( (char*)(node->data) ) = - readNode(info, fd, buf, bufsize); + readNode(info, fd, buf, bufsize, extrasize); } } else { SFSNodeData *nd = node->data; @@ -1083,10 +1162,10 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) { for(i=0;inchar;i++) { if (nd->haschild) { - if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ, SEEK_SET) < 0 ) + if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); *(SFSNode**) (((char*)nd) + nd->child ) = - readNode(info, fd, buf, bufsize); + readNode(info, fd, buf, bufsize, extrasize); } nd++; } @@ -1096,12 +1175,17 @@ readNode(SFSTree *info, int fd, char *buf, int bufsize) { } void -SFSReadDump(SFSTree *info, char *filename) { +SFSReadDump(SFSTree *info, char *filename, void **extradata, u_int32_t *extrasize) { int fd; SFSTreeDumpHeader dh; char *buf; - int bufsize; - + int bufsize; + + if ( extradata ) + *extradata = NULL; + if (extrasize ) + *extrasize = 0; + memset(info,0,sizeof(SFSTree)); if ( (fd = open(filename, O_RDONLY)) < 0 ) @@ -1123,10 +1207,25 @@ SFSReadDump(SFSTree *info, char *filename) { info->nnodes = dh.nnodes; info->datasize = dh.datasize; + if ( dh.extrasize ) { + void *pointer = tmalloc( MAXALIGN(dh.extrasize) ); + + if ( read(fd, pointer, MAXALIGN(dh.extrasize)) != MAXALIGN(dh.extrasize) ) + tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); + + if ( extradata ) + *extradata = pointer; + else + tfree(pointer); + + if ( extrasize ) + *extrasize = dh.extrasize; + } + /* allocate buffer with max allowed size */ bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize); buf = tmalloc( bufsize ); - info->node = readNode(info, fd, buf, bufsize); + info->node = readNode(info, fd, buf, bufsize, MAXALIGN(dh.extrasize)); tfree(buf); flock(fd, LOCK_UN); @@ -1134,9 +1233,14 @@ SFSReadDump(SFSTree *info, char *filename) { } void -SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) { +SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size, void **extradata, u_int32_t *extrasize) { SFSTreeDumpHeader *dh; + if ( extradata ) + *extradata = NULL; + if (extrasize ) + *extrasize = 0; + memset(info,0,sizeof(SFSTree)); if ( size && size < SFSTDHSZ ) @@ -1150,14 +1254,22 @@ SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) { tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch"); if ( dh->headersize != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize); - if ( size && size != dh->totalen + SFSTDHSZ ) - tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ, size); + if ( size && size != dh->totalen + SFSTDHSZ + MAXALIGN(dh->extrasize) ) + tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ + dh->extrasize , size); info->totalen = dh->totalen; info->nnodes = dh->nnodes; info->datasize = dh->datasize; info->plainmemory = 1; + if ( dh->extrasize ) { + if ( extradata ) + *extradata = ((char*)pointer) + SFSTDHSZ; + + if ( extrasize ) + *extrasize = dh->extrasize; + } + if ( info->totalen && info->nnodes ) - info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ ); + info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ + MAXALIGN(dh->extrasize) ); }