/* * Copyright (c) 2004 Teodor Sigaev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include "tlog.h" #include "tmalloc.h" #include "sfxstr.h" #include "tools.h" #define EN_VAL 0x01 #define EN_CHLD 0x02 #define EN_DATA 0x04 #define SFSTREE_VERSION 0x0100 typedef uintptr_t Opaque; /* XXX sizeof(Opaque) == sizeof(void *) */ #define CHECK_MEMORY(tree) ( ( (tree)->plainmemory ) ? \ tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) static __inline__ int getNodeSize(SFSTree *info, SFSNode *node) { int size = SFSNHRDSZ; if ( node->isskip ) { if ( node->isword ) size += info->datasize; if ( node->haschild ) size += sizeof(SFSNode*); size += node->nchar; } else { u_int32_t i; u_int32_t nfound = 0; SFSNodeData *data = node->data; size += sizeof(SFSNodeData) * node->nchar; size += sizeof(SFSNode*) * node->nchild; for(i=0;inchar;i++) nfound += data[i].isword; size += nfound*info->datasize; } return PTRALIGN(size); } static __inline__ SFSNode* getChildPointer(SFSTree *info, SFSNodeData *nodedata) { char *p = ((char*)nodedata) + nodedata->child; if ( info->plainmemory ) return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)p ); return *(SFSNode**)p; } static __inline__ SFSNode* getSkipChildPointer(SFSTree *info, SFSNode *node) { if ( info->plainmemory ) return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)node->data ); return *(SFSNode**)( (char*)(node->data) ); } static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd); static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level); #define STRNCMP(p1,p2,n) ( ((n)==1) ? ( *((char*)(p1))==*((char*)(p2)) ) : (strncmp((char*)(p1), (char*)(p2), (n))==0) ) SFSTree* SFSInit_dp(SFSTree *info, u_int32_t datasize, SFSDataIO *in) { if ( datasize % sizeof(u_int32_t) ) tlog(TL_ALARM|TL_EXIT,"SFSInit_dp: datasize(%d) should be divided by sizeof(u_int32_t)", datasize); if (!info) info=(SFSTree*)tmalloc(sizeof(SFSTree)); memset(info,0,sizeof(SFSTree)); info->datasize = datasize; while(in && in->key) { SFSAdd(info, in); in++; } return info; } SFSTree* SFSInit_c(SFSTree *info, char **in) { char **ptr=in; SFSDataIO d; if (!info) info=(SFSTree*)tmalloc(sizeof(SFSTree)); memset(info,0,sizeof(SFSTree)); while(ptr && *ptr) { d.key=*ptr; d.keylen=0; SFSAdd(info, &d); ptr++; } return info; } #define ISEND(p,w,l) ( (l>0) ? ( ((char*)(p))-(w) >= (l) ) : ( *(p) == '\0' ) ) void* SFSFindData(SFSTree *info, char *word, int len) { SFSDataIO in; in.key = word; in.keylen = len; return SFSFindDataFromSavedOrSave(info, &in, NULL); } void* SFSFindDataOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { if ( position ) memset(position, 0, sizeof(SFSTreePosition)); return SFSFindDataFromSavedOrSave(info, in, position); } void* SFSFindDataFromSavedOrSave(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { SFSNode *node = info->node; SFSNode **pnode = &(info->node); SFSNodeData *StopLow, *StopHigh, *StopMiddle; u_int8_t *ptr =(u_int8_t*)in->key; if ( position && position->nodeptr && position->node && in->keylen > position->level ) { node = position->node; pnode = position->nodeptr; ptr += position->level; } while( node && !ISEND(ptr, in->key, in->keylen) ) { if ( position ) { position->nodeptr = pnode; position->node = node; position->level = ((char*)ptr) - in->key; } if ( node->isskip ) { if ( in->keylen>0 && in->keylen - (((char*)ptr) - in->key) < node->nchar ) return NULL; else if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) { ptr+=node->nchar; if ( ISEND(ptr, in->key, in->keylen) ) { if (node->isword) return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) ); return NULL; } else if ( node->haschild ) { pnode = (SFSNode**)( (char*)(node->data) ); node = getSkipChildPointer(info, node); } else { return NULL; } } else return NULL; } else { StopLow = node->data; StopHigh = StopLow + node->nchar; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if ( StopMiddle->val == *ptr ) { ptr++; if ( ISEND(ptr, in->key, in->keylen) ) { if ( StopMiddle->isword ) return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); return NULL; } else if ( StopMiddle->haschild ) { pnode = (SFSNode**)(((char*)StopMiddle) + StopMiddle->child); node = getChildPointer(info, StopMiddle); } else { return NULL; } break; } else if ( StopMiddle->val < *ptr ) { StopLow = StopMiddle + 1; } else { StopHigh = StopMiddle; } } if ( StopLow >= StopHigh ) return NULL; } } return NULL; } void SFSAddSaved(SFSTree *info, SFSDataIO *in, SFSTreePosition *position) { CHECK_MEMORY(info); if ( !(position && position->nodeptr && position->node) ) { SFSAdd(info, in); return; } position->node = *(position->nodeptr) = addRecord(info, position->node, in, position->level); } static void freeFSFNode(SFSTree *info, SFSNode *node, void (*freefunc)(void*)) { u_int32_t i; if ( node->isskip ) { if (node->haschild) freeFSFNode(info, *(SFSNode**)(node->data), freefunc); if (freefunc && node->isword && info->datasize) (*freefunc)( (void*)( ((char*)(node->data))+ (node->haschild) ? sizeof(SFSNode*) : 0 ) ); } else { SFSNodeData *nd = node->data; char *data= ((char*)node) + node->dataptr; for(i=0;inchar;i++) { if (nd->haschild) freeFSFNode(info, *(SFSNode**)( ((char*)nd) + nd->child ), freefunc); if (freefunc && nd->isword && info->datasize) { (*freefunc)( (void*)data ); data+=info->datasize; } nd++; } } tfree(node); } void SFSFree(SFSTree *info, void (*freefunc)(void*)) { SFSNodeStack *s=info->stack; if (info->node && !info->plainmemory) freeFSFNode(info, info->node, freefunc); if (info->buf) tfree(info->buf); info->buf = NULL; info->tlen=0; info->node = NULL; while(s) { info->stack=s->next; tfree(s); s=info->stack; } } static SFSNode* makeSkipNode(SFSTree *info, SFSDataIO *io, int level) { u_int32_t len; int size; SFSNode *res; io->key+=level; io->keylen-=level; len = (io->keylen > 255) ? 255 : io->keylen; size = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize) + len; size = PTRALIGN(size); res = (SFSNode*)tmalloc(size); info->nnodes++; info->totalen+=size; res->isskip=1; res->nchar=len; res->dataptr = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize); memcpy(((char*)res) + res->dataptr, io->key, len); if ( io->keylen > 255 ) { res->haschild=1; res->isword=0; io->key = io->key+len; io->keylen = io->keylen - len; *(SFSNode**)(res->data) = makeSkipNode(info, io, 0); io->key = io->key-len; io->keylen = io->keylen + len; } else { res->haschild=0; res->isword=1; if (info->datasize) { memcpy( res->data, io->data, info->datasize ); } } io->key-=level; io->keylen+=level; return res; } static SFSNode* splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) { SFSNode *res; int prefixlen=0; char *s1,*s2; SFSNodeData *ndata; tassert(node->isskip); io->key+=level; io->keylen-=level; s1=((char*)node) + node->dataptr; s2=io->key; while( s1 - (((char*)node) + node->dataptr) < node->nchar && s2 - io->key < io->keylen && *s1==*s2) { s1++; s2++; } prefixlen = s2 - io->key; if ( prefixlen==0 ) { if ( node->nchar == 1 ) { int flag = EN_VAL | ((node->isword) ? EN_DATA : 0) | ((node->haschild) ? EN_CHLD : 0); res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), flag, &ndata); if ( node->isword ) { if ( info->datasize ) memcpy( ((char*)res) + res->dataptr + info->datasize * ndata->data, ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0), info->datasize ); } if ( node->haschild ) *(SFSNode**)( ((char*)ndata) + ndata->child ) = *(SFSNode**)( node->data ); info->totalen -= getNodeSize(info, node); info->nnodes--; tfree(node); } else { int size; res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), EN_VAL|EN_CHLD, &ndata); size = getNodeSize(info,node); info->totalen-=size; node->nchar--; memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + 1, node->nchar); size = getNodeSize(info,node); info->totalen+=size; *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node, size); } res = addRecord(info, res, io, 0); } else if (prefixlen==io->keylen) { if (prefixlen==node->nchar) { if ( node->isword || info->datasize==0) { if (info->datasize) memcpy( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNodeData*) : 0), io->data, info->datasize); node->isword=1; res=node; } else { int osize = PTRALIGN(SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar); int nsize = PTRALIGN(SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar); info->totalen += nsize - osize; res=(SFSNode*)trealloc(node,nsize); res->dataptr=SFSNHRDSZ + info->datasize + ((res->haschild) ? sizeof(SFSNodeData*) : 0); res->isword=1; memmove(((char*)res) + res->dataptr, ((char*)(res->data)) + ((res->haschild) ? sizeof(SFSNodeData*) : 0), res->nchar); memcpy(((char*)(res->data)) + ((res->haschild) ? sizeof(SFSNodeData*) : 0), io->data, info->datasize); } } else { int size = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*) + prefixlen; size = PTRALIGN(size); info->totalen+=size; info->nnodes++; res = (SFSNode*)tmalloc(size); res->isskip=1; res->isword=1; res->haschild=1; res->nchar = prefixlen; res->dataptr = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*); memcpy(((char*)res)+res->dataptr, io->key, prefixlen); if (info->datasize) memcpy(((char*)(res->data)) + sizeof(SFSNodeData*), io->data, info->datasize); info->totalen-=getNodeSize(info,node); node->nchar-=prefixlen; memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar); size = getNodeSize(info,node); info->totalen+=size; *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size); } } else if ( prefixlen==node->nchar ) { int size = SFSNHRDSZ + info->datasize + sizeof(SFSNode*) + node->nchar; info->totalen+=sizeof(SFSNode*); res=(SFSNode*)trealloc(node,size); memmove( ((char*)(res->data)) + sizeof(SFSNode*), res->data, info->datasize + res->nchar); res->haschild=1; res->dataptr+=sizeof(SFSNode*); *(SFSNode**)(res->data) = makeSkipNode(info, io, prefixlen); } else { int size = SFSNHRDSZ + sizeof(SFSNodeData*) + prefixlen; size = PTRALIGN(size); info->totalen+=size; info->nnodes++; res = (SFSNode*)tmalloc(size); res->isskip=1; res->isword=0; res->haschild=1; res->nchar = prefixlen; res->dataptr = SFSNHRDSZ + sizeof(SFSNodeData*); memcpy(((char*)res)+res->dataptr, io->key, prefixlen); info->totalen-= getNodeSize(info,node); node->nchar-=prefixlen; memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar); size = getNodeSize(info,node); info->totalen+= size; *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size); *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); } io->key-=level; io->keylen+=level; return res; } static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd) { u_int32_t nchild=0, nchar=0, nfound=0, i; SFSNode *rs,**chld; SFSNodeData *data; int sizenode; char *dataptr; if ( node ) { nchar=node->nchar; nchild=node->nchild; data=node->data; for(i=0;iisword; data++; } } if ( node ) /*info->totalen -= PTRALIGN(SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize);*/ info->totalen -= getNodeSize(info, node); if ( flag & EN_VAL ) nchar++; if ( flag & EN_CHLD ) nchild++; if ( flag & EN_DATA ) nfound++; sizenode = SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize; sizenode = PTRALIGN(sizenode); info->totalen+=sizenode; if ( !node ) info->nnodes++; rs=(SFSNode*)tmalloc(sizenode); rs->isskip = 0; rs->nchar = nchar; rs->nchild = nchild; rs->dataptr=SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*); dataptr=((char*)rs) + rs->dataptr; chld = (SFSNode**)( ((char*)rs)+SFSNHRDSZ+nchar*sizeof(SFSNodeData) ); data=rs->data; if (node) { SFSNode **ochld=(SFSNode**)( ((char*)node)+SFSNHRDSZ+node->nchar*sizeof(SFSNodeData) ); SFSNodeData *odata = node->data; char *odataptr=((char*)node) + node->dataptr; int wasins=0; for(i=0;inchar;i++) { if ( val > odata->val ) { *(u_int32_t*)data = *(u_int32_t*)odata; if ( odata->haschild ) { *chld=*ochld; data->child = ((char*)chld) - ((char*)data); chld++; ochld++; } if ( odata->isword && info->datasize ) { memcpy(dataptr, odataptr, info->datasize); data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize; dataptr += info->datasize; odataptr += info->datasize; } data++; odata++; } else if ( val == odata->val ) { tassert ( (flag&EN_VAL)==0 ); *(u_int32_t*)data = *(u_int32_t*)odata; wasins=1; if ( odata->haschild || flag & EN_CHLD ) { if (odata->haschild) *chld=*ochld; data->child = ((char*)chld) - ((char*)data); data->haschild=1; chld++; if (odata->haschild) ochld++; } else data->haschild=0; if ( odata->isword || flag & EN_DATA ) { data->isword=1; if ( info->datasize && odata->isword ) { memcpy(dataptr, odataptr, info->datasize); odataptr += info->datasize; } data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0; dataptr += info->datasize; } else data->isword=0; *nd = data; data++; odata++; } else { if ( wasins==0 ) { tassert ( flag&EN_VAL ); data->val = val; if ( flag & EN_CHLD ) { data->haschild=1; data->child = ((char*)chld) - ((char*)data); chld++; } else data->haschild=0; if ( flag & EN_DATA ) { data->isword=1; data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0; dataptr += info->datasize; } else data->isword=0; *nd = data; data++; wasins=1; i--; } else { *(u_int32_t*)data = *(u_int32_t*)odata; if ( odata->haschild ) { *chld=*ochld; data->child = ((char*)chld) - ((char*)data); chld++; ochld++; } if ( odata->isword && info->datasize ) { memcpy(dataptr, odataptr, info->datasize); data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize; dataptr += info->datasize; odataptr += info->datasize; } data++; odata++; } } } if ( wasins==0 ) { tassert ( flag&EN_VAL ); data->val = val; if ( flag & EN_CHLD ) { data->haschild=1; data->child = ((char*)chld) - ((char*)data); chld++; } else data->haschild=0; if ( flag & EN_DATA ) { data->isword=1; data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0; dataptr += info->datasize; } else data->isword=0; *nd = data; } } else { tassert ( flag & EN_VAL ); data->val = val; if ( flag & EN_CHLD ) { data->haschild=1; data->child = ((char*)chld) - ((char*)data); } else data->haschild=0; if ( flag & EN_DATA ) { data->isword=1; data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0; } else data->isword=0; *nd = data; } if (node) tfree(node); return rs; } static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) { SFSNodeData *StopLow, *StopHigh, *StopMiddle; u_int8_t *ptr = ((u_int8_t*)in->key) + level; if ( node ) { if ( node->isskip ) { if ( node->haschild && in->keylen-level > node->nchar && strncmp(in->key+level, ((char*)node)+node->dataptr, node->nchar)==0 ) *(SFSNode**)( node->data ) = addRecord(info, *(SFSNode**)( node->data ), in, level+node->nchar); else node = splitSkipNode(info, node, in, level); } else { StopLow = node->data; StopHigh = StopLow + node->nchar; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if ( StopMiddle->val == *ptr ) { if ( *(ptr+1)=='\0' ) { if ( StopMiddle->isword ) { /* already exists */ if ( info->datasize ) memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data, in->data, info->datasize ); } else { /* not exist word */ if (info->datasize) { node = enlargeNode(info, node, *ptr, EN_DATA, &StopMiddle); memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data, in->data, info->datasize ); } StopMiddle->isword = 1; } } else if ( StopMiddle->haschild ) { *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) = addRecord(info, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), in, level+1); } else { node = enlargeNode(info, node, *ptr, EN_CHLD, &StopMiddle); *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) = makeSkipNode(info, in, level+1); } return node; } else if ( StopMiddle->val < *ptr ) { StopLow = StopMiddle + 1; } else { StopHigh = StopMiddle; } } if ( *(ptr+1)=='\0' ) { node = enlargeNode(info, node, *ptr, EN_VAL|EN_DATA, &StopMiddle); if ( info->datasize ) memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data, in->data, info->datasize ); } else { node = enlargeNode(info, node, *ptr, EN_VAL|EN_CHLD, &StopMiddle); *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) = makeSkipNode(info, in, level+1); } } } else node = makeSkipNode(info, in, level); return node; } void SFSAdd(SFSTree *info, SFSDataIO *in) { CHECK_MEMORY(info); if (in->keylen<=0) in->keylen=strlen(in->key); info->node = addRecord(info, info->node, in, 0); } static SFSNodeStack* pushStack(SFSNodeStack *top, SFSNode *node, int level ) { SFSNodeStack *r=(SFSNodeStack*)tmalloc(sizeof(SFSNodeStack)); r->next = top; r->node=node; r->data=node->data; r->level=level; r->checkedchild = 0; r->checkedval = 0; return r; } void SFSIteratorStart(SFSTree *info) { if ( !info->buf ) { info->tlen = 32; info->buf = (char*)tmalloc(info->tlen); } info->stack = pushStack(NULL, info->node, 0); info->hasword=0; } void SFSPrefixIteratorStart(SFSTree *info, char *word) { SFSNode *node = info->node; SFSNodeData *StopLow, *StopHigh, *StopMiddle; u_int8_t *ptr =(u_int8_t*)word; int len,wlen=strlen(word); if ( wlen+1>=info->tlen ) { info->tlen = 2*wlen; info->buf = (info->buf) ? (char*)trealloc(info->buf,info->tlen) : (char*)tmalloc(info->tlen); } info->hasword=0; while( node && *ptr) { len = wlen - (((char*)ptr)-word); if ( node->isskip ) { if ( STRNCMP(ptr, ((char*)node)+node->dataptr, (lennchar) ? len : node->nchar) ) { if ( len<=node->nchar ) { strcpy(info->buf,word); info->stack = pushStack(NULL, node, ((char*)ptr) - word); return; } else if ( node->haschild ) { ptr+=node->nchar; node = getSkipChildPointer(info, node); } else { return; } } else return; } else { StopLow = node->data; StopHigh = StopLow + node->nchar; while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); if ( StopMiddle->val == *ptr ) { if ( *(ptr+1)=='\0' ) { len =((char*)ptr)-word+1; strcpy(info->buf,word); if ( StopMiddle->isword ) { info->hasword=1; info->wdata = (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data ); } if ( StopMiddle->haschild ) info->stack = pushStack(NULL, getChildPointer( info, StopMiddle ), len); return; } else if ( StopMiddle->haschild ) { node = getChildPointer( info, StopMiddle ); } else { return; } ptr++; break; } else if ( StopMiddle->val < *ptr ) { StopLow = StopMiddle + 1; } else { StopHigh = StopMiddle; } } if ( StopLow >= StopHigh ) break; } } return; } int SFSIterate(SFSTree *info, SFSDataIO *out) { SFSNodeStack *s=info->stack; if ( info->hasword ) { out->key = info->buf; out->keylen = strlen(out->key); out->data = info->wdata; info->hasword = 0; return 1; } if ( s == NULL ) return 0; if ( s->node == NULL ) { info->stack = s->next; tfree(s); return SFSIterate(info, out); } while ( s->level + s->node->nchar + 1 >= info->tlen ) { info->tlen *= 2; info->buf = (char*)trealloc(info->buf, info->tlen); } if ( s->node->isskip ) { memcpy( info->buf + s->level, ((char*)s->node) + s->node->dataptr, s->node->nchar ); if ( s->node->isword && !s->checkedval) { info->buf[ s->level+s->node->nchar ] = '\0'; out->key = info->buf; out->keylen = s->level+s->node->nchar; out->data =((char*)(s->node->data)) + ((s->node->haschild) ? sizeof(SFSNode*) : 0); s->checkedval=1; return 1; } if ( s->node->haschild && !s->checkedchild) { info->stack = pushStack(s, getSkipChildPointer(info, s->node), s->level+s->node->nchar); s->checkedchild=1; if ( SFSIterate(info, out) ) return 1; } } else { while( s->data - s->node->data < s->node->nchar ) { info->buf[ s->level ] = (char)s->data->val; if ( s->checkedval==0 && s->data->isword ) { info->buf[ s->level+1 ] = '\0'; out->key = info->buf; out->keylen = s->level+1; out->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data; s->checkedval = 1; return 1; } if ( s->checkedchild==0 && s->data->haschild ) { info->stack = pushStack(s, getChildPointer( info, s->data ), s->level+1); s->checkedchild=1; if ( SFSIterate(info, out) ) return 1; } s->checkedval = s->checkedchild = 0; s->data++; } } info->stack = s->next; tfree(s); return SFSIterate(info, out); } int SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) { SFSNodeStack *s=info->stack; SFSPrefixIteratorStart(info, word); s=info->stack; if ( SFSIterate(info, f) ) { SFSNodeStack *sptr = info->stack, *stmp; while( sptr && sptr!=s ) { stmp=sptr->next; tfree(sptr); sptr=stmp; } if ( s == NULL ) { memcpy(l,f,sizeof(SFSDataIO)); return 1; } } else return 0; info->stack=NULL; while( f->keylen + s->level + 2 >= info->tlen ) { info->tlen *= 2; info->buf = (char*)trealloc(info->buf, info->tlen); } f->key = info->buf; l->key = info->buf + f->keylen + 1; memcpy(l->key, f->key, f->keylen + 1); while(s->node) { while( f->keylen + 1 + s->level + s->node->nchar + 1 >= info->tlen ) { info->tlen *= 2; info->buf = (char*)trealloc(info->buf, info->tlen); } if ( s->node->isskip ) { memcpy(info->buf + f->keylen + 1 + s->level, ((char*)(s->node))+s->node->dataptr, s->node->nchar); s->level+=s->node->nchar; if (s->node->haschild) { s->node=getSkipChildPointer(info, s->node); } else { /* if (s->node->isword) */ info->buf[ f->keylen + 1 + s->level ] = '\0'; l->data = (void*)(s->node->data); l->keylen = s->level+1; break; } } else { s->data = s->node->data + s->node->nchar - 1; while( s->data - s->node->data >= 0 ) { info->buf[ f->keylen + 1 + s->level ] = (char)s->data->val; if ( s->data->haschild ) { s->node = getChildPointer( info, s->data ); s->level++; break; } if ( s->data->isword ) { info->buf[ f->keylen + 1 + s->level ] = '\0'; l->keylen = s->level+1; l->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data; s->node=NULL; break; } s->data--; } } } f->key = info->buf; l->key = info->buf + f->keylen + 1; tfree(s); return 1; } typedef struct WorkPlain { SFSTree *info; char *node; off_t offset; } WorkPlain; static Opaque plainNode(WorkPlain *wp, SFSNode *node) { int size = getNodeSize(wp->info, node); off_t myoffset = wp->offset; memcpy( wp->node + wp->offset, node, size ); wp->offset += size; tassert( wp->offset <= wp->info->totalen ); if ( node->isskip ) { if (node->haschild) *(Opaque*)(wp->node + myoffset + SFSNHRDSZ) = plainNode(wp, getSkipChildPointer(wp->info, node)); } else { SFSNodeData *nd = node->data; u_int32_t i; for(i=0;inchar;i++) { if (nd->haschild) *(Opaque*)(wp->node + myoffset + ( ((char*)nd) - ((char*)node) ) + nd->child) = plainNode(wp, getChildPointer( wp->info, nd ) ); nd++; } } tfree(node); return myoffset; } void SFSMakePlain(SFSTree *info, void *pointer) { WorkPlain wp; if ( info->plainmemory ) return; wp.info = info; wp.offset = 0; if ( pointer ) wp.node = (char*)pointer; else wp.node = (char*)tmalloc(sizeof(char*) * info->totalen); plainNode(&wp, info->node); tassert( wp.offset == info->totalen ); info->node = (SFSNode*)wp.node; info->plainmemory = 1; } static SFSNode* revertNode(SFSTree *info, SFSNode* node) { int size = getNodeSize(info, node); SFSNode *newnode; newnode = (SFSNode*)tmalloc( size ); memcpy(newnode, node, size); if ( node->isskip ) { if (node->haschild) *(SFSNode**)( (char*)(newnode->data) ) = revertNode(info, getSkipChildPointer(info, node)); } else { SFSNodeData *nd = node->data; SFSNodeData *nnd = newnode->data; u_int32_t i; for(i=0;inchar;i++) { if (nd->haschild) *(SFSNode**) (((char*)nnd) + nnd->child ) = revertNode(info, getChildPointer( info, nd )); nd++; nnd++; } } return newnode; } void * SFSRevertPlain(SFSTree *info) { void *pointer = info->node; if (! info->plainmemory ) return NULL; info->node = revertNode(info, info->node); info->plainmemory = 0; return pointer; } static Opaque writeNode(WorkPlain *wp, int fd, SFSNode *node, u_int32_t extrasize) { int size = getNodeSize(wp->info, node); SFSNode *wnode = (SFSNode*)tmalloc(size); off_t myoffset = wp->offset; memcpy( wnode, node, size ); wp->offset += size; tassert( wp->offset <= wp->info->totalen ); if ( node->isskip ) { if (node->haschild) *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = writeNode(wp, fd, getSkipChildPointer(wp->info, node), extrasize); } else { SFSNodeData *nd = node->data; u_int32_t i; for(i=0;inchar;i++) { if (nd->haschild) *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) = writeNode(wp, fd, getChildPointer( wp->info, nd ), extrasize ); nd++; } } if ( lseek(fd, myoffset + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); if ( write(fd, wnode, size) != size ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); tfree(wnode); return myoffset; } void SFSWriteDump(SFSTree *info, char *filename, void *extradata, u_int32_t extrasize) { int fd; off_t size = info->totalen + SFSTDHSZ; SFSTreeDumpHeader dh; if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0666)) < 0 ) tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", filename, strerror(errno)); if ( flock(fd, LOCK_EX) < 0 ) tlog(TL_CRIT|TL_EXIT, "flock failed: %s", strerror(errno)); if ( extrasize == 0 ) extradata = NULL; else if ( extradata == NULL ) extrasize = 0; if ( lseek(fd, size + MAXALIGN(extrasize) , SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); dh.version = SFSTREE_VERSION; dh.opaquesize = sizeof(Opaque); dh.headersize = SFSTDHSZ; dh.datasize = info->datasize; dh.totalen = info->totalen; dh.nnodes = info->nnodes; dh.extrasize = extrasize; if ( lseek(fd, 0, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); if ( extrasize ) { if ( write(fd, extradata, extrasize) != extrasize ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); if ( extrasize != MAXALIGN(extrasize) ) { char dummy[8] = {0,0,0,0,0,0,0,0}; if ( write(fd, dummy, MAXALIGN(extrasize) - extrasize ) != MAXALIGN(extrasize) - extrasize ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); extrasize = MAXALIGN(extrasize); } } if ( info->plainmemory ) { if ( write(fd, info->node, info->totalen) != info->totalen ) tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno)); } else { WorkPlain wp; wp.info = info; wp.offset = 0; if ( info->node ) writeNode(&wp, fd, info->node, extrasize); } flock(fd, LOCK_UN); close(fd); } static SFSNode* readNode(SFSTree *info, int fd, char *buf, int bufsize, int extrasize) { SFSNode *node; int size; size = read(fd, buf, bufsize ); if ( size < SFSNHRDSZ + sizeof(void*) ) tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); size = getNodeSize(info, (SFSNode*)buf); tassert( size <= bufsize ); node = (SFSNode*)tmalloc( size ); memcpy(node, buf, size); if ( node->isskip ) { if (node->haschild) { if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); *(SFSNode**)( (char*)(node->data) ) = readNode(info, fd, buf, bufsize, extrasize); } } else { SFSNodeData *nd = node->data; u_int32_t i; for(i=0;inchar;i++) { if (nd->haschild) { if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ + extrasize, SEEK_SET) < 0 ) tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno)); *(SFSNode**) (((char*)nd) + nd->child ) = readNode(info, fd, buf, bufsize, extrasize); } nd++; } } return node; } void SFSReadDump(SFSTree *info, char *filename, void **extradata, u_int32_t *extrasize) { int fd; SFSTreeDumpHeader dh; char *buf; int bufsize; if ( extradata ) *extradata = NULL; if (extrasize ) *extrasize = 0; memset(info,0,sizeof(SFSTree)); if ( (fd = open(filename, O_RDONLY)) < 0 ) tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno)); if ( flock(fd, LOCK_SH) < 0 ) tlog(TL_CRIT|TL_EXIT, "flock failed: %s", strerror(errno)); if ( read(fd, &dh, SFSTDHSZ) != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); if ( dh.version != SFSTREE_VERSION ) tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh.version); if ( dh.opaquesize != sizeof(Opaque) ) tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch"); if ( dh.headersize != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh.headersize); info->totalen = dh.totalen; info->nnodes = dh.nnodes; info->datasize = dh.datasize; if ( dh.extrasize ) { void *pointer = tmalloc( MAXALIGN(dh.extrasize) ); if ( read(fd, pointer, MAXALIGN(dh.extrasize)) != MAXALIGN(dh.extrasize) ) tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno)); if ( extradata ) *extradata = pointer; else tfree(pointer); if ( extrasize ) *extrasize = dh.extrasize; } /* allocate buffer with max allowed size */ bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize); buf = tmalloc( bufsize ); info->node = readNode(info, fd, buf, bufsize, MAXALIGN(dh.extrasize)); tfree(buf); flock(fd, LOCK_UN); close(fd); } void SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size, void **extradata, u_int32_t *extrasize) { SFSTreeDumpHeader *dh; if ( extradata ) *extradata = NULL; if (extrasize ) *extrasize = 0; memset(info,0,sizeof(SFSTree)); if ( size && size < SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "Memsize too small"); dh = (SFSTreeDumpHeader*)pointer; if ( dh->version != SFSTREE_VERSION ) tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh->version); if ( dh->opaquesize != sizeof(Opaque) ) tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch"); if ( dh->headersize != SFSTDHSZ ) tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize); if ( size && size != dh->totalen + SFSTDHSZ + MAXALIGN(dh->extrasize) ) tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ + dh->extrasize , size); info->totalen = dh->totalen; info->nnodes = dh->nnodes; info->datasize = dh->datasize; info->plainmemory = 1; if ( dh->extrasize ) { if ( extradata ) *extradata = ((char*)pointer) + SFSTDHSZ; if ( extrasize ) *extrasize = dh->extrasize; } if ( info->totalen && info->nnodes ) info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ + MAXALIGN(dh->extrasize) ); }