2 * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd);
41 static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level);
43 #define STRNCMP(p1,p2,n) ( ((n)==1) ? ( *((char*)(p1))==*((char*)(p2)) ) : (strncmp((char*)(p1), (char*)(p2), (n))==0) )
46 SFSInit_dp(SFSTree *info, u_int32_t datasize, SFSDataIO *in) {
47 if ( datasize % sizeof(u_int32_t) )
48 tlog(TL_ALARM|TL_EXIT,"SFSInit_dp: datasize(%d) should be divided by sizeof(u_int32_t)", datasize);
51 info=(SFSTree*)tmalloc(sizeof(SFSTree));
52 memset(info,0,sizeof(SFSTree));
54 info->datasize = datasize;
56 while(in && in->key) {
65 SFSInit_c(SFSTree *info, char **in) {
70 info=(SFSTree*)tmalloc(sizeof(SFSTree));
71 memset(info,0,sizeof(SFSTree));
84 SFSFindData(SFSTree *info, char *word) {
85 SFSNode *node = info->node;
86 SFSNodeData *StopLow, *StopHigh, *StopMiddle;
87 u_int8_t *ptr =(u_int8_t*)word;
89 while( node && *ptr ) {
91 if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) {
93 if ( *ptr=='\0' && node->isword) {
94 return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) );
95 } else if ( node->haschild ) {
96 node = *(SFSNode**)(node->data);
103 StopLow = node->data;
104 StopHigh = StopLow + node->nchar;
105 while (StopLow < StopHigh) {
106 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
107 if ( StopMiddle->val == *ptr ) {
109 if ( *ptr=='\0' && StopMiddle->isword ) {
110 return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
111 } else if ( StopMiddle->haschild ) {
112 node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child );
117 } else if ( StopMiddle->val < *ptr ) {
118 StopLow = StopMiddle + 1;
120 StopHigh = StopMiddle;
123 if ( StopLow >= StopHigh )
131 freeFSFNode(SFSTree *info, SFSNode *node, void (*freefunc)(void*)) {
134 if ( node->isskip ) {
136 freeFSFNode(info, *(SFSNode**)(node->data), freefunc);
137 if (freefunc && node->isword && info->datasize)
138 (*freefunc)( (void*)( ((char*)(node->data))+ (node->haschild) ? sizeof(SFSNode*) : 0 ) );
140 SFSNodeData *nd = node->data;
141 char *data= ((char*)node) + node->dataptr;
143 for(i=0;i<node->nchar;i++) {
145 freeFSFNode(info, *(SFSNode**)( ((char*)nd) + nd->child ), freefunc);
146 if (freefunc && nd->isword && info->datasize) {
147 (*freefunc)( (void*)data );
148 data+=info->datasize;
158 SFSFree(SFSTree *info, void (*freefunc)(void*)) {
159 SFSNodeStack *s=info->stack;
161 if (info->node) freeFSFNode(info, info->node, freefunc);
162 if (info->buf) tfree(info->buf);
175 makeSkipNode(SFSTree *info, SFSDataIO *io, int level) {
183 len = (io->keylen > 255) ? 255 : io->keylen;
184 size = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize) + len;
186 res = (SFSNode*)tmalloc(size);
193 res->dataptr = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize);
194 memcpy(((char*)res) + res->dataptr, io->key, len);
196 if ( io->keylen > 255 ) {
199 io->key = io->key+len;
200 io->keylen = io->keylen - len;
201 *(SFSNode**)(res->data) = makeSkipNode(info, io, 0);
202 io->key = io->key-len;
203 io->keylen = io->keylen + len;
207 if (info->datasize) {
208 memcpy( res->data, io->data, info->datasize );
219 splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
225 tassert(node->isskip);
229 s1=((char*)node) + node->dataptr;
232 while( s1 - (((char*)node) + node->dataptr) < node->nchar && s2 - io->key < io->keylen && *s1==*s2) {
237 prefixlen = s2 - io->key;
239 if ( prefixlen==0 ) {
240 if ( node->nchar == 1 ) {
241 int flag = EN_VAL | ((node->isword) ? EN_DATA : 0) | ((node->haschild) ? EN_CHLD : 0);
242 res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), flag, &ndata);
243 if ( node->isword ) {
244 if ( info->datasize )
246 ((char*)res) + res->dataptr + info->datasize * ndata->data,
247 ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0),
251 if ( node->haschild )
252 *(SFSNode**)( ((char*)ndata) + ndata->child ) = *(SFSNode**)( node->data );
253 info->totalen -= SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
254 ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar;
258 res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), EN_VAL|EN_CHLD, &ndata);
260 memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + 1, node->nchar);
262 *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node,
263 SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNode*) : 0) +
264 ((node->isword) ? info->datasize : 0 )+ node->nchar);
266 res = addRecord(info, res, io, 0);
267 } else if (prefixlen==io->keylen) {
268 if (prefixlen==node->nchar) {
269 if ( node->isword || info->datasize==0) {
271 memcpy( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNodeData*) : 0),
277 int size = SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar;
279 info->totalen+=info->datasize;
280 res=(SFSNode*)trealloc(node,size);
281 res->dataptr=SFSNHRDSZ + info->datasize + ((res->haschild) ? sizeof(SFSNodeData*) : 0);
283 memmove(((char*)res) + res->dataptr,
284 ((char*)(res->data)) + ((res->haschild) ? sizeof(SFSNodeData*) : 0), res->nchar);
285 memcpy(((char*)(res->data)) + ((res->haschild) ? sizeof(SFSNodeData*) : 0), io->data, info->datasize);
288 int size = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*) + prefixlen;
291 res = (SFSNode*)tmalloc(size);
295 res->nchar = prefixlen;
296 res->dataptr = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*);
297 memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
299 memcpy(((char*)(res->data)) + sizeof(SFSNodeData*), io->data, info->datasize);
300 node->nchar-=prefixlen;
301 memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
302 info->totalen-=prefixlen;
303 *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
304 ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
306 } else if ( prefixlen==node->nchar ) {
307 int size = SFSNHRDSZ + info->datasize + sizeof(SFSNode*) + node->nchar;
308 info->totalen+=sizeof(SFSNode*);
309 res=(SFSNode*)trealloc(node,size);
310 memmove( ((char*)(res->data)) + sizeof(SFSNode*), res->data, info->datasize + res->nchar);
312 res->dataptr+=sizeof(SFSNode*);
313 *(SFSNode**)(res->data) = makeSkipNode(info, io, prefixlen);
315 int size = SFSNHRDSZ + sizeof(SFSNodeData*) + prefixlen;
318 res = (SFSNode*)tmalloc(size);
322 res->nchar = prefixlen;
323 res->dataptr = SFSNHRDSZ + sizeof(SFSNodeData*);
324 memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
326 node->nchar-=prefixlen;
327 memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
328 info->totalen-=prefixlen;
330 *(SFSNode**)(res->data) = (SFSNode*)trealloc(node,
331 SFSNHRDSZ + ((node->isword) ? info->datasize : 0) +
332 ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar
334 *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen);
345 enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd) {
346 u_int32_t nchild=0, nchar=0, nfound=0, i;
358 for(i=0;i<nchar;i++) {
359 nfound += data->isword;
365 info->totalen -= SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize;
367 if ( flag & EN_VAL ) nchar++;
368 if ( flag & EN_CHLD ) nchild++;
369 if ( flag & EN_DATA ) nfound++;
372 sizenode = SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize;
374 info->totalen+=sizenode;
375 if ( !node ) info->nnodes++;
376 rs=(SFSNode*)tmalloc(sizenode);
381 rs->dataptr=SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*);
382 dataptr=((char*)rs) + rs->dataptr;
383 chld = (SFSNode**)( ((char*)rs)+SFSNHRDSZ+nchar*sizeof(SFSNodeData) );
387 SFSNode **ochld=(SFSNode**)( ((char*)node)+SFSNHRDSZ+node->nchar*sizeof(SFSNodeData) );
388 SFSNodeData *odata = node->data;
389 char *odataptr=((char*)node) + node->dataptr;
392 for(i=0;i<node->nchar;i++) {
393 if ( val > odata->val ) {
394 *(u_int32_t*)data = *(u_int32_t*)odata;
395 if ( odata->haschild ) {
397 data->child = ((char*)chld) - ((char*)data);
400 if ( odata->isword && info->datasize ) {
401 memcpy(dataptr, odataptr, info->datasize);
402 data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize;
403 dataptr += info->datasize; odataptr += info->datasize;
406 } else if ( val == odata->val ) {
407 tassert ( (flag&EN_VAL)==0 );
408 *(u_int32_t*)data = *(u_int32_t*)odata;
410 if ( odata->haschild || flag & EN_CHLD ) {
411 if (odata->haschild) *chld=*ochld;
412 data->child = ((char*)chld) - ((char*)data);
414 chld++; if (odata->haschild) ochld++;
417 if ( odata->isword || flag & EN_DATA ) {
419 if ( info->datasize && odata->isword ) {
420 memcpy(dataptr, odataptr, info->datasize);
421 odataptr += info->datasize;
423 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
424 dataptr += info->datasize;
431 tassert ( flag&EN_VAL );
433 if ( flag & EN_CHLD ) {
435 data->child = ((char*)chld) - ((char*)data);
439 if ( flag & EN_DATA ) {
441 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
442 dataptr += info->datasize;
450 *(u_int32_t*)data = *(u_int32_t*)odata;
451 if ( odata->haschild ) {
453 data->child = ((char*)chld) - ((char*)data);
456 if ( odata->isword && info->datasize ) {
457 memcpy(dataptr, odataptr, info->datasize);
458 data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize;
459 dataptr += info->datasize; odataptr += info->datasize;
466 tassert ( flag&EN_VAL );
468 if ( flag & EN_CHLD ) {
470 data->child = ((char*)chld) - ((char*)data);
474 if ( flag & EN_DATA ) {
476 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
477 dataptr += info->datasize;
483 tassert ( flag & EN_VAL );
485 if ( flag & EN_CHLD ) {
487 data->child = ((char*)chld) - ((char*)data);
490 if ( flag & EN_DATA ) {
492 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
497 if (node) tfree(node);
502 addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
503 SFSNodeData *StopLow, *StopHigh, *StopMiddle;
504 u_int8_t *ptr = ((u_int8_t*)in->key) + level;
508 if ( node->isskip ) {
509 if ( node->haschild && in->keylen-level > node->nchar &&
510 strncmp(in->key+level, ((char*)node)+node->dataptr, node->nchar)==0 )
511 *(SFSNode**)( node->data ) = addRecord(info, *(SFSNode**)( node->data ), in, level+node->nchar);
513 node = splitSkipNode(info, node, in, level);
515 StopLow = node->data;
516 StopHigh = StopLow + node->nchar;
517 while (StopLow < StopHigh) {
518 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
519 if ( StopMiddle->val == *ptr ) {
520 if ( *(ptr+1)=='\0' ) {
521 if ( StopMiddle->isword ) {
523 if ( info->datasize )
524 memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
525 in->data, info->datasize );
528 if (info->datasize) {
529 node = enlargeNode(info, node, *ptr, EN_DATA, &StopMiddle);
530 memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
531 in->data, info->datasize );
533 StopMiddle->isword = 1;
535 } else if ( StopMiddle->haschild ) {
536 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) =
537 addRecord(info, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), in, level+1);
539 node = enlargeNode(info, node, *ptr, EN_CHLD, &StopMiddle);
540 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) =
541 makeSkipNode(info, in, level+1);
544 } else if ( StopMiddle->val < *ptr ) {
545 StopLow = StopMiddle + 1;
547 StopHigh = StopMiddle;
550 if ( *(ptr+1)=='\0' ) {
551 node = enlargeNode(info, node, *ptr, EN_VAL|EN_DATA, &StopMiddle);
552 if ( info->datasize )
553 memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
554 in->data, info->datasize );
556 node = enlargeNode(info, node, *ptr, EN_VAL|EN_CHLD, &StopMiddle);
557 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) =
558 makeSkipNode(info, in, level+1);
562 node = makeSkipNode(info, in, level);
570 SFSAdd(SFSTree *info, SFSDataIO *in) {
572 in->keylen=strlen(in->key);
573 info->node = addRecord(info, info->node, in, 0);
577 pushStack(SFSNodeStack *top, SFSNode *node, int level ) {
578 SFSNodeStack *r=(SFSNodeStack*)tmalloc(sizeof(SFSNodeStack));
591 SFSIteratorStart(SFSTree *info) {
594 info->buf = (char*)tmalloc(info->tlen);
596 info->stack = pushStack(NULL, info->node, 0);
601 SFSPrefixIteratorStart(SFSTree *info, char *word) {
602 SFSNode *node = info->node;
603 SFSNodeData *StopLow, *StopHigh, *StopMiddle;
604 u_int8_t *ptr =(u_int8_t*)word;
605 int len,wlen=strlen(word);
607 if ( wlen+1>=info->tlen ) {
609 info->buf = (info->buf) ?
610 (char*)trealloc(info->buf,info->tlen)
612 (char*)tmalloc(info->tlen);
616 while( node && *ptr) {
617 len = wlen - (((char*)ptr)-word);
618 if ( node->isskip ) {
619 if ( STRNCMP(ptr, ((char*)node)+node->dataptr, (len<node->nchar) ? len : node->nchar) ) {
620 if ( len<=node->nchar ) {
621 strcpy(info->buf,word);
622 info->stack = pushStack(NULL, node, ((char*)ptr) - word);
624 } else if ( node->haschild ) {
626 node = *(SFSNode**)(node->data);
633 StopLow = node->data;
634 StopHigh = StopLow + node->nchar;
635 while (StopLow < StopHigh) {
636 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
637 if ( StopMiddle->val == *ptr ) {
638 if ( *(ptr+1)=='\0' ) {
639 len =((char*)ptr)-word+1;
640 strcpy(info->buf,word);
641 if ( StopMiddle->isword ) {
643 info->wdata = (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
645 if ( StopMiddle->haschild )
646 info->stack = pushStack(NULL, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), len);
648 } else if ( StopMiddle->haschild ) {
649 node=*(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child );
655 } else if ( StopMiddle->val < *ptr ) {
656 StopLow = StopMiddle + 1;
658 StopHigh = StopMiddle;
661 if ( StopLow >= StopHigh )
669 SFSIterate(SFSTree *info, SFSDataIO *out) {
670 SFSNodeStack *s=info->stack;
672 if ( info->hasword ) {
673 out->key = info->buf;
674 out->keylen = strlen(out->key);
675 out->data = info->wdata;
680 if ( s == NULL || s->node == NULL)
683 while ( s->level + s->node->nchar + 1 >= info->tlen ) {
685 info->buf = (char*)trealloc(info->buf, info->tlen);
688 if ( s->node->isskip ) {
689 memcpy( info->buf + s->level, ((char*)s->node) + s->node->dataptr, s->node->nchar );
690 if ( s->node->isword && !s->checkedval) {
691 info->buf[ s->level+s->node->nchar ] = '\0';
692 out->key = info->buf;
693 out->keylen = s->level+s->node->nchar;
694 out->data =((char*)(s->node->data)) + ((s->node->haschild) ? sizeof(SFSNode*) : 0);
698 if ( s->node->haschild && !s->checkedchild) {
699 info->stack = pushStack(s, *(SFSNode**)( (char*)(s->node->data) ), s->level+s->node->nchar);
701 if ( SFSIterate(info, out) )
705 while( s->data - s->node->data < s->node->nchar ) {
706 info->buf[ s->level ] = (char)s->data->val;
707 if ( s->checkedval==0 && s->data->isword ) {
708 info->buf[ s->level+1 ] = '\0';
709 out->key = info->buf;
710 out->keylen = s->level+1;
711 out->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data;
715 if ( s->checkedchild==0 && s->data->haschild ) {
716 info->stack = pushStack(s, *(SFSNode**)( ((char*)s->data) + s->data->child ), s->level+1);
718 if ( SFSIterate(info, out) )
721 s->checkedval = s->checkedchild = 0;
725 info->stack = s->next;
728 return SFSIterate(info, out);
732 SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) {
733 SFSNodeStack *s=info->stack;
735 SFSPrefixIteratorStart(info, word);
738 if ( SFSIterate(info, f) ) {
739 SFSNodeStack *sptr = info->stack, *stmp;
740 while( sptr && sptr!=s ) {
747 memcpy(l,f,sizeof(SFSDataIO));
755 while( f->keylen + s->level + 2 >= info->tlen ) {
757 info->buf = (char*)trealloc(info->buf, info->tlen);
761 l->key = info->buf + f->keylen + 1;
762 memcpy(l->key, f->key, f->keylen + 1);
765 while( f->keylen + 1 + s->level + s->node->nchar + 1 >= info->tlen ) {
767 info->buf = (char*)trealloc(info->buf, info->tlen);
769 if ( s->node->isskip ) {
770 memcpy(info->buf + f->keylen + 1 + s->level,
771 ((char*)(s->node))+s->node->dataptr, s->node->nchar);
772 s->level+=s->node->nchar;
773 if (s->node->haschild) {
774 s->node=*(SFSNode**)( s->node->data );
775 } else { /* if (s->node->isword) */
776 info->buf[ f->keylen + 1 + s->level ] = '\0';
777 l->data = (void*)(s->node->data);
778 l->keylen = s->level+1;
782 s->data = s->node->data + s->node->nchar - 1;
783 while( s->data - s->node->data >= 0 ) {
784 info->buf[ f->keylen + 1 + s->level ] = (char)s->data->val;
785 if ( s->data->haschild ) {
786 s->node = *(SFSNode**)( ((char*)s->data) + s->data->child );
790 if ( s->data->isword ) {
791 info->buf[ f->keylen + 1 + s->level ] = '\0';
792 l->keylen = s->level+1;
793 l->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data;
802 l->key = info->buf + f->keylen + 1;