From: teodor Date: Tue, 8 Feb 2005 14:14:37 +0000 (+0000) Subject: Hash based cache X-Git-Url: http://sigaev.ru/git/gitweb.cgi?a=commitdiff_plain;h=1a9bd04fad98b23f594258f28b6d8604dc45662b;p=tedtools.git Hash based cache --- diff --git a/tbtree.c b/tbtree.c index 1422da0..becd120 100644 --- a/tbtree.c +++ b/tbtree.c @@ -79,7 +79,7 @@ TBTOpen(TBTree *db, char *file) { } if ( db->npage ) { - db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage ); + db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*HASHSIZE(db->npage) ); db->curpage=0; } @@ -127,9 +127,16 @@ TBTClose(TBTree *db) { if ( db->npage ) { u_int32_t i; - - for(i=0; icurpage; i++) - tfree( db->Cache[i] ); + TBTMemPage *ptr,*tmp; + + for(i=0; inpage); i++) { + ptr = db->Cache[i]; + while(ptr) { + tmp = ptr->link; + tfree(ptr); + ptr=tmp; + } + } tfree( db->Cache ); } @@ -140,54 +147,103 @@ TBTClose(TBTree *db) { return (rc) ? TBT_ERROR : TBT_OK; } +static int +cmpNPage(const void *a, const void *b) { + tassert( (*(TBTMemPage**)a)->pagenumber != (*(TBTMemPage**)b)->pagenumber ); + return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1; +} + + int TBTSync(TBTree *db) { - u_int32_t i; int rc=0; if ( db->readonly || db->npage==0 ) return TBT_OK; - - for(i=0; icurpage; i++) - if ( !db->Cache[i]->issynced ) - rc |= TBTWritePage(db, db->Cache[i]); + + if ( db->npage ) { + u_int32_t i, total=0; + TBTMemPage *ptr, **data = (TBTMemPage**)tmalloc(db->npage*sizeof(TBTMemPage*)), **pptr; + pptr=data; + for(i=0; inpage); i++) { + ptr = db->Cache[i]; + while(ptr) { + if ( !ptr->issynced ) { + *pptr = ptr; + pptr++; + } + ptr = ptr->link; + } + } + + total=pptr-data; + if ( total > 0 ) { + if ( total>1 ) + qsort( data, total, sizeof(TBTMemPage*), cmpNPage); + + pptr=data; + while( pptr-datafd) ) { tlog(TL_CRIT,"TBTSync: fsync failed: %s",strerror(errno)); - rc=1; + rc=TBT_ERROR; } return (rc) ? TBT_ERROR : TBT_OK; } -static int -findInCache(TBTree *db, u_int32_t pagenumber, int *pos, TBTMemPage **StopLow, TBTMemPage **StopHigh) { - TBTMemPage **StopMiddle; +static TBTMemPage * +findInCache(TBTree *db, u_int32_t pagenumber) { + TBTMemPage *ptr; - /* Loop invariant: StopLow <= StopMiddle < StopHigh */ - while (StopLow < StopHigh) { - StopMiddle = StopLow + ( ( StopHigh - StopLow ) >> 1 ); - - if ( (*StopMiddle)->pagenumber == pagenumber ) { - *pos = StopMiddle - db->Cache; - return FOUND; - } else if ( (*StopMiddle)->pagenumber < pagenumber ) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; + ptr = db->Cache[ pagenumber % HASHSIZE(db->npage) ]; + + while(ptr && ptr->pagenumber != pagenumber) + ptr = ptr->link; + + return ptr;; +} + +static void +removeFromCache(TBTree *db, u_int32_t pagenumber) { + TBTMemPage *ptr, *prev=NULL; + int pos = pagenumber % HASHSIZE(db->npage); + + ptr = db->Cache[ pos ]; + + while(ptr) { + if ( ptr->pagenumber == pagenumber ) { + if ( prev ) + prev->link = ptr->link; + else + db->Cache[ pos ] = ptr->link; + break; + } + prev=ptr; + ptr = ptr->link; } +} - *pos = StopHigh - db->Cache; - return NOTFOUND; +static void +insertInCache(TBTree *db, TBTMemPage *page) { + int pos = page->pagenumber % HASHSIZE(db->npage); + + page->link = db->Cache[ pos ]; + db->Cache[ pos ] = page; } static TBTMemPage* -PageAlloc(TBTree *db, int *posincache) { +PageAlloc(TBTree *db) { TBTMemPage *page; - *posincache = -1; if ( db->curpage < db->npage ) { - page = db->Cache[db->curpage] = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); + page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); memset( page, 0, TBTMEMPAGEHDRSZ ); page->iscached=1; if ( db->curpage == 0 ) { @@ -197,7 +253,6 @@ PageAlloc(TBTree *db, int *posincache) { page->prev = db->TimeCacheLast; db->TimeCacheLast = page; } - *posincache = db->curpage; db->curpage++; } else if ( db->npage == 0 ) { page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); @@ -207,69 +262,49 @@ PageAlloc(TBTree *db, int *posincache) { while( page->next && page->islocked ) page = page->next; if ( page==db->TimeCacheLast && page->islocked ) { /*all pages locked*/ - /*tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages");*/ + /* tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages"); */ page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); memset( page, 0, TBTMEMPAGEHDRSZ ); } else { if ( !page->issynced ) if ( TBTWritePage(db, page) ) return NULL; - if( findInCache(db, page->pagenumber, posincache, db->Cache, db->Cache + db->curpage) != FOUND ) - tlog(TL_CRIT,"PageAlloc: something wrong: no page with pagenumber %d in cache", page->pagenumber); - memset( page, 0, TBTMEMPAGEHDRSZ ); - page->iscached=1; + removeFromCache(db, page->pagenumber); if ( page != db->TimeCacheLast ) { - if ( page->prev ) + if ( page == db->TimeCache ) { + db->TimeCache = page->next; + db->TimeCache->prev = NULL; + } else { page->prev->next = page->next; - if ( page->next ) page->next->prev = page->prev; + } + memset( page, 0, TBTMEMPAGEHDRSZ ); db->TimeCacheLast->next = page; page->prev = db->TimeCacheLast; db->TimeCacheLast = page; - page->next = NULL; - } + } else + memset( page, 0, TBTMEMPAGEHDRSZ ); + page->iscached=1; } } return page; } -static void -findAndMovePN(TBTree *db, TBTMemPage *page, int pos) { - int newpos; - - if ( db->curpage < 2 ) - return; - - if ( pos < 0 ) - pos = db->curpage-1; - tassert( pos < db->curpage && db->Cache[pos]==page ); - - if ( pos+1 != db->curpage && db->Cache[pos+1]->pagenumber < page->pagenumber ) { - if ( findInCache(db, page->pagenumber, &newpos, db->Cache + pos + 1, db->Cache + db->curpage )!=NOTFOUND) - tlog(TL_CRIT,"findAndMovePN: Something wrong, duplicate page with pagenumber %d", page->pagenumber); - memmove( db->Cache + pos, db->Cache + pos + 1, (newpos-pos-1)*sizeof(TBTMemPage**) ); - db->Cache[newpos-1] = page; - } else if ( pos!=0 && db->Cache[pos-1]->pagenumber > page->pagenumber ) { - if( findInCache(db, page->pagenumber, &newpos, db->Cache, db->Cache + pos )!=NOTFOUND); - tlog(TL_CRIT,"findAndMovePN: Something wrong, duplicate page with pagenumber %d", page->pagenumber); - memmove( db->Cache + newpos + 1, db->Cache + newpos, (pos-newpos)*sizeof(TBTMemPage**)); - db->Cache[newpos] = page; - } -} - static int TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) { - int rc, pos=0; + int rc; - if ( findInCache(db, pagenumber, &pos, db->Cache, db->Cache + db->curpage)==FOUND ) { + if ( db->npage && (*page=findInCache(db, pagenumber))!=NULL ) { db->cachehits++; - *page=db->Cache[pos]; if ( *page != db->TimeCacheLast ) { - if ( (*page)->prev ) + if ( (*page) == db->TimeCache ) { + db->TimeCache = (*page)->next; + db->TimeCache->prev=NULL; + } else { (*page)->prev->next = (*page)->next; - if ( (*page)->next ) (*page)->next->prev = (*page)->prev; + } db->TimeCacheLast->next = (*page); (*page)->prev = db->TimeCacheLast; db->TimeCacheLast = *page; @@ -278,7 +313,7 @@ TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) { } else { off_t offset = (off_t)pagenumber * (off_t)TBTREEPAGESIZE; - if( (*page = PageAlloc(db, &pos))==NULL ) + if( (*page = PageAlloc(db))==NULL ) return TBT_ERROR; if ( lseek(db->fd, offset, SEEK_SET) != offset ) { @@ -288,11 +323,11 @@ TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) { (*page)->issynced=1; (*page)->pagenumber=pagenumber; if ( (rc=read(db->fd, &( (*page)->page ), TBTREEPAGESIZE)) != TBTREEPAGESIZE ) { - tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset); + tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset); return TBT_ERROR; } if ( (*page)->iscached ) - findAndMovePN(db, *page, pos); + insertInCache(db, *page); } db->pageread++; @@ -325,8 +360,6 @@ TBTWritePage(TBTree *db, TBTMemPage *page) { static int TBTNewPage(TBTree *db, TBTMemPage **page) { - int pos; - if ( db->lastpagenumber==0 ) { off_t offset; if ( (offset=lseek(db->fd, 0, SEEK_END)) < 0 ) { @@ -342,7 +375,7 @@ TBTNewPage(TBTree *db, TBTMemPage **page) { db->lastpagenumber = offset/TBTREEPAGESIZE; } - if( (*page = PageAlloc(db, &pos))==NULL ) + if( (*page = PageAlloc(db))==NULL ) return TBT_ERROR; (*page)->pagenumber = db->lastpagenumber; @@ -355,7 +388,7 @@ TBTNewPage(TBTree *db, TBTMemPage **page) { (*page)->page.freespace = TBTREEPAGESIZE-TBTPAGEHDRSZ; if ( (*page)->iscached ) - findAndMovePN(db, *page, pos); + insertInCache(db, *page); return TBT_OK; } diff --git a/tbtree.h b/tbtree.h index 612913b..f6e4117 100644 --- a/tbtree.h +++ b/tbtree.h @@ -43,6 +43,8 @@ #define PTRALIGN(LEN) TYPEALIGN(sizeof(void*), (LEN)) +/*#define HASHSIZE(LEN) ( TYPEALIGN(2, (int)((LEN)*2)) + 1 )*/ +#define HASHSIZE(LEN) ( (LEN)<<1 ) /* end utils */ @@ -93,10 +95,11 @@ typedef struct TBTMemPage { unused:29; struct TBTMemPage *prev; struct TBTMemPage *next; + struct TBTMemPage *link; TBTPage page; } TBTMemPage; -#define TBTMEMPAGEHDRSZ (sizeof(u_int32_t)*2 + sizeof(TBTMemPage*)*2 + TBTPAGEHDRSZ) +#define TBTMEMPAGEHDRSZ (sizeof(u_int32_t)*2 + sizeof(TBTMemPage*)*3 + TBTPAGEHDRSZ) typedef struct { u_int16_t length; char *value;