From: teodor Date: Mon, 7 Feb 2005 17:19:13 +0000 (+0000) Subject: Optimize cache in tbtree. Fix small bug in psort and move on tmalloc psortex X-Git-Url: http://sigaev.ru/git/gitweb.cgi?a=commitdiff_plain;h=b030450e37fedf401284d076257f6ddebf3eda02;p=tedtools.git Optimize cache in tbtree. Fix small bug in psort and move on tmalloc psortex --- diff --git a/psort.c b/psort.c index f61221c..e056091 100644 --- a/psort.c +++ b/psort.c @@ -111,9 +111,8 @@ PS_init( SORT* sobj, int buflen, compare_sort func, ffree ffunc ) { if ( ! func ) return -1; if ( buflen <= 0 ) return -1; - sobj->buf = (SORTITEM*)tmalloc( sizeof(SORTITEM)*buflen ); + sobj->buf = (SORTITEM*)t0malloc( sizeof(SORTITEM)*buflen ); - memset( (void*)sobj->buf, 0, sizeof(SORTITEM)*buflen ); sobj->buflen = buflen; sobj->cmpr = func; sobj->free = ffunc; @@ -149,7 +148,7 @@ PS_reset( SORT* sobj ) { int i; if ( sobj->free ) { - for(i=0; icurlen; i++ ); + for(i=0; icurlen; i++ ) (*(sobj->free))( sobj->buf[i].value ); } memset( (void*)sobj->buf, 0, sizeof(SORTITEM)*sobj->buflen ); diff --git a/psortex.c b/psortex.c index fedd701..93c9449 100644 --- a/psortex.c +++ b/psortex.c @@ -30,6 +30,8 @@ #include #include +#include "tlog.h" +#include "tmalloc.h" #include "psort.h" #define LENARR (1<<10) @@ -70,6 +72,8 @@ main(int argn, char *argv[]) { int inserted=0; EXMPL **aptr; + opentlog( TL_OPEN_STDERR, TL_DEBUG, NULL); + if ( PS_init( &sobj, LIMIT, exstr_cmp, exstr_free ) ) { fprintf(stderr,"No memory\n"); return 1; @@ -78,16 +82,8 @@ main(int argn, char *argv[]) { printf("Test insert:\n"); for(i=0;ivalue=(char*)malloc( 64 ); - if ( ! ptr->value ) { - fprintf(stderr,"No memory\n"); - return 1; - } + ptr = (EXMPL*)tmalloc( sizeof(EXMPL) ); + ptr->value=(char*)tmalloc( 64 ); } ptr->id = random() % LENARR; sprintf( ptr->value, "Value: %d, startnum: %d", ptr->id, i ); @@ -111,22 +107,10 @@ main(int argn, char *argv[]) { srandom(1); printf("Test bulk insert:\n"); - aptr = (EXMPL**)malloc( sizeof(EXMPL*) * LENARR ); - if ( ! aptr ) { - fprintf(stderr,"No memory\n"); - return 1; - } + aptr = (EXMPL**)tmalloc( sizeof(EXMPL*) * LENARR ); for(i=0;ivalue = (char*)malloc( 64 ); - if ( ! aptr[i]->value ) { - fprintf(stderr,"No memory\n"); - return 1; - } + aptr[i] = (EXMPL*)tmalloc( sizeof(EXMPL) ); + aptr[i]->value = (char*)tmalloc( 64 ); aptr[i]->id = random() % LENARR; sprintf( aptr[i]->value, "Value: %d, startnum: %d", aptr[i]->id, i ); } @@ -153,18 +137,10 @@ main(int argn, char *argv[]) { fprintf(stderr,"No memory\n"); return 1; } - ptr = (EXMPL*)malloc( sizeof(EXMPL) * LENARR ); - if ( ! ptr ) { - fprintf(stderr,"No memory\n"); - return 1; - } + ptr = (EXMPL*)tmalloc( sizeof(EXMPL) * LENARR ); for(i=0;i #include #include -#include #include "tmalloc.h" #include "tlog.h" #include "tbtree.h" +#define NOTFOUND 0 +#define FOUND 1 + static int TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) ; static int TBTWritePage(TBTree *db, TBTMemPage *page) ; static int TBTNewPage(TBTree *db, TBTMemPage **page) ; + + int TBTOpen(TBTree *db, char *file) { off_t offset; @@ -76,7 +80,6 @@ TBTOpen(TBTree *db, char *file) { if ( db->npage ) { db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage ); - db->TimeCache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage ); db->curpage=0; } @@ -129,7 +132,6 @@ TBTClose(TBTree *db) { tfree( db->Cache[i] ); tfree( db->Cache ); - tfree( db->TimeCache ); } flock( db->fd, LOCK_UN ); @@ -158,31 +160,74 @@ TBTSync(TBTree *db) { return (rc) ? TBT_ERROR : TBT_OK; } +static int +findInCache(TBTree *db, u_int32_t pagenumber, int *pos, TBTMemPage **StopLow, TBTMemPage **StopHigh) { + TBTMemPage **StopMiddle; + + /* Loop invariant: StopLow <= StopMiddle < StopHigh */ + while (StopLow < StopHigh) { + StopMiddle = StopLow + ( ( StopHigh - StopLow ) >> 1 ); + + if ( (*StopMiddle)->pagenumber == pagenumber ) { + *pos = StopMiddle - db->Cache; + return FOUND; + } else if ( (*StopMiddle)->pagenumber < pagenumber ) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + *pos = StopHigh - db->Cache; + return NOTFOUND; +} + static TBTMemPage* -PageAlloc(TBTree *db, int *pos) { +PageAlloc(TBTree *db, int *posincache) { TBTMemPage *page; + *posincache = -1; if ( db->curpage < db->npage ) { - page = db->Cache[db->curpage] = db->TimeCache[db->curpage] = (TBTMemPage*)t0malloc(sizeof(TBTMemPage)); - *pos = db->curpage; + page = db->Cache[db->curpage] = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); + memset( page, 0, TBTMEMPAGEHDRSZ ); page->iscached=1; + if ( db->curpage == 0 ) { + db->TimeCache = db->TimeCacheLast = page; + } else { + db->TimeCacheLast->next = page; + page->prev = db->TimeCacheLast; + db->TimeCacheLast = page; + } + *posincache = db->curpage; db->curpage++; } else if ( db->npage == 0 ) { - page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage)); + page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); + memset( page, 0, TBTMEMPAGEHDRSZ ); } else { - u_int32_t i; - for( i=0; icurpage && db->TimeCache[i]->islocked; i++ ); - if ( i == db->curpage ) { /*all pages locked*/ + page = db->TimeCache; + while( page->next && page->islocked ) + page = page->next; + if ( page==db->TimeCacheLast && page->islocked ) { /*all pages locked*/ /*tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages");*/ - page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage)); + page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage)); + memset( page, 0, TBTMEMPAGEHDRSZ ); } else { - page = db->TimeCache[i]; if ( !page->issynced ) if ( TBTWritePage(db, page) ) return NULL; - memset(page,0,sizeof(TBTMemPage)); + if( findInCache(db, page->pagenumber, posincache, db->Cache, db->Cache + db->curpage) != FOUND ) + tlog(TL_CRIT,"PageAlloc: something wrong: no page with pagenumber %d in cache", page->pagenumber); + memset( page, 0, TBTMEMPAGEHDRSZ ); page->iscached=1; - *pos=i; + if ( page != db->TimeCacheLast ) { + if ( page->prev ) + page->prev->next = page->next; + if ( page->next ) + page->next->prev = page->prev; + db->TimeCacheLast->next = page; + page->prev = db->TimeCacheLast; + db->TimeCacheLast = page; + page->next = NULL; + } } } @@ -190,81 +235,50 @@ PageAlloc(TBTree *db, int *pos) { } static void -moveFirstTime(TBTree *db, int pos) { - TBTMemPage *ptr; - - if ( pos < 0 ) - return; - if (pos+1==db->curpage ) - return; - - ptr = db->TimeCache[pos]; - memmove( db->TimeCache + pos , db->TimeCache + pos + 1, sizeof(TBTMemPage*)*(db->curpage-pos-1) ); - db->TimeCache[db->curpage-1] = ptr; -} - -static void -findAndMoveFirstTime(TBTree *db, TBTMemPage *page) { - int pos; - for(pos=0;poscurpage;pos++) - if ( page==db->TimeCache[pos] ) { - moveFirstTime(db,pos); - break; - } - tassert( poscurpage ); -} - -static void -findAndMovePN(TBTree *db, TBTMemPage *page) { - int pos; +findAndMovePN(TBTree *db, TBTMemPage *page, int pos) { + int newpos; if ( db->curpage < 2 ) return; - for(pos=0;poscurpage;pos++) - if ( page==db->Cache[pos] ) - break; - tassert( poscurpage ); - - while(1) { - if ( pos+1 != db->curpage && db->Cache[pos+1]->pagenumber < page->pagenumber ) { - db->Cache[pos] = db->Cache[pos+1]; - db->Cache[pos+1] = page; - pos++; - } else if ( pos!=0 && db->Cache[pos-1]->pagenumber > page->pagenumber ) { - db->Cache[pos] = db->Cache[pos-1]; - db->Cache[pos-1] = page; - pos--; - } else - break; - } -} - -static int -cmpNPage(const void *a, const void *b) { - if ( (*(TBTMemPage**)a)->pagenumber == (*(TBTMemPage**)b)->pagenumber ) - return 0; - return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1; + if ( pos < 0 ) + pos = db->curpage-1; + tassert( pos < db->curpage && db->Cache[pos]==page ); + + if ( pos+1 != db->curpage && db->Cache[pos+1]->pagenumber < page->pagenumber ) { + if ( findInCache(db, page->pagenumber, &newpos, db->Cache + pos + 1, db->Cache + db->curpage )!=NOTFOUND) + tlog(TL_CRIT,"findAndMovePN: Something wrong, duplicate page with pagenumber %d", page->pagenumber); + memmove( db->Cache + pos, db->Cache + pos + 1, (newpos-pos-1)*sizeof(TBTMemPage**) ); + db->Cache[newpos-1] = page; + } else if ( pos!=0 && db->Cache[pos-1]->pagenumber > page->pagenumber ) { + if( findInCache(db, page->pagenumber, &newpos, db->Cache, db->Cache + pos )!=NOTFOUND); + tlog(TL_CRIT,"findAndMovePN: Something wrong, duplicate page with pagenumber %d", page->pagenumber); + memmove( db->Cache + newpos + 1, db->Cache + newpos, (pos-newpos)*sizeof(TBTMemPage**)); + db->Cache[newpos] = page; + } } static int TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) { - TBTMemPage *ptr, key, **res; - int rc; - int pos; - - ptr=&key; - key.pagenumber = pagenumber; + int rc, pos=0; - res = (TBTMemPage**)bsearch(&ptr, db->Cache, db->curpage, sizeof(TBTMemPage*), cmpNPage); - if ( res ) { + if ( findInCache(db, pagenumber, &pos, db->Cache, db->Cache + db->curpage)==FOUND ) { db->cachehits++; - *page=*res; - findAndMoveFirstTime(db, *page); + *page=db->Cache[pos]; + if ( *page != db->TimeCacheLast ) { + if ( (*page)->prev ) + (*page)->prev->next = (*page)->next; + if ( (*page)->next ) + (*page)->next->prev = (*page)->prev; + db->TimeCacheLast->next = (*page); + (*page)->prev = db->TimeCacheLast; + db->TimeCacheLast = *page; + (*page)->next = NULL; + } } else { off_t offset = (off_t)pagenumber * (off_t)TBTREEPAGESIZE; - - if( (*page = PageAlloc(db,&pos))==NULL ) + + if( (*page = PageAlloc(db, &pos))==NULL ) return TBT_ERROR; if ( lseek(db->fd, offset, SEEK_SET) != offset ) { @@ -277,15 +291,11 @@ TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) { tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset); return TBT_ERROR; } - if ( (*page)->iscached ) { - moveFirstTime(db, pos); - findAndMovePN(db, *page); - } + if ( (*page)->iscached ) + findAndMovePN(db, *page, pos); } db->pageread++; - gettimeofday( &( (*page)->last ), NULL ); - return TBT_OK; } @@ -316,6 +326,7 @@ TBTWritePage(TBTree *db, TBTMemPage *page) { static int TBTNewPage(TBTree *db, TBTMemPage **page) { int pos; + if ( db->lastpagenumber==0 ) { off_t offset; if ( (offset=lseek(db->fd, 0, SEEK_END)) < 0 ) { @@ -331,29 +342,24 @@ TBTNewPage(TBTree *db, TBTMemPage **page) { db->lastpagenumber = offset/TBTREEPAGESIZE; } - if( (*page = PageAlloc(db,&pos))==NULL ) + if( (*page = PageAlloc(db, &pos))==NULL ) return TBT_ERROR; + (*page)->pagenumber = db->lastpagenumber; (*page)->issynced=0; (*page)->islocked=1; db->lastpagenumber++; - memset( &((*page)->page), 0, sizeof(TBTPAGEHDRSZ)); + memset( &((*page)->page), 0, TBTMEMPAGEHDRSZ); (*page)->page.freespace = TBTREEPAGESIZE-TBTPAGEHDRSZ; - gettimeofday( &( (*page)->last ), NULL ); - if ( (*page)->iscached ) { - moveFirstTime(db, pos); - findAndMovePN(db, *page); - } + if ( (*page)->iscached ) + findAndMovePN(db, *page, pos); return TBT_OK; } -#define NOTFOUND 0 -#define FOUND 1 - static void printValue(u_int32_t len, char *ptr) { putchar('\''); @@ -409,7 +415,7 @@ dumpTree(TBTree *db, u_int32_t pagenumber, int follow) { TBTMemPage *page; if ( TBTReadPage(db, pagenumber, &page) ) { - fprintf(stderr,"BEDA %d\n", pagenumber); + tlog(TL_CRIT,"BEDA %d\n", pagenumber); return; } diff --git a/tbtree.h b/tbtree.h index fa11321..612913b 100644 --- a/tbtree.h +++ b/tbtree.h @@ -84,17 +84,19 @@ typedef struct { char data[TBTREEPAGESIZE-TBTPAGEHDRSZ]; } TBTPage; -typedef struct { +typedef struct TBTMemPage { u_int32_t pagenumber; - struct timeval last; u_int32_t issynced:1, iscached:1, islocked:1, unused:29; + struct TBTMemPage *prev; + struct TBTMemPage *next; TBTPage page; } TBTMemPage; +#define TBTMEMPAGEHDRSZ (sizeof(u_int32_t)*2 + sizeof(TBTMemPage*)*2 + TBTPAGEHDRSZ) typedef struct { u_int16_t length; char *value; @@ -119,7 +121,8 @@ typedef struct { u_int32_t npage; u_int32_t curpage; TBTMemPage **Cache; - TBTMemPage **TimeCache; + TBTMemPage *TimeCache; + TBTMemPage *TimeCacheLast; u_int32_t lastpagenumber; /* stat subsystem */