/* lastblock.c -- Copyright 1993, 1994 Liam R. E. Quin. All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. */ /* LQT_LastBlockInChain - find the last block used by a given word's postings. * * $Id: lastblk.c,v 1.17 2001/05/31 03:50:13 liam Exp $ * * Smarts: keep a cache of the last block in the chain for each WID. * this makes updates faster at the expense of using extra disk. */ #include "error.h" #include /* stderr, also for fileinfo.h */ #include #include "globals.h" /* defines and declarations for database filenames */ #ifdef HAVE_FCNTL_H # ifdef HAVE_SYSV_FCNTL_H # include # endif # include #endif #ifdef HAVE_STDLIB_H # include #else # include #endif #ifdef HAVE_UNISTD_H # include #endif #include "fileinfo.h" /* for wordinfo.h */ #include "wordinfo.h" #include "pblock.h" #include "numbers.h" #include "getbyte.h" #include "blkheader.h" #include "emalloc.h" #include "lqutil.h" #include "liblqtext.h" #include "lqtrace.h" /** **/ /* The cache is structured in groups of a little over 10,000 entries. * In this way, we cope with the cache being extended. */ #define SEGMENT_LENGTH 1024 #define MAX_SEGMENTS 20 typedef struct s_LastBlockCache { unsigned long FirstWID; unsigned long theEnds[SEGMENT_LENGTH]; } t_LastBlockCache; static int theBlockCacheFile = -1; static t_LastBlockCache **theBlockCache = (t_LastBlockCache **) 0; static long cacheSizeInBytes = 0L; static int cacheCount = 0; static int nextToFree = -1; /* 0 when we add one to it... */ PRIVATE void OpenLastBlockCacheFile(db) t_LQTEXT_Database *db; { int Flags, Modes; struct stat theStatBuf; LQT_GetFileModes(db, &Flags, &Modes); /* If this file doesn't exist, we must be able to create it: */ theBlockCacheFile = LQU_Eopen( E_FATAL|E_SYS, db->LastBlockFile, "block chain end file", Flags|O_CREAT, Modes ); if (stat(db->LastBlockFile, &theStatBuf) < 0) { Error(E_FATAL|E_SYS, "stat: can't get filesystem information for \"%s\"", db->LastBlockFile ); } cacheSizeInBytes = theStatBuf.st_size; if (theBlockCache) { (void) efree((char *) theBlockCache); } theBlockCache = (t_LastBlockCache **) ecalloc( "theBlockCacheFile", MAX_SEGMENTS, sizeof(t_LastBlockCache *) ); cacheCount = 0; } PRIVATE void FreeSegment(db, theSegment) t_LQTEXT_Database *db; t_LastBlockCache *theSegment; { unsigned long theStart; register int i; if (!theSegment) { return; } /* Find the starting offset: * First, round down to the nearest whole segment by * dividing by the segment length... */ theStart = theSegment->FirstWID / SEGMENT_LENGTH; /* ... and then multiplying by the same number... * and then multiply that by the size of a block offset: */ theStart *= SEGMENT_LENGTH * sizeof(unsigned long); (void) LQU_Elseek( E_FATAL, db->LastBlockFile, "cache of last block in each disk chain", theBlockCacheFile, theStart, SEEK_SET ); i = write( theBlockCacheFile, theSegment->theEnds, SEGMENT_LENGTH * sizeof(unsigned long) ); if (i < 0) { Error(E_FATAL|E_SYS, "Couldn't write %ld bytes to fd %d \"%s\"", SEGMENT_LENGTH * sizeof(unsigned long), theBlockCacheFile, db->LastBlockFile ); } } /* * LQTp_FlushLastBlockCache * Database/Update, Database/Files * *

Ensures that all entries in the last block cache are written out * to disk. This routine must be called before a routine that has * updated the database exits.

*

This routine is registered as an action to be performed on a * database close or sync, and so is called automatically by * LQT_CloseDatabase and LQT_SyncDatabase; * the ignored argument and the return value are for * compatibility with LQT_AddActionOnClose. * * Warns if there are system problems writing the data or closing * the associated file. * * LQT_SetLastBlockInChain * LQT_LastBlockInChain * LQT_AddActionOnClose * LQT_CloseDatabase * */ LIBRARY int LQTp_FlushLastBlockCache(db) t_LQTEXT_Database *db; { int i; #ifdef ASCIITRACE LQT_Trace(LQTRACE_LASTBLOCK, "[flush last block cache]" ); #endif if (theBlockCacheFile < 0) { /* it wasn't open, nothing to do */ return 0; } for (i = 0; i < cacheCount; i++) { if (theBlockCache[i]) { FreeSegment(db, theBlockCache[i]); efree((char *) theBlockCache[i]); theBlockCache[i] = (t_LastBlockCache *) 0; } } cacheCount = 0; if (close(theBlockCacheFile) < 0) { Error(E_WARN|E_SYS, "problem closing block chain end file \"%s\"", db->LastBlockFile ); } theBlockCacheFile = -1; return 0; } PRIVATE t_LastBlockCache * ReadSegmentContaining(db, theWID) t_LQTEXT_Database *db; unsigned long theWID; { t_LastBlockCache *Result; register int i; unsigned long theStart; nextToFree = (nextToFree + 1) % MAX_SEGMENTS; /* find an empty cache slot */ if (cacheCount == MAX_SEGMENTS) { Result = theBlockCache[nextToFree]; FreeSegment(db, Result); } else { /* allocate a new cache slot */ theBlockCache[nextToFree] = Result = (t_LastBlockCache *) emalloc( "lastblock", sizeof(t_LastBlockCache) ); ++cacheCount; } theStart = (theWID / SEGMENT_LENGTH); Result->FirstWID = theStart * SEGMENT_LENGTH; theStart = Result->FirstWID * sizeof(unsigned long); (void) LQU_Elseek( E_FATAL, db->LastBlockFile, "Cache of last block in each disk chain", theBlockCacheFile, theStart, SEEK_SET ); i = read( theBlockCacheFile, Result->theEnds, SEGMENT_LENGTH * sizeof(unsigned long) ); if (i < 0) { Error(E_FATAL|E_SYS, "Couldn't read %ld bytes from fd %d \"%s\"", SEGMENT_LENGTH * sizeof(unsigned long), theBlockCacheFile, db->LastBlockFile ); } else if (i == 0) { (void) bzero( Result->theEnds, SEGMENT_LENGTH * sizeof(unsigned long) ); } else if (i != SEGMENT_LENGTH * sizeof(unsigned long)) { /* Clear the unread values. * It doesn't really matter if we zero out an extra value -- * it's only a cache. It _does_ matter if we zero out a partial * value, though! That would put a corrupt value into the cache. * So we always zero a multuple of the cache size. */ int firstUnset = i / sizeof(unsigned long); /* round down!! */ i *= sizeof(unsigned long); Error(E_WARN|E_SYS, "Read a partial block from %s at offset %d, got %d != %d bytes", db->LastBlockFile, theStart, i, SEGMENT_LENGTH * sizeof(unsigned long) ); (void) bzero( (char *) &(Result->theEnds[firstUnset]), (SEGMENT_LENGTH - firstUnset) * sizeof(unsigned long) ); } return Result; } PRIVATE t_LastBlockCache * FindResultFromCache(WID) t_WID WID; { register int i; for (i = 0; i < cacheCount; i++) { if (theBlockCache[i]) { register t_LastBlockCache *LBp = theBlockCache[i]; if (LBp->FirstWID <= WID && LBp->FirstWID + SEGMENT_LENGTH > WID) { return LBp; } } } return (t_LastBlockCache *) 0; } /* * LQT_SetLastBlockInChain * Database/Update, Database/Files * *

LQT_SetLastBlockInChain maintains the chainend file in the * database directory; this contains the block number of the last * block in the chain used to store data for a given WID. This * allows lqaddfile to update an entry efficiently, as otherwise it * has to read the entire chain from the start to determine the * last block before it can start appending to it. * Failing to call this function after changing the last block number * for a given WID will result in a corrupt database.

*

The given Offsetp is a pointer to a long, although the value * is not changed; this is simply for consistency with other routines, * and may change in the future. The FirstUnusedBytepp is currently * used only for debugging; the value is recomputed from the data * when it is used. * * Fatal error if the cache file can't be created, if it isn't * already open. * * LQTp_FlushLastBlockCache, LQT_LastBlockInChain * */ API void LQT_SetLastBlockInChain(db, WID, Offsetp, FirstUnusedBytep, theBlock) t_LQTEXT_Database *db; t_WID WID; unsigned long *Offsetp; /* In: last offset */ unsigned char *FirstUnusedBytep; unsigned char *theBlock; { t_LastBlockCache *theCacheEntry; if (theBlockCacheFile < 0) { OpenLastBlockCacheFile(db); theCacheEntry = ReadSegmentContaining(db, WID); } else { if (!(theCacheEntry = FindResultFromCache(WID))) { theCacheEntry = ReadSegmentContaining(db, WID); } } theCacheEntry->theEnds[WID - theCacheEntry->FirstWID] = (*Offsetp); #ifdef ASCIITRACE if (LQT_TraceFlagsSet(LQTRACE_LASTBLOCK)) { LQT_Trace(LQTRACE_LASTBLOCK, "Last(%04ld) set to %ld.%u", WID, theCacheEntry->theEnds[WID - theCacheEntry->FirstWID], FirstUnusedBytep - theBlock ); } #endif } /* * LQT_LastBlockInChain * Database/Update, Database/Files * * Returns the last block in the chain for a given WID. The value * may have been set previously by LQT_SetLastBlockInChain, or * can be deduced by reading the chain from disk a block at a time * until the end is reached. * * A pointer to the (extended) block in the data cache * * Fatal error (E_BUG) if the value cannot be determined * * LQT_SetLastBlockInChain * */ API unsigned char * LQT_LastBlockInChain(db, WID, Offsetp, FirstUnusedBytepp, BlockLengthp) t_LQTEXT_Database *db; t_WID WID; unsigned long *Offsetp; /* in: first offset; Out: last offset */ unsigned char **FirstUnusedBytepp; /* out only */ unsigned int *BlockLengthp; { t_LastBlockCache *theCacheEntry; unsigned char *Result = 0; register unsigned char *p; t_BlockHeader *BH; register int j; if (theBlockCacheFile < 0) { OpenLastBlockCacheFile(db); theCacheEntry = ReadSegmentContaining(db, WID); } else { if (!(theCacheEntry = FindResultFromCache(WID))) { theCacheEntry = ReadSegmentContaining(db, WID); } } j = WID - theCacheEntry->FirstWID; Oops: if (theCacheEntry->theEnds[j]) { unsigned long NewOffset = theCacheEntry->theEnds[j]; Result = LQT_ReadBlock(db, NewOffset, WID); BH = (t_BlockHeader *) Result; if (!BH || BH->NextOffset != 0L) { /* Wrong answer, cache is out of date (HOW??) */ theCacheEntry->theEnds[j] = 0L; if (NewOffset) { goto Oops; } else { Error(E_FATAL|E_BUG, "%s: %d: BH is 0x%x, BH->NextOffset %ld", __FILE__, __LINE__, BH, BH ? BH->NextOffset : 0L ); } } else { *Offsetp = NewOffset; } } else { /* we still don't have the answer, do it the hard way, * by following the chain: */ unsigned long NextOffset = (*Offsetp); if (!NextOffset) { Error(E_BUG|E_WARN|E_INTERNAL, "LQT_LastBlockInChain: Offsetp points to 0 in %s:%d", __FILE__, __LINE__ ); return (unsigned char *) 0; } do { /** save the curent block number: **/ *Offsetp = NextOffset; /** read the block **/ Result = LQT_ReadBlock(db, NextOffset, WID); if (!Result) { Error(E_FATAL|E_BUG|E_INTERNAL, "%s: %d: LQT_ReadBlock(%ld) --> 0 for WID %ld", __FILE__, __LINE__, NextOffset, WID ); } /** find the next offset **/ BH = (t_BlockHeader *) Result; if (!BH) { /* this should only happen if there's a compiler bug. */ Error(E_FATAL|E_BUG|E_INTERNAL, "%s: %d: BH 0 after LQT_ReadBlock(%ld) --> 0 for WID %ld", __FILE__, __LINE__, NextOffset, WID ); } NextOffset = BH->NextOffset; } while (NextOffset != 0L); } { unsigned int OriginalLength; BH = (t_BlockHeader *) Result; if (!BH->NumberOfBlocks) { if (theCacheEntry->theEnds[j]) { Error(E_WARN|E_INTERNAL, "%s: %d: LQT_LastBlockInChain: BH->len 0 for WID %ld, Block %ld", __FILE__, __LINE__, WID, *Offsetp ); theCacheEntry->theEnds[j] = 0; goto Oops; } else { Error(E_FATAL|E_INTERNAL|E_BUG, "%s: %d; LQT_LastBlockInChain: BH->len is 0 for WID %ld, Block %ld", __FILE__, __LINE__, WID, *Offsetp ); } } OriginalLength = (*BlockLengthp) = BH->NumberOfBlocks; OriginalLength *= BLOCKSIZE; /* Now find free blocks after *Offsetp that we can join to this one: */ LQT_ExtendBlock(db, *Offsetp, BlockLengthp, 0); /* now record the number of contiguous blocks assigned: */ BH->NumberOfBlocks = (*BlockLengthp); /* the block length we return is in bytes, so we need to convert: */ *BlockLengthp *= BLOCKSIZE; if (*BlockLengthp < OriginalLength) { Error(E_BUG|E_WARN, "LQT_LastBlockInChain(WID=%ld): *B %d < O %d", WID, *BlockLengthp, OriginalLength ); } if (*BlockLengthp < OriginalLength) { Error(E_BUG|E_INTERNAL, "%s: %d: *BlockLengthp %lu < OriginalLength %lu", __FILE__, __LINE__, *BlockLengthp, OriginalLength ); } /** find the last used byte: **/ for (p = &Result[OriginalLength - 1]; p >= BH->Data; p--) { if (*p != (unsigned char) 0xFF) { break; } } /* p still points to the last non-0xFF data byte. */ /* If the original block was exactly full, but we have extended * it, there's room in the new part: */ if (p == &Result[OriginalLength - 1] && OriginalLength < *BlockLengthp) { p[1] = 0xFF; } /* p still points to the last non-0xFF data byte. * BH is still set to the start of Result. */ /* If the block is still exactly full, we need to allocate a new one * and return a pointer to it: */ if (p >= &Result[*BlockLengthp - 1]) { unsigned long LastStart = (*Offsetp); /* Get the new block; a requested length of 0 gives * us one as long as possible. */ BH->NextOffset = (*Offsetp) = LQT_FindFreeBlock(db, WID, BlockLengthp, 0); /* Write the old block, since we've changed its Next pointer: */ LQT_WriteBlock( db, LastStart, Result, (int) (OriginalLength / BLOCKSIZE), WID ); /* read the new block and set it up: */ Result = LQT_ReadBlock(db, *Offsetp, (t_WID) 0); BH = (t_BlockHeader *) Result; BH->NextOffset = 0L; /* a new block, so no Next pointer */ BH->NumberOfBlocks = (*BlockLengthp) / BLOCKSIZE; #ifdef WIDINBLOCK BH->WID = WID; #endif /* we'll need to return a pointer to it: */ p = BH->Data; *p = (unsigned char) 0xFF; /* for a debugging check... */ } else if (*p != (unsigned char) 0xFF) { ++p; /* so we don't overwrite the last byte! */ if (*p != 0xFF) { Error(E_FATAL|E_BUG, "%s: %d: LQT_LastBlockInChain: *p %d != 255, p - R %ld; O %ld L %ld", __FILE__, __LINE__, *p, p - Result, *Offsetp, BH->NumberOfBlocks ); } } /* and save it for next time: */ theCacheEntry->theEnds[j] = (*Offsetp); } *FirstUnusedBytepp = p; #ifdef ASCIITRACE if (LQT_TraceFlagsSet(LQTRACE_LASTBLOCK)) { LQT_Trace(LQTRACE_LASTBLOCK, "Last(%04ld) is %ld.%u", WID, *Offsetp, *FirstUnusedBytepp - Result ); } #endif return Result; }