/* rblock.c -- Copyright 1991, 1994-1996 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. */ /* The low-level physical Word Database for lq-text. * * $Id: rblock.c,v 1.6 1996/05/15 23:10:27 lee Exp $ */ #include "globals.h" /* defines and declarations for database filenames */ #include "error.h" #include /* stderr, also for fileinfo.h */ #include #ifdef HAVE_FCNTL_H # ifdef HAVE_SYSV_FCNTL_H # include # endif # include #endif #include "fileinfo.h" /* for wordinfo.h */ #include "wordinfo.h" #include "pblock.h" #include "numbers.h" #include "wordrules.h" #include "getbyte.h" #include "blkheader.h" #include "liblqtext.h" #include "lqtrace.h" /** C library functions that need to be declared: **/ /** lqtext library functions that need to be declared: **/ /** Functions within this file that need to be declared: **/ /** **/ /* Layout of the physical index database * ===================================== * * This file is the only interface to the database of FID/Offset pairs. * * The db is organised in blocks arranged in Tagliatelli format: a linked * list of blocks for each WID; there is a list for each WID in the Word * Index. The Word Index contains the block number of the start of the * chain. * * A separate file, FreeListFile (by default "freefile") contains one bit for * every block in the data file -- if the bit is set, the corresponding block * is in use. * Block 0 of data is never used to store wordplaces; it's intended to use it * for concurrency/locking in the future. As a result, the first bit of the * first byte of the freelist is always set... * * block 1... first data block: * +--------------------------- * | bytes 0...3: Offset of next block in this chain * | The (FID, Offset) pairs follow, in compressed format. * | * block 2... next data block (either the start of a new chain, or a * continuation of some other chain. Or maybe unused, especially if files * have been deleted). 
* * The block header is described by t_BlockHeader. It's awfully simple, * but it used to be more complex. * * If lq-text was compiled with -DWIDINBLOCK, each block also contains the * WID for which it was written, for better checking. This only really * makes sense with -DASCIITRACE, as otherwise the debugging and tracing * code gets compiled out! * */ /* This is simply to help keep the source lines getting too long! */ typedef unsigned char *UCP; LIBRARY unsigned char LQTp__GetByte(db, WID, sp, Blockp, BlockLength, NextBlock) t_LQTEXT_Database *db; t_WID WID; unsigned char **sp; unsigned char **Blockp; unsigned int *BlockLength; unsigned long *NextBlock; { t_BlockHeader *BH; if (*sp - (*Blockp) >= (*BlockLength)) { if (*NextBlock == (unsigned long) 0) { Error(E_BUG, "LQTp__GetByte: WID %ld: database is corrupt, Next is zero", WID ); } else { #ifdef ASCIITRACE if (LQT_TraceFlagsSet( LQTRACE_FREEBLOCKS|LQTRACE_GETPLACES|LQTRACE_READBLOCK ) ) { if (LQT_BlockIsFree(db, *NextBlock)) { Error(E_BUG, "%s: %d: GetByte: Block %ld in used chain was free!", __FILE__, __LINE__, *NextBlock ); } } #endif (*sp) = (*Blockp) = LQT_ReadBlock(db, *NextBlock, WID); } /* Check the new block */ if ((*Blockp) == (UCP) 0) { Error(E_BUG, "LQTp__GetByte: Database corrupt, %lu, sigh.", *NextBlock ); } /*NOSTRICT*/ BH = (t_BlockHeader *) (*Blockp); *BlockLength = BLOCKSIZE * BH->NumberOfBlocks; *NextBlock = BH->NextOffset; (*sp) = (UCP) BH->Data; } return *((*sp)++); } /* This is the reverse of PutLong. * Things are slightly complicated by the need to provide LQT_sReadNumber * with a contiguous copy of all of the bytes in a number that spanned * a gap between data blocks. 
*/

/* LQTp__GetLong -- read one number at *sp, following the chain if needed.
 *
 * db, WID	passed through to LQT_ReadBlock() (and WID to error messages);
 * sp		in/out: current read position;
 * Blockp	in/out: start of the block that *sp points into;
 * BlockLength	in/out: length of that block, in bytes;
 * NextBlock	in/out: number of the block that follows it in the chain.
 *
 * The number is decoded by LQT_sReadNumber().  If that call returns a
 * negative value, or leaves *sp beyond the end of the block, the bytes
 * remaining in the old block are joined in a local buffer with bytes from
 * the start of the block numbered *NextBlock, and the number is decoded
 * again from that buffer.  *Blockp, *BlockLength and *NextBlock are then
 * updated from the new block's t_BlockHeader, and *sp is left just after
 * the number within the new block's data.
 *
 * Returns the decoded value, or 0 if another block was needed but
 * *NextBlock was zero.
 *
 * NOTE(review): Result is a long but is handed to LQT_sReadNumber() through
 * an (unsigned int *) cast; presumably that is the parameter type the
 * function declares -- confirm.  Result is zeroed first so that any bytes
 * the reader does not store into are not returned as garbage.
 */
LIBRARY unsigned long
LQTp__GetLong(db, WID, sp, Blockp, BlockLength, NextBlock)
    t_LQTEXT_Database *db;
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned int *BlockLength;
    unsigned long *NextBlock;
{
    unsigned char Buffer[sizeof(unsigned long) * 8/7 + 2];
    /* Start at zero: the reader may fill in only part of Result (or, if
     * the unchecked second read fails, none of it), and we return it all.
     */
    long Result = 0;
    t_BlockHeader *BH;
    unsigned char *NumberStart = (*sp);
    unsigned char *p;

    if (
        LQT_sReadNumber(sp, (unsigned int *) &Result, *Blockp, *BlockLength) < 0 ||
        (*sp) - (*Blockp) > (*BlockLength)
    ) {
        unsigned char *BeginningOfNewData = Buffer;
        /* Both copy loops stop here, leaving the last byte of Buffer alone */
        unsigned char *BufferLimit = &Buffer[sizeof(Buffer) - 1];

        if (*NextBlock == (unsigned long) 0) {
            return 0L;
        }

        /* Copy the first half of the number into the overflow buffer.
         * The copy is bounded by the buffer as well as by the block, so
         * that a read that failed a long way short of the end of the block
         * (e.g. on corrupt data) cannot write past the end of Buffer.
         */
        for (
            p = NumberStart;
            p < &(*Blockp)[*BlockLength] && BeginningOfNewData < BufferLimit;
            p++
        ) {
            *BeginningOfNewData++ = *p;
        }

        /** Now:
         ** . sp is garbage, as is NumberStart, as they point at the old
         **   data block
         ** . Buffer contains the first few bytes of the number
         ** . we need some more bytes, but don't yet know how many, as
         **   this depends on the number representation
         **   NOTE that we must have, however, that we know that there
         **   are more bytes, so that we know if we need the next block.
         ** . BeginningOfNewData points 1 beyond the end of the first section
         **   of the number, i.e. to the first byte that came from the newly
         **   read block.
         **/

        (*sp) = *Blockp = LQT_ReadBlock(db, *NextBlock, WID);

#ifdef ASCIITRACE
        if (LQT_TraceFlagsSet(
            LQTRACE_FREEBLOCKS|LQTRACE_PUTPLACES|LQTRACE_READBLOCK
        )) {
            /* *NextBlock still holds the number of the block just read */
            if (LQT_BlockIsFree(db, *NextBlock)) {
                Error(E_BUG,
                    "%s: %d: GetLong: Block %ld in used chain was free!",
                    __FILE__, __LINE__, *NextBlock
                );
            }
        }
#endif

        /* Check the new block */
        if ((*Blockp) == (UCP) 0) {
            Error(E_BUG,
                "GetLong: WID %ld: Database corrupt, *NextBlock zero",
                WID
            );
        }

        BH = (t_BlockHeader *) *Blockp;
        *BlockLength = BLOCKSIZE * BH->NumberOfBlocks;
        *NextBlock = BH->NextOffset;
        (*sp) = (UCP) BH->Data;

        /* Fill up the buffer from the new block */
        for (p = BeginningOfNewData; p - Buffer < sizeof(Buffer) - 1; p++) {
            *p = *(*sp)++;
        }

        /* read the number from the buffer */
        (*sp) = Buffer;

        /* Try that number again... */
        (void) LQT_sReadNumber(sp, (unsigned int *) &Result, Buffer, sizeof Buffer);

        /* Now put sp where it should be.  Part of the buffer was
         * from the old block, so only the bytes consumed beyond
         * BeginningOfNewData count against the new block's data.
         */
        (*sp) = &BH->Data[((*sp) - BeginningOfNewData)];
    }
    return Result;
}