/* rblock.c -- Copyright 1991, 1994-1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* The low-level physical Word Database for lq-text.
 *
 * $Id: rblock.c,v 1.6 1996/05/15 23:10:27 lee Exp $
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <sys/types.h>
#ifdef HAVE_FCNTL_H
# ifdef HAVE_SYSV_FCNTL_H
#  include <sys/stat.h>
# endif
# include <fcntl.h>
#endif

#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "pblock.h"
#include "numbers.h"
#include "wordrules.h"
#include "getbyte.h"
#include "blkheader.h"
#include "liblqtext.h"
#include "lqtrace.h"

/** C library functions that need to be declared: **/
/** lqtext library functions that need to be declared: **/
/** Functions within this file that need to be declared: **/
/** **/

/* Layout of the physical index database
 * =====================================
 *
 * This file is the only interface to the database of FID/Offset pairs.
 *
 * The db is organised in blocks arranged in Tagliatelli format: a linked
 * list of blocks for each WID; there is a list for each WID in the Word
 * Index.  The Word Index contains the block number of the start of the
 * chain.
 *
 * A separate file, FreeListFile (by default "freefile") contains one bit for
 * every block in the data file -- if the bit is set, the corresponding block
 * is in use.
 * Block 0 of data is never used to store wordplaces; it is intended to be
 * used for concurrency/locking in the future.  As a result, the first bit
 * of the first byte of the freelist is always set...
 *
 * block 1... first data block:
 * +---------------------------
 * | bytes 0...3: Offset of next block in this chain
 * | The (FID, Offset) pairs follow, in compressed format.
 * |
 * block 2... next data block (either the start of a new chain, or a
 * continuation of some other chain.  Or maybe unused, especially if files
 * have been deleted).
 *
 * The block header is described by t_BlockHeader.  It's awfully simple,
 * but it used to be more complex.
 *
 * If lq-text was compiled with -DWIDINBLOCK, each block also contains the
 * WID for which it was written, for better checking.  This only really
 * makes sense with -DASCIITRACE, as otherwise the debugging and tracing
 * code gets compiled out!
 *
 */

/* Shorthand for "pointer to unsigned char"; it is used in casts simply to
 * help keep the source lines from getting too long.
 */
typedef unsigned char *UCP;

/* LQTp__GetByte -- return the next data byte for WID, moving on to the
 * next block in the chain when the current block has been used up.
 *
 * db          - database handle, passed on to LQT_ReadBlock
 * WID         - word id, passed on to LQT_ReadBlock and used in diagnostics
 * sp          - in/out: current read position; advanced past the byte
 *               that is returned
 * Blockp      - in/out: start of the current block
 * BlockLength - in/out: length in bytes of the current block
 * NextBlock   - in/out: offset of the next block in the chain; zero
 *               is treated as "no next block"
 *
 * When *sp has reached *Blockp + *BlockLength, the block at *NextBlock is
 * read and *Blockp, *BlockLength, *NextBlock and *sp are all reset from
 * its header.
 *
 * A missing or unreadable next block is reported with Error(E_BUG, ...).
 * Should Error return, 0 is returned rather than continuing with a block
 * that was never read.
 */
LIBRARY unsigned char
LQTp__GetByte(db, WID, sp, Blockp, BlockLength, NextBlock)
    t_LQTEXT_Database *db;
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned int *BlockLength;
    unsigned long *NextBlock;
{
    t_BlockHeader *BH;

    if (*sp - (*Blockp) >= (*BlockLength)) {
        /* The current block is exhausted, so we need the next one. */
        if (*NextBlock == (unsigned long) 0) {
            Error(E_BUG,
                "LQTp__GetByte: WID %ld: database is corrupt, Next is zero",
                WID
            );
            /* No new block was read.  Falling through would re-parse the
             * header of the old, already consumed block and hand its
             * first data byte out a second time.
             */
            return 0;
        }

#ifdef ASCIITRACE
        if (LQT_TraceFlagsSet(
                LQTRACE_FREEBLOCKS|LQTRACE_GETPLACES|LQTRACE_READBLOCK
            )
        ) {
            if (LQT_BlockIsFree(db, *NextBlock)) {
                Error(E_BUG,
                    "%s: %d: GetByte: Block %ld in used chain was free!",
                    __FILE__, __LINE__, *NextBlock
                );
            }
        }
#endif
        (*sp) = (*Blockp) = LQT_ReadBlock(db, *NextBlock, WID);

        /* Check the new block */
        if ((*Blockp) == (UCP) 0) {
            Error(E_BUG,
                "LQTp__GetByte: Database corrupt, %lu, sigh.",
                *NextBlock
            );
            /* Never dereference the null block below. */
            return 0;
        }

        /* Pick up the length and chain link from the new block's header,
         * and start reading at the first byte of its data.
         */
        /*NOSTRICT*/
        BH = (t_BlockHeader *) (*Blockp);
        *BlockLength = BLOCKSIZE * BH->NumberOfBlocks;
        *NextBlock = BH->NextOffset;
        (*sp) = (UCP) BH->Data;
    }
    return *((*sp)++);
}

/* LQTp__GetLong -- read one number from the block chain for WID.
 *
 * This is the reverse of PutLong.
 * Things are slightly complicated by the need to provide LQT_sReadNumber
 * with a contiguous copy of all of the bytes in a number that spanned
 * a gap between data blocks.
 *
 * db          - database handle, passed on to LQT_ReadBlock
 * WID         - word id, passed on to LQT_ReadBlock and used in diagnostics
 * sp          - in/out: current read position; left just after the number
 * Blockp      - in/out: start of the current block
 * BlockLength - in/out: length in bytes of the current block
 * NextBlock   - in/out: offset of the next block in the chain; zero
 *               is treated as "no next block"
 *
 * Returns the number that was read.  Returns 0 if the number could not be
 * read from the current block and there is no next block, or if the next
 * block could not be read (and Error returned).
 */
LIBRARY unsigned long
LQTp__GetLong(db, WID, sp, Blockp, BlockLength, NextBlock)
    t_LQTEXT_Database *db;
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned int *BlockLength;
    unsigned long *NextBlock;
{
    unsigned char Buffer[sizeof(unsigned long) * 8/7 + 2];
    /* Result starts at zero: LQT_sReadNumber is handed an unsigned int
     * pointer, so it may fill in only part of this object, and the second
     * call below is not checked for failure.  Without the initialiser the
     * value returned could contain indeterminate bits.
     * NOTE(review): confirm the real parameter type of LQT_sReadNumber;
     * if it is unsigned int *, an unsigned int local would avoid the pun.
     */
    unsigned long Result = 0;
    t_BlockHeader *BH;
    unsigned char *NumberStart = (*sp);
    unsigned char *p;

    if (
        LQT_sReadNumber(sp, (unsigned int *) &Result, *Blockp, *BlockLength) < 0 ||
        (*sp) - (*Blockp) > (*BlockLength)
    ) {
        unsigned char *BeginningOfNewData = Buffer;
        /* Both copy loops stop here, one byte short of the end of Buffer */
        unsigned char *BufferLimit = &Buffer[sizeof(Buffer) - 1];

        if (*NextBlock == (unsigned long) 0) {
            return 0L;
        }

        /* Neither loop writes the last byte of Buffer, but the decoder is
         * told about the whole array, so give that byte a defined value.
         */
        *BufferLimit = 0;

        /* Copy the first half of the number into the overflow buffer.
         * The copy is bounded by the buffer as well as by the block, so
         * that corrupt data cannot make it run off the end of Buffer.
         */
        for (
            p = NumberStart;
            p < &(*Blockp)[*BlockLength] && BeginningOfNewData < BufferLimit;
            p++
        ) {
            *BeginningOfNewData++ = *p;
        }

        /** Now:
         ** . sp and NumberStart must not be used again until they are
         **   reset, as they refer to the old data block
         ** . Buffer contains the first few bytes of the number
         ** . we need some more bytes, but don't yet know how many, as
         **   this depends on the number representation
         ** . BeginningOfNewData points 1 beyond the end of the first
         **   section of the number, i.e. to where the first byte from
         **   the newly read block will be stored.
         **/

        (*sp) = *Blockp = LQT_ReadBlock(db, *NextBlock, WID);

#ifdef ASCIITRACE
        /* NOTE(review): LQTp__GetByte tests LQTRACE_GETPLACES at this
         * point; PUTPLACES here may be a slip -- confirm before changing.
         */
        if (LQT_TraceFlagsSet(
                LQTRACE_FREEBLOCKS|LQTRACE_PUTPLACES|LQTRACE_READBLOCK
            )
        ) {
            if (LQT_BlockIsFree(db, *NextBlock)) {
                Error(E_BUG,
                    "%s: %d: GetLong: Block %ld in used chain was free!",
                    __FILE__, __LINE__, *NextBlock
                );
            }
        }
#endif
        /* Check the new block */
        if ((*Blockp) == (UCP) 0) {
            Error(E_BUG,
                "GetLong: WID %ld: Database corrupt, *NextBlock zero", WID);
            /* Never dereference the null block below. */
            return 0L;
        }

        /* Pick up the length and chain link from the new block's header */
        BH = (t_BlockHeader *) *Blockp;
        *BlockLength = BLOCKSIZE * BH->NumberOfBlocks;
        *NextBlock = BH->NextOffset;
        (*sp) = (UCP) BH->Data;

        /* Fill up the buffer from the new block */
        for (p = BeginningOfNewData; p < BufferLimit; p++) {
            *p = *(*sp)++;
        }

        /* Read the number again, this time from the contiguous copy.
         * The result of the call is deliberately not checked; Result
         * keeps a defined value whatever happens.
         */
        (*sp) = Buffer;
        (void) LQT_sReadNumber(sp, (unsigned int *) &Result, Buffer, sizeof Buffer);

        /* Now put sp where it should be.  Part of the buffer was
         * from the old block, so only the bytes consumed beyond
         * BeginningOfNewData count as an offset into the new block's data.
         */
        (*sp) = &BH->Data[((*sp) - BeginningOfNewData)];
    }
    return Result;
}