/* smalldb.c -- Copyright 1989, 1992, 1994, 1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* Simple interface to start and end dbm.
 * You may also need to supply dbm_store() and dbm_fetch(), but these
 * should certainly be macros.
 *
 * $Id: smalldb.c,v 1.25 1996/07/09 13:24:05 lee Exp lee $
 */

#include "globals.h"
#include "error.h"

/* Actually we don't need stdio.h on most systems, but ANSI C requires it */
#include <stdio.h>
#include <errno.h> /* for errno and ENOENT */

#ifdef HAVE_FCNTL_H
# ifdef HAVE_SYSV_FCNTL_H
#  include <sys/stat.h>
# endif
# include <fcntl.h>
#endif

#ifdef HAVE_UNISTD_H
# include <unistd.h> /* for open() */
#endif

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif

#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif

#include "emalloc.h"
#include "smalldb.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "smalldb.h"

/* The physical database for the list of words, and for the list
 * of files, uses ndbm.
 * The advantage of this is that it takes only two file system accesses
 * to retrieve any data item (honest!).
 * It's also reasonably fast at insertion.
 * One disadvantage is that it doesn't cope if too many words have the
 * same (32-bit) hash value, although some of the publicly available
 * replacements such as the 4.4 BSD db package fix this.
 *
 * Since starting the database is expensive (two file opens and a malloc),
 * I have a cache of DBM pointers and keep them open.  Versions of the
 * dbm routines that don't support more than one database will have to
 * have a cache-size of one!
 * I am not sure what the impact of this would be on performance; for
 * adding a new file it shouldn't be too bad, as the file list is examined
 * only once for each file, during reading, and the word database is looked
 * at (at least once for each distinct word) only on writing.
 * For retrieval, however, the word database will be looked at for each
 * word in the query, and the file database for (potentially) each match
 * of each word, so the requests will be more interspersed.
 * Under no circumstances is it acceptable to dispense with the cache, as
 * otherwise you will be doing (literally) thousands of calls to
 * open() and close() per second!
 *
 */

#undef LQT_OpenKeyValueDatabase

#ifndef LQT_KEYVALUE_CACHE
# define LQT_KEYVALUE_CACHE 4
    /* Each active cache entry involves keeping two files open, or, with
     * BSD4.4 db (included with lq-text), a single file open.
     * Very old Unix systems limit each process to 20 file descriptors,
     * and lq-text only ever uses two entries anyway right now, so there's
     * no point increasing this.  If you want to reuse this code (and have
     * permission!), you may want to increase the number...
     */
#endif

#ifdef dbnative
# undef DBM
# define DBM DB
/* Look up *keyp in db and return the stored value by value.
 * The result is zeroed before the lookup; if the underlying get() method
 * returns any non-zero status, the result is returned with size 0 and a
 * null data pointer, mimicking ndbm's "empty datum" convention.
 */
LIBRARY DBT dbm_dbfetch(const DB *db, DBT *keyp)
{
    DBT Result;
    int Status;

    (void) bzero(&Result, sizeof(Result));

    /* The two supported db.h flavours differ in the arguments that get()
     * takes, so spell out each call in full.
     */
#ifdef USE_DB_1_85_H
    Status = (db)->get(db, keyp, &Result);
#else
    Status = (db)->get(
		db,
		NULL, /* transaction pointer */
		keyp,
		&Result,
		(u_int32_t)0 /* flags */
    );
#endif

    if (Status != 0) {
	Result.size = 0;
	Result.data = 0;
    }

    return Result;
}

#endif

/* One node in the singly linked list of open key/value databases kept by
 * LQT_OpenKeyValueDatabase.  New nodes are pushed on the front of the
 * list, and the node at the tail is the one chosen for eviction.
 */
typedef struct s_DatabaseCache {
    char *Name;		/* emalloc'd copy of the FilePrefix used to open Value */
    DBM *Value;		/* open database handle; zeroed once it has been closed */
    struct s_DatabaseCache *Next; /* next (older) node, or 0 at the tail */
    short NameLength;	/* strlen(Name), compared before calling strcmp() */
} t_DatabaseCache;

/* Size limit consulted by LQT_OpenKeyValueDatabase before adding a node */
static int MaxInCache = LQT_KEYVALUE_CACHE;
/* Number of nodes currently linked into DatabaseCache */
static int NumberInCache = 0;
/* Head of the cache list; zero when no databases are open */
static t_DatabaseCache *DatabaseCache;

/* <Function>
 *   <Name>LQT_ObtainWriteAccess
 *   <Class>Database/Database, Database/Files
 *   <Purpose>
 *      Grants write access to the current database.
 *      This is called automatically by LQT_OpenDatabase if appropriate.
 *   <Returns>
 *      <LIST>
 *        <LI>zero on success
 *        <LI>-1 on error or failure
 *	</LIST>
 *   <Notes>
 *	Write access may on some systems be exclusive, so that no other
 *	process can open the database, neither for reading nor for writing.
 *	You should not rely on this, however; on some systems, multiple
 *	clients may succeed in writing, and will corrupt the database.
 *   <Errors>
 *      A corrupt database may cause a fatal or E_BUG error.
 *   <SeeAlso>
 *	LQT_ObtainReadOnlyAccess
 *	LQT_OpenDatabase
 * </Function>
 */
API int
LQT_ObtainWriteAccess(db)
    t_LQTEXT_Database *db;
{
    int Probe;

    /* Switch the recorded open(2) flags and modes to read/write with
     * creation allowed, and make sure the owner can read and write.
     */
    db->FileFlags |= (O_RDWR|O_CREAT);
    db->FileModes |= 0600;

    /* Try the new settings out on db->DataBase; the descriptor itself is
     * not wanted, only the knowledge that the open succeeded.
     */
    Probe = open(db->DataBase, db->FileFlags, db->FileModes);

    if (Probe < 0) {
	/* permission problem */
	Error(E_WARN|E_SYS,
	    "Couldn't obtain write access for lq-text database %s",
	    db->DatabaseDirectory
	);
	return -1;
    }

    (void) close(Probe);
    return 0; /* seems to be working, at least for that file. */
}

/* <Function>
 *   <Name>LQT_ObtainReadOnlyAccess
 *   <Class>Database/Database, Database/Files
 *   <Purpose>
 *      Obtains read-only access to the current database.
 *      This is called automatically by LQT_OpenDatabase if appropriate.
 *	If the database was previously open for writing, it should be
 *	closed first with LQT_CloseDatabase or LQT_SyncDatabase.
 *   <Returns>
 *      <LIST>
 *        <LI>zero on success</LI>
 *        <LI>-1 on failure or error</LI>
 *	</LIST>
 *   <Errors>
 *      A corrupt database may cause a fatal or E_BUG error.
 *   <SeeAlso>
 *	LQT_OpenDatabase
 *	LQT_ObtainWriteAccess
 *	LQT_OpenKeyValueDatabase
 *	LQT_CloseDatabase
 * </Function>
 */
API int
LQT_ObtainReadOnlyAccess(db)
    t_LQTEXT_Database *db;
{
    int Probe;

    /* Replace (not merge) the recorded flags: read-only from now on. */
    db->FileFlags = O_RDONLY;

    /* Check that db->DataBase can actually be opened this way; no
     * diagnostic is printed on failure, the caller just gets -1.
     */
    Probe = open(db->DataBase, db->FileFlags, db->FileModes);
    if (Probe < 0) {
	return -1;
    }

    (void) close(Probe);
    return 0; /* seems to be working, at least for that file. */
}

/* <Function>
 *   <Name>LQT_CurrentlyHaveWriteAccess
 *   <Class>Database/Database, Database/Files
 *   <Purpose>
 *      Returns non-zero if and only if the given database is open
 *	with write access.
 *   <Notes>
 *	Write access may on some systems be exclusive, so that no other
 *	process can open the database, neither for reading nor for writing.
 *	You should not rely on this, however.
 *   <SeeAlso>
 *      LQT_ObtainWriteAccess
 *	LQT_OpenDatabase
 * </Function>
 */
API int
LQT_CurrentlyHaveWriteAccess(db)
    t_LQTEXT_Database *db;
{
    /* Write access is recorded purely in the saved open(2) flags: every
     * bit of O_RDWR must be present for the answer to be "yes".
     */
    if ((db->FileFlags & O_RDWR) == O_RDWR) {
	return 1;
    }
    return 0;
}

/* <Function>
 *   <Name>LQT_GetFileModes
 *   <Class>Database/Retrieval, Database/Physical
 *   <Purpose>
 *	Returns the current file modes, as determined by 
 *	LQT_ObtainReadOnlyAccess or LQT_ObtainWriteAccess, in Flagsp and
 *	Modesp.
 *	The returned values are suitable for passing to open(2).
 *   <Errors>
 *      Passing null pointers causes a fatal (E_BUG) error.
 *   <SeeAlso>
 *	LQT_OpenDatabase
 *	LQT_ObtainWriteAccess
 *	LQU_Eopen
 * </Function>
 */
API void
LQT_GetFileModes(db, Flagsp, Modesp)
    t_LQTEXT_Database *db;
    int *Flagsp;
    int *Modesp;
{
    if (!Flagsp || !Modesp) {
	Error(E_BUG, "LQT_GetFileModes(Flagsp=0x%x, Modesp=0x%x): %s zero",
	    Flagsp, Modesp,
	    (Flagsp) ? "Flagsp" : "Modesp"
	);
    }
    *Flagsp = db->FileFlags;
    *Modesp = db->FileModes;
}

/* <Function>
 *   <Name>LQT_OpenKeyValueDatabase
 *   <Class>Database/Dynamic Hashing, Database/Files
 *   <Purpose>
 *      <P>Opens an ndbm-style database of the given name, creating it if
 *	the current database modes allow it.
 *	The function keeps a cache of open databases, so that if there
 *	is already an open database of the given name, its handle is
 *	simply returned.</P>
 *	<P>Opening a Key Value Database involves several file system
 *	accesses and using malloc to obtain memory, so it's much better
 *	to use the cached values.
 *	It is even better still to keep frequently used Key Value
 *	Databases open, for example in a static variable, and to close
 *	them only when the database is closed.</P>
 *   <Returns>
 *      A handle (usually a DBM * pointer) to the named Key Value Database.
 *   <Errors>
 *	If the underlying ndbm-style database couldn't be opened, a fatal
 *	error is produced (E_FATAL|E_SYS) indicating the problem.
 *	One possible cause of this is that $HOME/LQTEXTDIR isn't a directory,
 *	or doesn't exist, and $LQTEXTDIR isn't set to point to a suitable
 *	alternate directory.
 *	Another possible problem is that a previous run of lqaddfile
 *	failed, and left the Key Value Databases locked for writing; the
 *	best thing to do in this case is to run the lqclean program and
 *	start again.
 *   <SeeAlso>
 *	LQT_CloseKeyValueDatabase
 *      LQT_OpenDatabase
 *	LQT_AddActionOnClose
 *	LQT_SyncDatabase
 * </Function>
 */
/* Return an open handle for the key/value database FilePrefix, using the
 * cache of open handles where possible.
 *
 * db supplies the open(2) flags and modes to use (db->FileFlags and
 * db->FileModes).  FilePrefix must be a non-null string; a private copy
 * of it is stored in the cache.
 *
 * Returns the handle on success.  Every failure is reported through
 * Error() with E_FATAL; if Error() returns, a null pointer is returned
 * and nothing is added to the cache.
 */
API DBM *
LQT_OpenKeyValueDatabase(db, FilePrefix)
    t_LQTEXT_Database *db;
    char *FilePrefix;
{
    t_DatabaseCache *cp;
    unsigned int NameLength = strlen(FilePrefix);
    /* A limit below one would leave no room for the entry being opened */
    int Limit = (MaxInCache > 0) ? MaxInCache : 1;

    /* Fast path: the database is already open.  The length comparison
     * saves most of the calls to strcmp().
     */
    for (cp = DatabaseCache; cp; cp = cp->Next) {
	if (cp->NameLength == NameLength && cp->Value &&
				strcmp(cp->Name, FilePrefix) == 0) {
	    return cp->Value;
	}
    }

    /* assert: cp == 0 */

    /* Not in the cache.  If adding one more entry would exceed the limit,
     * close the entry at the tail of the list (the least recently opened)
     * and reuse its node.
     */
    if (NumberInCache >= Limit) {
	t_DatabaseCache **cpp = &DatabaseCache;

	while (*cpp && (*cpp)->Next) {
	    cpp = &(*cpp)->Next;
	}

	if (*cpp) {
	    t_DatabaseCache *Victim = *cpp;

	    if (Victim->Value) {
		/* Close through the victim's own handle; cp is still null
		 * here and must not be dereferenced.
		 */
#ifdef dbnative
		(void) Victim->Value->close(Victim->Value, 0);
#else
		(void) dbm_close(Victim->Value);
#endif
		Victim->Value = (DBM *) 0;
	    }
	    if (Victim->Name) {
		(void) efree(Victim->Name);
		Victim->Name = (char *) 0;
	    }
	    *cpp = (t_DatabaseCache *) 0; /* so it isn't pointed to any more */
	    --NumberInCache;
	    cp = Victim;
	} else {
	    /* The count says the cache is full but the list is empty */
	    Error(E_BUG,
		"LQT_OpenKeyValueDatabase(%s) - cache is full up, none can be discarded",
		FilePrefix
	    );
	}
    }

    if (!cp) {
	cp = (t_DatabaseCache *) emalloc("dbm cache", sizeof(t_DatabaseCache));
    }

    /* From here on cp is a node that is not linked into the list; it is
     * either linked in at the end or freed on the failure paths.
     */

#ifdef dbnative
    {
#ifdef USE_DB_1_85_H
	/* The parameters only need to live for the duration of the call,
	 * so keep them on the stack rather than allocating (and never
	 * freeing) a block for every open.
	 * NOTE(review): relies on dbopen() copying what it needs from the
	 * HASHINFO, as the db 1.85 hash access method does -- confirm for
	 * the db library actually in use.
	 */
	HASHINFO H;

	H.bsize = 4096;
	H.ffactor = 1000;
	H.nelem = 33000;
	H.cachesize = 1 * 1024 * 1024; /* 1 MB cache */
	H.hash = 0; /* default */
	H.lorder = 0; /* default */

	cp->Value = dbopen(
	    FilePrefix,
	    db->FileFlags | O_CREAT,
	    db->FileModes,
	    DB_HASH,
	    &H
	);
#else
	DB *dbp = (DB *) 0;
	int ret;

	if ((ret = db_create(&dbp, (DB_ENV *) NULL, 0)) != 0 || !dbp) {
	    Error(E_FATAL|E_INTERNAL,
		"couldn't create internal DB structure for \"%s\" [%s]",
		FilePrefix,
		db_strerror(ret)
	    );
	    /* only reached if Error() returns */
	    (void) efree((char *) cp);
	    return (DBM *) 0;
	}
	dbp->set_cachesize(dbp, 
	    0L, /* gigabytes */
	    1 * 1024 * 1024, /* + bytes */
	    1 /* unused for a cache this small, see db3 api docs */
	);
	dbp->set_pagesize(dbp, 4096);
	dbp->set_h_ffactor(dbp, 1000);
	dbp->set_h_nelem(dbp, 33000);
	ret = dbp->open(
	    dbp,
	    FilePrefix,
	    NULL,
	    DB_HASH,
	    DB_CREATE,
	    db->FileModes
	);
	if (ret != 0) {
	    Error(E_FATAL,
		"couldn't open DB database \"%s\"; is $LQTEXTDIR set? [%s]",
		FilePrefix,
		db_strerror(ret)
	    );
	    /* only reached if Error() returns: don't cache a handle that
	     * failed to open
	     */
	    (void) dbp->close(dbp, 0);
	    (void) efree((char *) cp);
	    return (DBM *) 0;
	}
	cp->Value = dbp;

#endif
    }
#else
    cp->Value = dbm_open(
	FilePrefix,
	db->FileFlags,
	db->FileModes
    );
#endif

    if (!cp->Value){
	char *p = getenv("LQTEXTDIR");

	if (!p) {
	    Error(E_FATAL|E_SYS,
		"couldn't open dbm database \"%s\"; set $LQTEXTDIR",
		FilePrefix
	    );
	} else {
	    Error(E_FATAL|E_SYS,
		"couldn't open dbm database \"%s\" [$LQTEXTDIR is \"%s\"]",
		FilePrefix,
		p
	    );
	}
	/* only reached if Error() returns; the node was never linked in */
	(void) efree((char *) cp);
	return (DBM *) 0;
    }

    cp->NameLength = NameLength;
    cp->Name = emalloc("dbm filename", NameLength + 1);
    (void) strcpy(cp->Name, FilePrefix);
    /* Put the new element at the start of the list, since if we just called
     * dbstart, we're certain to want this dbm database almost immediately,
     * and in any case before any other database.
     */
    cp->Next = DatabaseCache; /* cp->Next was previously invalid */
    DatabaseCache = cp;
    ++NumberInCache;

    return cp->Value;
}

#ifndef LQT_KEYVALUE_CACHE
#undef LQT_CloseKeyValueDatabase

/* <Function>
 *   <Name>LQT_CloseKeyValueDatabase
 *   <Class>Database/Dynamic Hashing, Database/Files
 *   <Purpose>
 *      This currently does nothing, since the Key Value Databases are
 *	kept open.  If the library is compiled with dbm instead of ndbm,
 *	or with the cache disabled, LQT_CloseKeyValueDatabase becomes active,
 *	so it should be paired with every call to LQT_OpenKeyValueDatabase.
 *   <SeeAlso>
 *	LQT_SyncAndCloseAllKeyValueDatabases
 * </Function>
 */
/*ARGSUSED*/
API int
LQT_CloseKeyValueDatabase(db)
    DBM *db;
{
    /* Intentionally empty: the handle is left open, and the function
     * always reports success.
     * One possible refinement would be to look db up in the cache and
     * move it to the bottom of the list.
     *
     * NOTE(review): this definition is guarded by
     * "#ifndef LQT_KEYVALUE_CACHE", yet LQT_KEYVALUE_CACHE is given a
     * default value earlier in this file, so the guard looks as though
     * it can never be true -- confirm that this is intended.
     */
    (void) db;

    return 0;
}
#endif /*LQT_KEYVALUE_CACHE*/

/* <Function>
 *   <Name>LQT_SyncAndCloseAllKeyValueDatabases
 *   <Class>Database/Dynamic Hashing, Database/Files
 *   <Purpose>
 *      <P>Closes all Key Value Databases that have been opened, after
 *	writing any pending data to disk.</P>
 *	<P>This function is registered automatically as an action to be
 *	performed when a database is closed or on a call to LQT_Sync, and
 *	should not normally need to be called directly.
 *	The return value and argument are for compatibility with
 *	LQT_AddActionOnClose.
 *	The argument must be a null pointer, for future compatibility.</P>
 *   <SeeAlso>
 *	LQT_OpenKeyValueDatabase
 *	LQT_AddActionOnClose
 *	LQT_CloseDatabase
 * </Function>
 */
API int
LQT_SyncAndCloseAllKeyValueDatabases(db)
    t_LQTEXT_Database *db;
{
    register t_DatabaseCache *cp;
    t_DatabaseCache *Next = 0;

    cp = DatabaseCache;
    while (cp) {
	if (cp->Value) {
#ifdef dbnative
	    int failure = cp->Value->close(cp->Value, 0);

	    if (failure != 0) {
		Error(E_WARN,
		    "could not close BSD key/value database %s: %s",
		    cp->Name ? cp->Name : "[unknown file]",
		    db_strerror(failure)
		);
	    }
#else
	    (void) dbm_close(cp->Value);
#endif
	    cp->Value = 0;
	}
	if (cp->Name) {
	    (void) efree(cp->Name);
	    cp->Name = 0;
	}

	Next = cp->Next;
	(void) efree((char *) cp);
	/* can no longer refer to cp->Next...*/
	cp = Next;
    }
    NumberInCache = 0;
    DatabaseCache = (t_DatabaseCache *) 0;

    return 0;
}

/* <Function>
 *   <Name>LQTp_CreateEmptyKeyValueDatabase
 *   <Class>Database/Dynamic Hashing, Database/Files
 *   <Purpose>
 *      <P>Some versions of dbm or ndbm provided with various Unix systems
 *	do not automatically create a new DBM file, even when asked to; it
 *	is necessary to create the file with the open(2) or creat(2) system
 *	calls.  The original Unix dbm library was like this.</P>
 *	<P>This function creates the necessary files, in the given
 *	Directory; the files will have names beginning with the given
 *	Prefix, and depending on the version of ndbm in use, may have a
 *	suffix such as .db; BSD db uses a single file, but most other
 *	implementations use two, one called Prefix.dir and one called
 *	Prefix.pag.</P>
 *	<P>This routine is called automatically by LQT_OpenKeyValueDatabase
 *	when necessary, but is made available for general use
 *	for convenience.</P>
 *   <Bugs>
 *	LQTp_CreateEmptyKeyValueDatabase should be in liblqutil instead.
 *   <SeeAlso>
 *	LQT_OpenKeyValueDatabase
 * </Function>
 */
/* Build the path Directory/prefix and, when DBMCREAT is 0, make sure the
 * files path.dir and path.pag exist, creating them with the database's
 * current open(2) flags and modes if they are missing.
 *
 * Returns the joined path obtained from LQU_joinstr3(); this is the
 * prefix to hand to dbm, not the name of either file.
 * A file that cannot be created is reported with Error(E_FATAL|E_SYS).
 */
LIBRARY char *
LQTp_CreateEmptyKeyValueDatabase(db, Directory, prefix)
    t_LQTEXT_Database *db;
    char *Directory;
    char *prefix;
{
    /* Although ndbm will create files automatically, gdbm and dbm will
     * not, so we do that here.
     * Also, it might take a while to get to here, so it will be a lot
     * better if we get an error message now.
     */
    char *p = LQU_joinstr3(Directory, "/", prefix);

#if DBMCREAT == 0
    char *q;	/* name of the file currently being checked */
    int i;	/* descriptor returned by open() */
    int Flags, Modes;

    LQT_GetFileModes(db, &Flags, &Modes);

    q = LQU_joinstr3(p, ".", "dir");
    errno = 0; /* paranoia */

    i = open(q, O_RDONLY, 0);
    if (i >= 0) {
	/* already there; the descriptor was only a probe */
	(void) close(i);
    } else if (errno == ENOENT) {

	if (!(Flags & O_CREAT)) {
	    /* not allowed to create it: let the later dbm open fail */
	    (void) efree(q);
	    return p;
	}

	i = open(q, Flags, Modes); /* rw-rw-rw & umask */

	if (i < 0) {
	    Error(E_FATAL|E_SYS,
		"LQTp_CreateEmptyKeyValueDatabase: can't create dbm database \"%s\"",
		q
	    );
	} else {
	    (void) close(i);
	}
    }

    /* Same again for the second file: overwrite the "dir" suffix */
    (void) strcpy(&q[strlen(q) - 3], "pag");

    i = open(q, O_RDONLY, 0);
    if (i >= 0) {
	(void) close(i);
    } else if (errno == ENOENT) {
	i = open(q, Flags, Modes); /* rw-rw-rw & umask */

	if (i < 0) {
	    Error(E_FATAL|E_SYS,
		"can't create dbm database file \"%s\"",
		q
	    );
	} else {
	    (void) close(i);
	}
    }

    (void) efree(q);

#endif /*DBMCREAT*/

    return p; /* the prefix for dbm, not the whole path */
}

/* kvpdbg is defined after its first use below, so declare it here */
int kvpdbg(char *prefix, DBT t);

/* Store the pair (key, value) in db using the handle's put() method with
 * no transaction, passing flags through unchanged.
 * If put() returns non-zero, the pair is dumped to stderr with kvpdbg()
 * and the failure is reported with Error(E_FATAL).
 */
void
lqdbnativestore(db, key, value, flags)
    DB *db;
    DBT *key;
    DBT *value;
    u_int32_t flags;
{
    int ret = (db)->put(db, (DB_TXN *)NULL, key, value, flags);

    if (ret != 0) {
	/* TODO use Error here */
	kvpdbg("error storing pair; key: ", *key);
	kvpdbg(" value ", *value);
	fprintf(stderr, "\n");
	/* Convert the handle and the flags to unsigned long so that the
	 * arguments really have the types the conversions expect.
	 */
	Error(E_FATAL,
	    "could not save pair to BSD key/value database 0x%lx: %s [flags %lu]",
	    (unsigned long) db,
	    db_strerror(ret),
	    (unsigned long) flags
	);
    }
}


/* Debugging aid: write prefix to stderr, followed by the bytes of t in
 * square brackets.  Bytes from ' ' to '~' are written as they are, and
 * all others as a backslash followed by three octal digits.
 * A null prefix is treated as an empty one.
 * Always returns 37, so that the call can be used in a comma expression.
 */
int kvpdbg(char *prefix, DBT t)
{
    unsigned char *p;

    /* The prefix is data, not a format: a '%' in it must be printed
     * literally rather than interpreted by fprintf().
     */
    if (prefix) {
	fputs(prefix, stderr);
    }
    putc('[', stderr);
    for (p = t.dptr; p - (unsigned char *) t.dptr < t.dsize; p++) {
	if (*p >= ' ' && *p <= '~') {
	    putc(*p, stderr);
	} else {
	    fprintf(stderr, "\\%03o", *p);
	}
    }
    putc(']', stderr);
    return 37; /* so I can use it in a comma expression */
}
