/* smalldb.c -- Copyright 1989, 1992, 1994, 1996 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. */ /* Simple interface to start and end dbm. * You may also need to supply dbm_store() and dbm_fetch(), but these * should certainly be macros. * * $Id: smalldb.c,v 1.25 1996/07/09 13:24:05 lee Exp lee $ */ #include "globals.h" #include "error.h" /* Actually we don't need stdio.h on most systems, but ANSI C requires it */ #include #ifdef HAVE_FCNTL_H # ifdef HAVE_SYSV_FCNTL_H # include # endif # include #endif #ifdef HAVE_UNISTD_H # include /* for open() */ #endif #ifdef HAVE_STDLIB_H # include #endif #ifdef HAVE_STRING_H # include #else # include #endif #include "emalloc.h" #include "smalldb.h" #include "lqutil.h" #include "liblqtext.h" /* The physical database for the list of words, and for the list * of files, uses ndbm. * The advantage of this is that it takes only two file system accesses * to retrieve any data item (honest!). * It's also reasonably fast at insertion. * One disadvantage is that it doesn't cope if too many words have the * same (32-bit) hash function, although some of the publicly available * replacements such as the 4.4 BSD db package fix this. * * Since starting the database is expensive (two file opens and a malloc), * I have a cache of DBM pointers and keep them open. Versions of the * dbm routines that don't support more than one database will have to * have a cache-size of one! * I am not sure what the impact of this would be on performance; for * adding a new file it shouldn't be too bad, as the file list is examined * only once for each file, during reading, and the word database is looked * at (at least once for each distinct word) only on writing. * For retrieval, however, the word database will be looked at for each * word in the query, and the file database for (potentially) each match * of each word, so the requests will be more interspersed. 
* Under no circumstances is it acceptable to dispense with the cache, as * otherwise you will be doing (literally) thousands of calls to * open() and close() per second! * */ #undef LQT_OpenKeyValueDatabase #ifndef LQT_KEYVALUE_CACHE # define LQT_KEYVALUE_CACHE 4 /* Each active cache entry involves keeping two files open, or, with * BSD4.4 db (included with lq-text), a single file open. * Very old Unix systems limit each process to 20 file descriptors, * and lq-text only ever uses two entries anyway right now, so there's * no point increasing this. If you want to reuse this code (and have * permission!), you may want to increase the number... */ #endif #ifdef dbnative # undef DBM # define DBM DB LIBRARY DBT dbm_dbfetch(const DB *db, DBT *keyp) { DBT tmp; if ((db)->get(db, keyp, &tmp, 0) != 0) { tmp.size = 0; tmp.data = 0; } return tmp; } #endif typedef struct s_DatabaseCache { char *Name; DBM *Value; struct s_DatabaseCache *Next; short NameLength; } t_DatabaseCache; static int MaxInCache = LQT_KEYVALUE_CACHE; static int NumberInCache = 0; static t_DatabaseCache *DatabaseCache; /* * LQT_ObtainWriteAccess * Database/Database, Database/Files * * Grants write access to the current database. * This is called automatically by LQT_OpenDatabase if appropriate. * * *
  • zero on success *
  • -1 on error or failure * * * Write access may on some systems be exclusive, so that no other * process can open the database, neither for reading nor for writing. * You should not rely on this, however; on some systems, multiple * clients may succeed in writing, and will corrupt the database. * * A corrupt database may cause a fatal or E_BUG error. * * LQT_ObtainReadOnlyAccess * LQT_OpenDatabase * */ API int LQT_ObtainWriteAccess(db) t_LQTEXT_Database *db; { int fd; db->FileFlags |= (O_RDWR|O_CREAT); db->FileModes |= 0600; /* allow self to read and write */ fd = open(db->DataBase, db->FileFlags, db->FileModes); if (fd >= 0) { (void) close(fd); return 0; /* seems to be working, at least for that file. */ } else { /* permission problem */ Error(E_WARN|E_SYS, "Couldn't obtain write access for lq-text database %s", db->DatabaseDirectory ); return -1; } } /* * LQT_ObtainReadOnlyAccess * Database/Database, Database/Files * * Obtains read-only access to the current database. * This is called automatically by LQT_OpenDatabase if appropriate. * If the database was previously open for writing, it should be * closed first with LQT_CloseDatabase or LQT_SyncDatabase. * * *
  • zero on success
  • *
  • -1 on failure or error
  • *
    * * A corrupt database may cause a fatal or E_BUG error. * * LQT_OpenDatabase * LQT_ObtainWriteAccess * LQT_OpenKeyValueDatabase * LQT_CloseDatabase *
    */ API int LQT_ObtainReadOnlyAccess(db) t_LQTEXT_Database *db; { int fd; db->FileFlags = O_RDONLY; fd = open(db->DataBase, db->FileFlags, db->FileModes); if (fd >= 0) { (void) close(fd); return 0; /* seems to be working, at least for that file. */ } else { return -1; } } /* * LQT_CurrentlyHaveWriteAccess * Database/Database, Database/Files * * Returns non-zero if and only if the given database is open * with write access. * * Write access may on some systems be exclusive, so that no other * process can open the database, neither for reading nor for writing. * You should not rely on this, however. * * LQT_ObtainWriteAccess * LQT_OpenDatabase * */ API int LQT_CurrentlyHaveWriteAccess(db) t_LQTEXT_Database *db; { return ((db->FileFlags & O_RDWR) == O_RDWR); } /* * LQT_GetFileModes * Database/Retrieval, Database/Physical * * Returns the current file modes, as determined by * LQT_ObtainReadOnlyAccess or LQT_ObtainWriteAccess, in Flagsp and * Modesp. * The returned values are suitable for passing to open(2). * * Passing null pointers causes a fatal (E_BUG) error. * * LQT_OpenDatabase * LQT_ObtainWriteAccess * LQU_Eopen * */ API void LQT_GetFileModes(db, Flagsp, Modesp) t_LQTEXT_Database *db; int *Flagsp; int *Modesp; { if (!Flagsp || !Modesp) { Error(E_BUG, "LQT_GetFileModes(Flagsp=0x%x, Modesp=0x%x): %s zero", Flagsp, Modesp, (Flagsp) ? "Flagsp" : "Modesp" ); } *Flagsp = db->FileFlags; *Modesp = db->FileModes; } /* * LQT_OpenKeyValueDatabase * Database/Dynamic Hashing, Database/Files * *

    Opens an ndbm-style database of the given name, creating it if * the current database modes allow it. * The function keeps a cache of open databases, so that if there * is already an open database of the given name, its handle is * simply returned.

    *

    Opening a Key Value Database involves several file system * accesses and using malloc to obtain memory, so it's much better * to use the cached values. * It is even better still to keep frequently used Key Value * Databases open, for example in a static variable, and to close * them only when the database is closed.

    * * A handle (usually a DBM * pointer) to the named Key Value Database. * * If the underlying ndbm-style database couldn't be opened, a fatal * error is produced (E_FATAL|E_SYS) indicating the problem. * One possible cause of this is that $HOME/LQTEXTDIR isn't a directory, * or doesn't exist, and $LQTEXTDIR isn't set to point to a suitable * alternate directory. * Another possible problem is that a previous run of lqaddfile * failed, and left the Key Value Databases locked for writing; the * best thing to do in this case is to run the lqclean program and * start again. * * LQT_CloseKeyValueDatabase * LQT_OpenDatabase * LQT_AddActionOnClose * LQT_SyncDatabase *
    */ API DBM * LQT_OpenKeyValueDatabase(db, FilePrefix) t_LQTEXT_Database *db; char *FilePrefix; { t_DatabaseCache *cp; int NameLength = strlen(FilePrefix); for (cp = DatabaseCache; cp; cp = cp->Next) { if (cp->NameLength == NameLength && cp->Value && strcmp(cp->Name, FilePrefix) == 0) { return cp->Value; } } /* assert: cp == 0 */ /* not in the cache */ /* if the cache is too big, close one entry, the last one */ if (NumberInCache > MaxInCache) { t_DatabaseCache **cpp; for (cpp = &DatabaseCache; (*cpp); cpp = &(*cpp)->Next) { if (!(*cpp)->Next) break; } if (*cpp && !(*cpp)->Next) { /* Actually if this isn't true, MaxInCache is probably zero! */ if ((*cpp)->Value) { #ifdef dbnative cp->Value->close((*cpp)->Value); #else (void) dbm_close((*cpp)->Value); #endif (*cpp)->Value = (DBM *) 0; } if ((*cpp)->Name) { (void) efree((*cpp)->Name); (*cpp)->Name = (char *) 0; } cp = (*cpp); *cpp = (t_DatabaseCache *) 0; /* so it isn't pointed to any more */ --NumberInCache; } else { Error(E_BUG, "LQT_OpenKeyValueDatabase(%s) - cache is full up, none can be discarded", FilePrefix ); } } if (!cp) { cp = (t_DatabaseCache *) emalloc("dbm cache", sizeof(t_DatabaseCache)); } #ifdef dbnative { HASHINFO *H = (HASHINFO *) emalloc("db parameters", sizeof(HASHINFO)); H->bsize = 4096; H->ffactor = 1000; H->nelem = 33000; H->cachesize = 1 * 1024 * 1024; /* 1 MB cache */ H->hash = 0; /* default */ H->lorder = 0; /* default */ cp->Value = dbopen( FilePrefix, db->FileFlags | O_CREAT, db->FileModes, DB_HASH, H ); /* Should H be freed now? 
I assume not */ } #else cp->Value = dbm_open( FilePrefix, db->FileFlags, db->FileModes ); #endif if (!cp->Value){ char *p = getenv("LQTEXTDIR"); if (!p) { Error(E_FATAL|E_SYS, "couldn't open dbm database \"%s\"; set $LQTEXTDIR", FilePrefix ); } else { Error(E_FATAL|E_SYS, "couldn't open dbm database \"%s\" [$LQTEXTDIR is \"%s\"]", FilePrefix, p ); } return (DBM *) 0; } cp->NameLength = NameLength; cp->Name = emalloc("dbm filename", NameLength + 1); (void) strcpy(cp->Name, FilePrefix); /* Put the new element at the start of the list, since if we just called * dbstart, we're certain to want this dbm database almost immediately, * and in any case before any other database. */ cp->Next = DatabaseCache; /* cp->Next was previously invalid */ DatabaseCache = cp; ++NumberInCache; return cp->Value; } #ifndef LQT_KEYVALUE_CACHE #undef LQT_CloseKeyValueDatabase /* * LQT_CloseKeyValueDatabase * Database/Dynamic Hashing, Database/Files * * This currently does nothing, since the Key Value Databases are * kept open. If the library is compiled with dbm instead of ndbm, * or with the cache disabled, LQT_CloseKeyDatabase becomes active, * so it should be paired with every call to LQT_OpenKeyValueDatabase * * LQT_SyncAndCloseAllKeyValueDatabases * */ /*ARGSUSED*/ API int LQT_CloseKeyValueDatabase(db) DBM *db; { /* no-op */ /* This could check the named db and move it to the bottom of the list, * I suppose. */ return 0; } #endif /*LQT_KEYVALUE_CACHE*/ /* * LQT_SyncAndCloseAllKeyValueDatabases * Database/Dynamic Hashing, Database/Files * *

    Closes all Key Value Databases that have been opened, after * writing any pending data to disk.

    *

    This function is registered automatically as an action to be * performed when a database is closed or on a call to LQT_Sync, and * should not normally need to be called directly. * The return value and argument are for compatibility with * LQT_AddActionOnClose. * The argument must be a null pointer, for future compatibility.

    * * LQT_OpenKeyValueDatabase * LQT_AddActionOnClose * LQT_CloseDatabase *
    */
API int
LQT_SyncAndCloseAllKeyValueDatabases(db)
    t_LQTEXT_Database *db;
{
    t_DatabaseCache *entry;
    t_DatabaseCache *following;

    /* Walk the cache list, closing each handle and releasing the name
     * and the node.  The db argument is not consulted.
     */
    for (entry = DatabaseCache; entry; entry = following) {
        /* remember the successor now; entry is freed below */
        following = entry->Next;

        if (entry->Value) {
#ifdef dbnative
            entry->Value->close(entry->Value);
#else
            (void) dbm_close(entry->Value);
#endif
            entry->Value = 0;
        }

        if (entry->Name) {
            (void) efree(entry->Name);
            entry->Name = 0;
        }

        (void) efree((char *) entry);
    }

    /* the cache is now empty */
    DatabaseCache = (t_DatabaseCache *) 0;
    NumberInCache = 0;

    return 0;
}

/*
 * LQTp_CreateEmptyKeyValueDatabase
 * Database/Dynamic Hashing, Database/Files
 *
 *

    Some versions of dbm or ndbm provided with various Unix systems * do not automatically create a new DBM file, even when asked to; it * is necessary to create the file with the open(2) or creat(2) system * calls. The original Unix dbm library was like this.

    *

    This function creates the necessary files, in the given * Directory; the files will have names beginning with the given * Prefix, and depending on the version of ndbm in use, may have a * suffix such as .db; BSD db uses a single file, but most other * implementations use two, one called Prefix.dir and one called * Prefix.pag.

    *

    This routine is called automatically by LQT_OpenKeyValueDatabase * when necessary, but is made available for general use * for convenience.

    * * LQTp_CreateEmptyKeyValueDatabase should be in liblqutil instead. * * LQT_OpenKeyValueDatabase *
    */
LIBRARY char *
LQTp_CreateEmptyKeyValueDatabase(db, Directory, prefix)
    t_LQTEXT_Database *db;
    char *Directory;
    char *prefix;
{
    char *p;        /* Directory "/" prefix; this is what gets returned */
#if DBMCREAT == 0
    extern int errno;
    char *q;        /* p plus ".dir", later rewritten in place to ".pag" */
    int i;          /* scratch file descriptor */
    int Flags, Modes;

    LQT_GetFileModes(db, &Flags, &Modes);
#endif

    /* Although ndbm will create files automatically, gdbm and dbm will
     * not, so we do that here.
     * Also, it might take a while to get to here, so it will be a lot
     * better if we get an error message now.
     */
    p = LQU_joinstr3(Directory, "/", prefix);

#if DBMCREAT == 0
    q = LQU_joinstr3(p, ".", "dir");

    errno = 0; /* paranoia */
    i = open(q, O_RDONLY, 0);
    if (i >= 0) {
        /* the file already exists; don't leak the probe descriptor */
        (void) close(i);
    } else if (errno == ENOENT) {
        if (!(Flags & O_CREAT)) {
            (void) efree(q); /* q is not returned, so release it */
            return p; /* let it fail */
        }
        i = open(q, Flags, Modes); /* rw-rw-rw & umask */
        if (i < 0) {
            Error(E_FATAL|E_SYS,
    "LQTp_CreateEmptyKeyValueDatabase: can't create dbm database \"%s\"",
                q
            );
        } else {
            (void) close(i);
        }
    }

    /* q ends in "dir"; overwrite those three characters */
    (void) strcpy(&q[strlen(q) - 3], "pag");

    errno = 0;
    i = open(q, O_RDONLY, 0);
    if (i >= 0) {
        (void) close(i);
    } else if (errno == ENOENT) {
        /* As before, O_CREAT is not checked for this second file. */
        i = open(q, Flags, Modes); /* rw-rw-rw & umask */
        if (i < 0) {
            Error(E_FATAL|E_SYS,
                "can't create dbm database file \"%s\"",
                q
            );
        } else {
            (void) close(i);
        }
    }

    (void) efree(q);
#endif /*DBMCREAT*/

    return p; /* the prefix for dbm, not the whole path */
}