/* lqunindex.c -- Copyright 1994, 1996 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. */ /* lqunindex -- remove a document from the LQ-Text text retrieval index * Liam Quin, July 1994 * * $Id: lqunindex.c,v 1.13 2001/05/31 03:50:13 liam Exp $ */ static char *Version = "@(#) $Id: lqunindex.c,v 1.13 2001/05/31 03:50:13 liam Exp $"; #include "globals.h" /* defines and declarations for database filenames */ #include "error.h" #ifndef FILE # include #endif #include #include #include #ifdef HAVE_FCNTL_H # include #endif #ifdef HAVE_STRING_H # include #else # include #endif #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_STDLIB_H # include #else # include #endif #include "fileinfo.h" #include "wordinfo.h" #include "wordrules.h" #include "wordplace.h" #include "emalloc.h" #include "addfile.h" #include "lqutil.h" #include "liblqtext.h" #include "lqtrace.h" #include "filter.h" #include "revision.h" /** Arguments and actions for call-backs: **/ typedef enum { LQC_UNINDEX_COMPLETED_OK = 1, LQC_UNINDEX_FILE_NOT_INDEXED, LQC_UNINDEX_FILE_NOT_FOUND, LQC_UNINDEX_FILE_HAS_CHANGED, LQC_UNINDEX_FILE_INCOMPREHENSIBLE, LQC_UNINDEX_CONTAINED_NEW_WORDS, LQC_UNINDEX_OTHER_PROBLEM } t_UnIndexResult; typedef enum { LQC_UNINDEX_INTERNAL_ERROR = 0, LQC_UNINDEX_ACTION_ASSUME_OK = 100, LQC_UNINDEX_ACTION_RETRY, LQC_UNINDEX_ACTION_FAIL_AND_STOP, LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE } t_UnIndexAction; /** a callback function: **/ typedef t_UnIndexAction (* t_UnIndexCallBack)( #ifdef HAVE_PROTO char *theName, t_UnIndexResult theResult #endif ); /** Data structure for hash table **/ typedef struct s_WordToDelete { struct s_WordToDelete *Next; /* 4 bytes */ t_WID WID; /* 4 bytes, total so far is 8 */ t_FID *FIDArray; /* 4 + (16 or more), total 28 */ /* one for the first value, one for a trailing 0 */ } t_WordToDelete; static t_WordToDelete ZeroEntry = { 0, }; #ifndef PENDING_DELETE_TABLE_SIZE /* Each cache entry is likely to use 16 bytes. * On a 64-bit machine, or an old MIPS/SGI system using 64-byte alignment, * things are worse, but then you have lots and lot of memory. * * If there are 65536 entries (16 bits worth), * WID could be a short, as it would only be necessary to store * the top 16 bits. This uses about a megabyte, but in practice, * the short gets rounded out to a full word for byte alignment, * so it doesn't help. * * The trailing FIDArray is allocated as needed. The last entry is * equal to 0. * * The default value for PENDING_DELETE_TABLE_SIZE is 128 * 1024 entries, * using 3.5 MBytes of memory, with an additional 64*1024 entries * adding up to another 1.75 MBytes. * */ # define PENDING_DELETE_TABLE_SIZE (128*1024) #endif /* PENDING_DELETE_TABLE_SIZE */ t_WordToDelete *PendingDeletes = (t_WordToDelete *) 0; unsigned long HashSize = PENDING_DELETE_TABLE_SIZE; /* We are using open chaining, so MaxChainedWordsInHashTable represents * the maximum extra words that can be allocated before they are all * flushed. We allow 50% overflow by default: */ unsigned long MaxChainedWordsInHashTable = PENDING_DELETE_TABLE_SIZE * 0.5; unsigned long TotalWordsInHashTable = 0; /* cache starts off empty! */ /** Keep a queue of files to unindex. This lets the filters get ** started, so we aren't waiting for an uncompress, for example. ** There can be several filters active at a time, each filling the ** first buffer in the input pipe. If your Unix system has only a few ** file descriptors (between 20 and 30 per process, say), you should set ** LQC_MAX_FILES_WAITING to 2 or 3, or you'll sometimes see messages about ** running out of file descriptors. ** ** Most modern Unix systems allow at least 256 file descriptors per process, ** so this isn't likely to be a problem. **/ typedef struct s_FileItem { t_FileInfo *FileInfo; t_UnIndexCallBack StatusCallback; int HaveStarted; struct s_FileItem *Next; } t_FileItem; static t_FileItem *FilesWaiting = 0; static t_FileItem **NextFileItem = 0; static int NumberOfFilesWaiting = 0; #ifndef LQC_MAX_FILES_WAITING # define LQC_MAX_FILES_WAITING 5 #endif /** System calls and library routines used in this file: **/ /** System calls: **/ /** Library Functions: **/ /** liblqtext functions: **/ extern void lqSetSignals(); /** Functions within this file that need declaring: **/ PRIVATE t_UnIndexAction LQC_AddToFilesWaitingToBeDeleted( #ifdef HAVE_PROTO t_LQTEXT_Database *db, char *Name, t_UnIndexCallBack StatusCallback #endif ); PRIVATE void LQC_UnIndexOneWaitingFile( #ifdef HAVE_PROTO t_LQTEXT_Database *db #endif ); PRIVATE void LQC_EmptyHashTableOut( #ifdef HAVE_PROTO t_LQTEXT_Database *db #endif ); PRIVATE void LQC_InitialiseUnIndexing(/*empty*/); PRIVATE void LQC_UnIndexStream( #ifdef HAVE_PROTO t_LQTEXT_Database *db, t_FileInfo *FileInfo #endif ); PRIVATE void LQC_UnIndexFrom( #ifdef HAVE_PROTO t_LQTEXT_Database *db, char *Name #endif ); /* Symbol Table Interface */ PRIVATE void LQC_UnIndexWord( #ifdef HAVE_PROTO t_LQTEXT_Database *db, t_FileInfo *FileInfo, t_WordInfo *WordInfo #endif ); /**/ char *progname = "@(#) $Source: /usr/home/liam/src/lq-text1.17/src/lqtext/RCS/lqunindex.c,v $"; static int SignalFlag = 0; PRIVATE int SignalHandler() { ++SignalFlag; if (SignalFlag > 3) { LQT_CloseDatabase((t_LQTEXT_Database *) 0); Error(E_FATAL, "received %d signals to quit, exiting; db may be corrupt!.", SignalFlag ); } return 0; } int main(argc, argv) int argc; char *argv[]; { extern int getopt(); extern char *optarg; extern int optind; int c; int ErrorFlag = 0; int DoNothing = 0; char *InputFile = (char *) 0; t_lqdbOptions *Options; t_LQTEXT_Database *db = 0; progname = argv[0]; /* retain the full path at first */ Options = LQT_InitFromArgv(argc, argv); while ((c = getopt(argc, argv, "f:H:w:xVuZz:")) != -1) { switch (c) { case 'f': if (InputFile) { Error(E_USAGE|E_XHINT|E_FATAL, "only one -f option allowed; use -xv for explanation" ); } InputFile = optarg; break; case 'H': if (!LQU_cknatstr(optarg)) { Error(E_FATAL|E_USAGE|E_XHINT, "-H must be given a hash table size >= 1, not \"%s\"", optarg ); } HashSize = atoi(optarg); break; case 'w': if (!LQU_cknatstr(optarg)) { Error(E_FATAL|E_USAGE|E_XHINT, "-w must be given a number >= 0, not \"%s\"", optarg ); } MaxChainedWordsInHashTable = atoi(optarg); break; case 'Z': case 'z': break; /* work done in SetDefault() */ case 'V': fprintf(stderr, "%s: Release: %s\n", progname, LQTEXTREVISION); fprintf(stderr, "%s: Revision: %s\n", progname, Version); DoNothing = 1; break; case 'x': ErrorFlag = (-1); break; default: case '?': ErrorFlag = 1; } } if ((progname = strrchr(progname, '/')) != (char *) NULL) { ++progname; /* step over the last / */ } else { progname = argv[0]; } if (ErrorFlag > 0) { fprintf(stderr, "use %s -x or %s -xv for an explanation.\n", progname, progname); exit(1); } else if (ErrorFlag < 0) { /* -x was used */ fprintf(stderr, "%s -- remove documents from an lq-text retrieval database\n", progname ); fputs("Options are:\n\ -f file -- read the list of files to index from \"file\"\n\ -w n -- process the hash table when it contains more than n\n\ distinct words (each 20 bytes)\n\ -H n -- set the hash table size to n entries (each 20 bytes)\n\ -- -- all following arguments are file names\n\ \n\ ", stderr); LQT_PrintDefaultUsage(Options); if (LQT_TraceFlagsSet(LQTRACE_VERBOSE|LQTRACE_DEBUG)) { /* used -v or -t1 */ fprintf(stderr, "\n\ Any remaining arguments are taken to be file names. The current\n\ DOCPATH (%s) is searched for the files,\n\ which must be exactly as they were when they were added to the index.\n\ The index information about those files is removed from the database.\n\ The actual files themselves are completely untouched by the operation,\n\ except that they are scanned to determine which words they contain.\n\ If you use the -f option, you should not give filename\n\ arguments on the command line, although you can use \"-f -\" to read the\n\ list of files from standard input, one per line. Setting (with -w) the\n\ size of the cache may dramatically improve performance.\n\ Systems with memory larger than the data can try -w0.\n\ See %s(1) for more information.\n", db->DocPath, progname ); } exit(0); } if (DoNothing) { if (optind < argc) { Error( E_WARN|E_XHINT, "%d extra argument%s ignored...", argc - optind, argc - optind == 1 ? "" : "%s" ); } exit(0); } db = LQT_OpenDatabase(Options, O_RDONLY, 0644); if (!db || LQT_ObtainReadOnlyAccess(db) < 0) { Error(E_FATAL, "couldn't open lq-text database"); } LQT_InitFilterTable(db); LQC_InitialiseUnIndexing(); lqSetSignals(SignalHandler); LQT_ObtainWriteAccess(db); if (InputFile) { if (optind < argc) { Error(E_FATAL|E_USAGE|E_XHINT, "cannot give filenames after -f %s", InputFile ); } else { LQC_UnIndexFrom(db, InputFile); } } else for (; optind < argc; ++optind) { (void) LQC_AddToFilesWaitingToBeDeleted( db, argv[optind], (t_UnIndexCallBack) 0 ); if (SignalFlag) { Error(E_WARN, "Caught signal at level %d, dumping cache", SignalFlag ); LQC_EmptyHashTableOut(db); LQT_CloseDatabase(db); Error(E_FATAL, "Caught signal at level %d, dumped cache, quitting", SignalFlag ); } } while (NumberOfFilesWaiting > 0) { LQC_UnIndexOneWaitingFile(db); } LQC_EmptyHashTableOut(db); LQT_CloseDatabase(db); return 0; } PRIVATE void LQC_UnIndexFrom(db, Name) t_LQTEXT_Database *db; char *Name; { FILE *fp; char *Line; if (Name[0] == '-' && Name[1] == '\0') { fp = stdin; } else { fp = LQU_fEopen(E_FATAL, Name, "list of files to add", "r"); } while (LQU_fReadLine(fp, &Line, LQUF_NORMAL) != -1) { /* Note: * LQU_fReadFile will silently swallow blank lines. * If we use LQUF_NORMAL it will swallow lines that start with a #, * but not delete # signs in the middle of a line. */ (void) LQC_AddToFilesWaitingToBeDeleted( db, Line, (t_UnIndexCallBack) 0 ); if (SignalFlag) { Error(E_WARN, "Caught signal at level %d -- dumping cache", SignalFlag ); LQT_CloseDatabase(db); exit(1); } } if (fp != stdin) { (void) fclose(fp); } } PRIVATE void LQC_UnIndexOneWaitingFile(db) t_LQTEXT_Database *db; { t_FileItem *Next; if (!FilesWaiting) { return; } LQC_UnIndexStream(db, FilesWaiting->FileInfo); if (LQT_RemoveFileInfoFromIndex(db, FilesWaiting->FileInfo) < 0) { Error(E_WARN, "%s not removed from database index (may not have been there)", FilesWaiting->FileInfo->Name ); } LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG, "%d: %s: removed from index.", FilesWaiting->FileInfo->FID, FilesWaiting->FileInfo->Name ); LQT_DestroyFileInfo(db, FilesWaiting->FileInfo); Next = FilesWaiting->Next; (void) efree((char *) FilesWaiting); FilesWaiting = Next; --NumberOfFilesWaiting; } PRIVATE void LQC_UnIndexStream(db, FileInfo) t_LQTEXT_Database *db; t_FileInfo *FileInfo; { t_WordInfo *WordInfo; while (SignalFlag <= 1) { /* needs more than one signal to quit in the middle of a file */ WordInfo = LQT_ReadWordFromFileInfo( db, FileInfo, LQT_READWORD_IGNORE_COMMON ); if (WordInfo == (t_WordInfo *) 0) { break; } else { LQC_UnIndexWord(db, FileInfo, WordInfo); WordInfo->WID = (t_WID) 0; } } if (SignalFlag > 1) { Error(E_WARN, "Signal received during processing of %s", FileInfo->Name); Error(E_WARN, "That and other files may be incomplete..."); return; } } PRIVATE t_UnIndexAction LQC_AddToFilesWaitingToBeDeleted(db, FileName, StatusCallBack) t_LQTEXT_Database *db; char *FileName; t_UnIndexCallBack StatusCallBack; { t_FileInfo *FileInfo; t_FID FID; if ((FID = LQT_NameToFID(db, FileName)) == (t_FID) 0) { if (StatusCallBack) { switch ((* StatusCallBack)(FileName,LQC_UNINDEX_FILE_NOT_INDEXED)) { case LQC_UNINDEX_ACTION_ASSUME_OK: return LQC_UNINDEX_ACTION_ASSUME_OK; case LQC_UNINDEX_ACTION_RETRY: Error(E_INTERNAL|E_WARN, "%s: %d: LQC_AddToFilesWaitingToBeDeleted: LQC_UNINDEX_ACTION_RETRY illegal callback return value for FIILE_NOT_INDEXED, file \"%s\"", FileName ); return LQC_UNINDEX_INTERNAL_ERROR; case LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE: Error(E_WARN, "%s was not indexed in the database, ignored", FileName ); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; case LQC_UNINDEX_ACTION_FAIL_AND_STOP: Error(E_FATAL, "%s was not indexed in the database, quitting", FileName ); default: return LQC_UNINDEX_INTERNAL_ERROR; } } else { Error(E_WARN, "%s was not indexed in the database, ignored", FileName ); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; } } if ((FileInfo = LQT_FIDToFileInfo(db, FID)) == (t_FileInfo *) 0) { Error(E_WARN|E_INTERNAL, "Couldn't get File Info for document %ld, %s -- not unindexed", FID, FileName ); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; } else { /* Check to see if the file has changed since it was last * indexed. If it has, we should delete the old one from * the database and give this one a new FID, but I have * not done that yet -- that's /usr/local/lib/lqtextd or * something, I suppose! Deleting it involves going through the * entire database looking for words that matched that FID and * making them not do so any more, e.g. by deleting them and rewriting * the chain entirely. * TODO: * Simply marking the FID as invalid would go a long way, though. */ struct stat StatBuf; char *theName = LQT_FindFile(db, FileInfo->Name); if (theName && theName != FileInfo->Name) { (void) efree(FileInfo->Name); FileInfo->Name = emalloc(theName, strlen(theName) + 1); (void) strcpy(FileInfo->Name, theName); } theName = FileInfo->Name; if (stat(theName, &StatBuf) >= 0) { if (FileInfo->Date < StatBuf.st_mtime && FileInfo->FileSize != StatBuf.st_size) { if (LQT_TraceFlagsSet(LQTRACE_VERBOSE|LQTRACE_DEBUG)) { Error(E_WARN, "%s changed since last run -- not indexed", FileName ); } LQT_DestroyFileInfo(db, FileInfo); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; } } else { Error(E_WARN|E_SYS, "stat: couldn't get file system information for \"%s\"", FileName ); LQT_DestroyFileInfo(db, FileInfo); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; } } if ((FileInfo->Stream = LQT_MakeInput(db, FileInfo)) == (FILE *) 0) { Error(E_WARN|E_SYS, "couldn't open input filter for %s -- not unindexed", FileInfo->Name ); LQT_DestroyFileInfo(db, FileInfo); return LQC_UNINDEX_ACTION_FAIL_START_NEXT_FILE; } /* OK, so now we are ready to start! */ if (!FilesWaiting) { NextFileItem = &FilesWaiting; } *NextFileItem = (t_FileItem *) emalloc("pending file", sizeof(t_FileItem)); (*NextFileItem)->Next = (t_FileItem *) 0; (*NextFileItem)->FileInfo = FileInfo; (*NextFileItem)->StatusCallback = StatusCallBack; (*NextFileItem)->HaveStarted = 1; NextFileItem = &(*NextFileItem)->Next; ++NumberOfFilesWaiting; if (NumberOfFilesWaiting > LQC_MAX_FILES_WAITING) { LQC_UnIndexOneWaitingFile(db); } return LQC_UNINDEX_ACTION_ASSUME_OK; } /** Hash Code **/ static int BitsToShift = 0; PRIVATE void LQC_InitialiseUnIndexing() { unsigned long RealHashSize = 1; if (PendingDeletes) { Error(E_FATAL|E_BUG, "LQC_InitialiseUnIndexing() called more than once" ); } while (RealHashSize <= HashSize) { RealHashSize <<= 1; ++BitsToShift; } /* gone too far: */ RealHashSize >>= 1; --BitsToShift; if (RealHashSize == HashSize) { LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG, "Using %ld hash table entries (%d bits), %ld bytes", HashSize, BitsToShift, HashSize * sizeof(t_WordToDelete) ); } else { /* HashSize had more than one bit set */ RealHashSize <<= 1; ++BitsToShift; LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG, "Hash size increased from %ld to %ld to be a power of two", HashSize, RealHashSize ); LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG, "Using %ld hash table entries [%d bits], %ld bytes", HashSize, BitsToShift, HashSize * sizeof(t_WordToDelete) ); HashSize = RealHashSize; } PendingDeletes = (t_WordToDelete *) ecalloc( "Hash Table for pending words", HashSize, sizeof(t_WordToDelete) ); TotalWordsInHashTable = 0; } PRIVATE void LQC_RemoveUnwantedPlacesFor( #ifdef HAVE_PROTO t_LQTEXT_Database *db, t_WordToDelete *WordToDelete #endif ); #ifdef ASCIITRACE PRIVATE void PrintHashTable() { unsigned long theSlot; LQT_Trace(LQTRACE_DEBUG, "Hash table begins:"); for (theSlot = 0; theSlot < HashSize; theSlot++) { register t_WordToDelete *theEntry; theEntry = &PendingDeletes[theSlot]; if (theEntry->WID) { do { LQT_Trace(LQTRACE_DEBUG, "\t%d: WID %d", theSlot, theEntry->WID ); theEntry = theEntry->Next; } while (theEntry); } } LQT_Trace(LQTRACE_DEBUG, "Hash table ends."); } #endif PRIVATE void LQC_EmptyHashTableOut(db) t_LQTEXT_Database *db; { unsigned long theSlot; #ifdef ASCIITRACE if (LQT_TraceFlagsSet(LQTRACE_DEBUG)) { LQT_Trace(LQTRACE_DEBUG, "LQC_EmptyHashTableOut..."); PrintHashTable(); } #endif for (theSlot = 0; theSlot < HashSize; theSlot++) { register t_WordToDelete *theEntry; theEntry = &PendingDeletes[theSlot]; if (theEntry->WID) { t_WordToDelete *StartingPlace = theEntry; /* remove the entries from the database: */ do { LQC_RemoveUnwantedPlacesFor(db, theEntry); theEntry = theEntry->Next; } while (theEntry); /* reclaim storage: */ theEntry = StartingPlace; do { t_WordToDelete *Next = theEntry->Next; if (theEntry->FIDArray) { (void) efree((char *) theEntry->FIDArray); theEntry->FIDArray = 0; } if (theEntry == StartingPlace) { /* the first entry is part of an array, * so we don't free it. */ (*theEntry) = ZeroEntry; /* structure copy */ } else { (void) efree((char *) theEntry); } theEntry = Next; } while (theEntry); } } /* for each slot */ TotalWordsInHashTable = 0; } PRIVATE void LQC_AddFileToSlot(theFileInfo, theWordInfo, Entry) t_FileInfo *theFileInfo; t_WordInfo *theWordInfo; register t_WordToDelete *Entry; { long NewLength = 0; register unsigned long theSlot; /* When this function is called, we've already identified * that Entry is the right hash-table slot, and so we're going to * add the FID represented by theFileInfo to the list of files * whose wordplaces are to be removed for this word. * * If that FID is already here, we don't need to add it again -- * we're going to delete all the matches of this word for that * file anyway. */ if (Entry->FIDArray) { for (theSlot = 0; Entry->FIDArray[theSlot] != (t_FID) 0; theSlot++) { if (Entry->FIDArray[theSlot] == theFileInfo->FID) { /* it's already there */ #ifdef ASCIITRACE LQT_Trace(LQTRACE_DEBUG, "(%ld=%*.*s: FID %ld already there, slot %ld)", theWordInfo->WID, theWordInfo->Length, theWordInfo->Length, theWordInfo->Word, theFileInfo->FID, theSlot ); #endif return; } } } else { theSlot = 0L; } #ifdef ASCIITRACE LQT_Trace(LQTRACE_DEBUG, "<- %*.*s\tslot %ld\t%ld=%s", theWordInfo->Length, theWordInfo->Length, theWordInfo->Word, theSlot, theFileInfo->FID, theFileInfo->Name ); #endif switch (theSlot) { case 0: Entry->FIDArray = (t_FID *) emalloc( "FIDArray", sizeof(t_FID) * 2 ); break; case 1: /* second time round */ NewLength = 4; break; case 3: /* third time round */ NewLength = 8; break; case 7: NewLength = 12; break; case 11: NewLength = 16; break; default: if (((theSlot + 1) & 15) == 0) { NewLength = theSlot + 16; } break; } if (NewLength) { Entry->FIDArray = (t_FID *) erealloc( (char *) Entry->FIDArray, NewLength * sizeof(t_FID) ); } Entry->WID = theWordInfo->WID; Entry->FIDArray[theSlot++] = theFileInfo->FID; Entry->FIDArray[theSlot] = (t_FID) 0; } PRIVATE void LQC_UnIndexWord(db, FileInfo, WordInfo) t_LQTEXT_Database *db; t_FileInfo *FileInfo; t_WordInfo *WordInfo; { t_WordToDelete *theEntry; t_WordToDelete **Entryp; /* We assume that the following elements of WordInfo are OK, * but probably not any others: * WordInfo->Word -- the actual word (not necessarily nul-terminated) * WordInfo->Length -- the length of the word, excluding any trailing \0 * WordInfo->WID -- if not 0, this is the WID of the word */ #ifdef ASCIITRACE if (LQT_TraceFlagsSet(LQTRACE_WORDINFO)) { LQT_Trace(LQTRACE_WORDINFO, "unindex word: %*.*s", WordInfo->Length, WordInfo->Length, WordInfo->Word ); } #endif if (WordInfo->WID) { WordInfo->WID = (t_WID) 0; } if (!WordInfo->WID) { static t_FID LastFIDWarnedAbout = (t_FID) 0; WordInfo->WID = LQT_WordToWID(db, WordInfo->Word, WordInfo->Length); if (!WordInfo->WID) { static int ErrorCount = 0; if (FileInfo->FID != LastFIDWarnedAbout || ErrorCount < 4) { if (FileInfo->FID == LastFIDWarnedAbout) { ErrorCount++; } else { LastFIDWarnedAbout = FileInfo->FID; ErrorCount = 1; } Error((ErrorCount < 4) ? E_WARN : (E_WARN|E_MULTILINE), "%s: document contains word not previously indexed: %*.*s", FileInfo->Name, WordInfo->Length, WordInfo->Length, WordInfo->Word ); if (ErrorCount == 4) { Error(E_WARN|E_MULTILINE|E_LASTLINE, "%s: (no more warnings for this document)", FileInfo->Name ); } } return; } } theEntry = &PendingDeletes[WordInfo->WID & (HashSize - 1)]; if (theEntry->WID == (t_WID) 0) { LQC_AddFileToSlot(FileInfo, WordInfo, theEntry); return; } if (theEntry->WID != WordInfo->WID) { for (Entryp = &(theEntry->Next); *Entryp; Entryp = &(*Entryp)->Next) { if ((*Entryp)->WID >= WordInfo->WID) { break; } } if (!*Entryp || (*Entryp)->WID != WordInfo->WID) { /* Not found, so make a new entry */ /* Actually, if there are already too many entries, * We'll clear some out and try again */ if (TotalWordsInHashTable + 1 > MaxChainedWordsInHashTable) { LQC_EmptyHashTableOut(db); LQC_UnIndexWord(db, FileInfo, WordInfo); return; } ++TotalWordsInHashTable; theEntry = (t_WordToDelete *) ecalloc( "One Word To Delete", 1, sizeof(t_WordToDelete) ); LQC_AddFileToSlot(FileInfo, WordInfo, theEntry); /* List insertion: */ theEntry->Next = (*Entryp); *Entryp = theEntry; /* done! */ return; } else { /* Already there */ theEntry = (*Entryp); } } if (!theEntry) { Error(E_BUG|E_FATAL, "%s: %c: theEntry is zero adding word \"%*.*s\" in file %s", __FILE__, __LINE__, WordInfo->Length, WordInfo->Length, WordInfo->Word, FileInfo->Name ); } if (theEntry->WID != WordInfo->WID) { Error(E_BUG|E_FATAL, "%s: %c: theEntry->WID %ld != %d adding word \"%*.*s\" in file %s", __FILE__, __LINE__, theEntry->WID, WordInfo->WID, WordInfo->Length, WordInfo->Length, WordInfo->Word, FileInfo->Name ); } LQC_AddFileToSlot(FileInfo, WordInfo, theEntry); } static t_FID *FIDsToDelete = 0; static int Last = 0; PRIVATE void LQC_PrepareEntryForExecution(theEntry) t_WordToDelete *theEntry; { FIDsToDelete = theEntry->FIDArray; Last = 0; } PRIVATE int LQC_FIDIsWanted(db, WID, WordPlace) t_LQTEXT_Database *db; t_WID WID; t_WordPlace *WordPlace; { register t_FID queryFID; if (FIDsToDelete == (t_FID *) 0) { return 1; } queryFID = WordPlace->FID; while (FIDsToDelete[Last] != (t_FID) 0) { register t_FID ArrayFID = FIDsToDelete[Last]; /* Reminder: * The FIDS in the array are sorted, and then * null terminated. */ if (ArrayFID == queryFID) { return 0; /* not wanted */ } else if (ArrayFID > queryFID) { return 1; } else { ++Last; } } return 1; /* not in the ToDelete list */ } PRIVATE void LQC_RemoveUnwantedPlacesFor(db, WordToDelete) t_LQTEXT_Database *db; t_WordToDelete *WordToDelete; { t_pblock *pblock; t_WordInfo *WordInfo; /** get the old entry */ if (!WordToDelete->WID || !(WordInfo = LQT_WIDToWordInfo(db, WordToDelete->WID)) ) { Error(E_BUG, "Word with WID %ld went away!", WordToDelete->WID); return; } #ifdef ASCIITRACE if (LQT_TraceFlagsSet(LQTRACE_WORDINFO)) { LQT_fprintWordInfo(db, stderr, WordInfo, "LQC_RemoveUnwantedPlacesFor"); } #endif if (WordInfo->NumberOfWordPlaces == 0) { (void) LQT_DestroyWordInfo(db, WordInfo); return; } LQC_PrepareEntryForExecution(WordToDelete); pblock = LQT_GetpblockWhere(db, WordInfo, LQC_FIDIsWanted); if (pblock->NumberOfWordPlaces != WordInfo->NumberOfWordPlaces) { pblock->NumberOfWordPlaces = WordInfo->NumberOfWordPlaces; } /* delete the old entry from disk */ if (WordInfo->Offset) { /* Remove the old information from disk. */ LQT_DeleteWordPlaces(db, WordInfo->Offset, WordInfo->WID); } WordInfo->Offset = pblock->ChainStart = 0L; /* it's invalid now... */ /* First, let's make an index entry: */ if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock(db)) { (void) LQT_MakeWordInfoBlock(db, WordInfo, pblock); } /** write out the new entry */ if (WordInfo->WordPlacesInHere == pblock->NumberOfWordPlaces) { /* In this case it all fits into the WID index */ pblock->ChainStart = 0L; } else { (void) LQT_Writepblock(db, WordInfo, pblock); } if (LQT_PutWordInfoIntoIndex(db, WordInfo, pblock->ChainStart) < 0) { Error(E_FATAL|E_SYS, "UpdateEntry: Couldn't update \"%s\" in database at 0x%lx", WordInfo->Word, pblock->ChainStart ); } /** reclaim storage */ (void) efree((char *)pblock); (void) LQT_DestroyWordInfo(db, WordInfo); }