/* addfile.h -- Copyright 1989, 1994, 1996 Liam R. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* $Id: addfile.h,v 1.7 1996/08/14 16:50:11 lee Exp $
 *
 * This file defines the parameter to DumpCache, together with the
 * tunable predicates that decide when the word cache is dumped.
 *
 * Every predicate below is wrapped in #ifndef, so a build can override
 * it by defining the macro before this header is included.
 *
 * All of the function-like macros take the same four arguments:
 *   InCacheCount -- first count compared against MaxInCache
 *   MaxInCache   -- the limit that InCacheCount is compared against
 *   SlotsUsed    -- number of slots in use
 *   TotalSlots   -- total number of slots
 * Each parameter is parenthesized in the expansions, so arbitrary
 * expressions (e.g. `used + 1') may safely be passed.
 *
 * NOTE(review): the products (`* 100', `* 10', ...) are evaluated in the
 * promoted type of the arguments; presumably the counts are small enough
 * that these cannot overflow -- confirm against the callers.
 */

#ifndef LQ_ADDFILE_H
#define LQ_ADDFILE_H

/* If we don't find an empty cache slot after this many tries,
 * we decide the hash table is full and clear it out, then try again.
 */
#define MAXPROBES 15

/* Flag values for the DumpCache parameter (octal constants, one bit each). */
#define DUMP_CACHE  00 /* normal usage */
#define DUMP_SYNC   01 /* write all cached entries to disk */
#define DUMP_NOFREE 02 /* don't bother calling free() */
#define DUMP_FAST   04 /* only do a fast dump */

/* When the word-cache fills up, we first try and dump all of
 * the words that occur DUMP_FAST_THRESH or fewer times.  This generally
 * clears up a lot of slots in the hash table.  If that hasn't cleared
 * enough memory, we then dump the single most frequent word, before
 * increasing the dump threshold and trying again.
 * Now that updating the index is possible, I am not sure that this is
 * the best way to continue, but DUMP_FAST_THRESH is certainly an effective
 * way to clear slots in the cache.
 * MUST_DUMP is used to start the dump process.
 */
#ifndef DUMP_FAST_THRESH
# define DUMP_FAST_THRESH 5
#endif

/* MUST_FREE_SLOTS is true when we need more free slots in the symbol table
 * used to cache words being indexed.
 *
 * It is non-zero once more than 80% of the slots are in use (that is, when
 * fewer than 20% remain free), so that the hash function doesn't spend
 * ages finding a free slot.
 *
 * InCacheCount and MaxInCache are accepted but not used by this default
 * definition.
 *
 * NOTE(review): this comment formerly read "We require 80% of the slots
 * to be free", which does not match the expression; the expression's
 * behaviour has been kept unchanged -- confirm which was intended.
 */
#ifndef MUST_FREE_SLOTS
# define MUST_FREE_SLOTS(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ((SlotsUsed) * 10 > (TotalSlots) * 8)
#endif

/* MUST_FREE_MORE_SLOTS is true when we have already started to dump the
 * cache to disk and we are checking to see when to stop.  This value is set
 * so that we don't immediately dump the cache again -- it must take a while
 * to fill up, no?
 * We require at least 80% of the slots to be empty (the macro stays
 * non-zero while more than one fifth of the slots are in use), so that
 * finding a slot for a new word doesn't take too long.
 *
 * Usually there are plenty of slots to go round, but if you index something
 * with a huge vocabulary (/usr/dict/words, for example!) where there are
 * lots of words each occurring infrequently, you may run low on slots, and
 * with that sort of input, you really do need lots of slots free.
 *
 * So dumping all but 20% is only a performance hit when there are lots of
 * infrequent words, and that's when we need to do it most anyway.
 *
 * InCacheCount and MaxInCache are accepted but not used by this default
 * definition.
 */
#ifndef MUST_FREE_MORE_SLOTS
# define MUST_FREE_MORE_SLOTS(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ((SlotsUsed) * 5 > (TotalSlots))
#endif

/* MUST_DUMP evaluates to non-zero when lqaddfile must dump its cache:
 * either InCacheCount + 1 has reached MaxInCache, or MUST_FREE_SLOTS
 * is true.
 * MUST_DUMP_MORE is used to see if it's OK to stop dumping.
 */
#ifndef MUST_DUMP
# define MUST_DUMP(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ( ((InCacheCount) + 1 >= (MaxInCache)) || \
      MUST_FREE_SLOTS((InCacheCount),(MaxInCache),(SlotsUsed),(TotalSlots)) )
#endif

/* MUST_DUMP_MORE returns 0 when the cache is sufficiently dumped to
 * enable more indexing to proceed.  It stays non-zero while InCacheCount
 * is at least 60% of MaxInCache (i.e. we allow the cache to be up to
 * 60% full), or while MUST_FREE_MORE_SLOTS is true.
 */
#ifndef MUST_DUMP_MORE
# define MUST_DUMP_MORE(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ( ((InCacheCount) * 100 >= (MaxInCache) * 60) || \
      MUST_FREE_MORE_SLOTS((InCacheCount),(MaxInCache),(SlotsUsed),(TotalSlots)) )
#endif

/* MUST_FILE_DUMP returns 1 if we should dump the cache between files:
 * InCacheCount is at least 98% of MaxInCache, or more than 75% of the
 * slots are in use (i.e. we allow the slots to be up to 75% full).
 */
#ifndef MUST_FILE_DUMP
# define MUST_FILE_DUMP(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ( ((InCacheCount) * 100 >= (MaxInCache) * 98) || \
      ((SlotsUsed) * 100 > (TotalSlots) * 75) )
#endif

/* MUST_FILE_DUMP_MORE returns 1 if we haven't finished dumping between
 * files: InCacheCount is still at least 90% of MaxInCache (i.e. we allow
 * the cache to be up to 90% full), or MUST_FREE_MORE_SLOTS is true.
 */
#ifndef MUST_FILE_DUMP_MORE
# define MUST_FILE_DUMP_MORE(InCacheCount,MaxInCache,SlotsUsed,TotalSlots) \
    ( ((InCacheCount) * 100 >= (MaxInCache) * 90) || \
      MUST_FREE_MORE_SLOTS((InCacheCount),(MaxInCache),(SlotsUsed),(TotalSlots)) )
#endif

#endif /* LQ_ADDFILE_H */