/* lqaddfile.c -- Copyright 1989, 1990, 1995, 1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* addfile -- add a file to the LQ-Text text retrieval index
 * Liam Quin, August 1989 and later...
 *
 * $Id: lqaddfile.c,v 1.41 96/07/27 01:38:12 lee Exp $ 
 */

/* RCS identification string for this source file; main() prints it
 * on stderr when the -V option is given.
 */
static char *Version =
    "@(#) $Id: lqaddfile.c,v 1.41 96/07/27 01:38:12 lee Exp $";

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <stdlib.h>

#include <stdio.h>
#include <ctype.h>
#include <signal.h> /* SIGKILL, sig_atomic_t */

#include <sys/types.h>
#include <sys/stat.h>

#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif

#include <malloc.h>

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

#include "fileinfo.h"
#include "wordinfo.h"
#include "wordrules.h"
#include "emalloc.h"
#include "addfile.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "filter.h"
#include "lqtrace.h"
#include "revision.h"

/** Functions within this file that need declaring: **/

/* AddStream -- reads every word of one document and hands each to
 * AddWord(); defined at the end of this file.
 * The parameter list is only spelt out when HAVE_PROTO is defined,
 * so that pre-ANSI compilers see an old-style declaration.
 */
PRIVATE void AddStream(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    t_FileInfo *FileInfo
#endif
);


/* Symbol Table Interface */

/* AddWord -- defined in another file; AddStream() calls it once for
 * each word occurrence it has read.
 */
extern void AddWord(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    t_WordInfo *WordInfo
#endif
);

/* DumpCache -- defined in another file; main() calls it with
 * DUMP_SYNC (optionally or'd with DUMP_NOFREE) before closing the
 * database.
 */
extern void DumpCache(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    int CallFree
#endif
);

/* SetDumpThresh -- defined in another file; main() passes it the
 * numeric argument of the -M option.
 */
extern void SetDumpThresh(
#ifdef HAVE_PROTO
    t_lqdbOptions *Options,
    int Thresh
#endif
);

/* AddFile -- index a single named file; returns 0 on success and -1
 * otherwise.  Defined later in this file.
 */
PRIVATE int AddFile(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    char *Name
#endif
);


/**/

/* Program name used in messages.  It starts out as the RCS id string;
 * main() replaces it with argv[0] and later with the part of argv[0]
 * after the last '/'.
 */
char *progname = "@(#) $Id: lqaddfile.c,v 1.41 96/07/27 01:38:12 lee Exp $";

#ifdef USE_LINENUMBERS
/* NOTE(review): not referenced anywhere in the code visible here. */
static int UseLineNumbers = 0;
#endif

/* The database we'll operate on.
 * main() stores the open database here before installing
 * SignalHandler(), which has no other way to reach it when it needs
 * to call LQT_CloseDatabase().
 */
static t_LQTEXT_Database *dbForSignalHandler;
/* Number of quit signals received so far.
 * SignalHandler() increments it asynchronously and the indexing loops
 * poll it, so it has to be volatile; sig_atomic_t (from <signal.h>) is
 * the integer type the C standard allows a signal handler to store to.
 */
static volatile sig_atomic_t SignalFlag = 0;

/* SignalHandler -- installed by main() through lqSetSignals().
 *
 * Every call bumps SignalFlag, which the indexing loops poll so that
 * they can stop at a convenient point.  Once more than three signals
 * have arrived we stop waiting for that: the database is closed and a
 * fatal error is reported.
 *
 * Always returns 0.
 */
int
SignalHandler()
{
    SignalFlag += 1;

    if (SignalFlag <= 3) {
	/* let the main loop notice the flag in its own time */
	return 0;
    }

    LQT_CloseDatabase(dbForSignalHandler);
    Error(E_FATAL,
	"received %d signals to quit, exiting; db may be corrupt!.",
	SignalFlag
    );

    return 0;
}

/* SetHashSize -- defined in another file; main() passes it the
 * numeric argument of the -H option.
 */
extern int SetHashSize(
#ifdef HAVE_PROTO
    t_lqdbOptions *Options,
    int theNewSize
#endif
);


/* NOTE(review): nothing in the code visible here reads or sets this
 * flag after its initialisation.
 */
static char IAmAChildDaemon = 0;

/* t_ThingStream -- the state of the "find file daemon".
 *
 * startFindFileDaemon() fills one of these in, getThingToIndex()
 * reads file descriptions through it, and destroyFindFileDaemon()
 * shuts it down.
 *
 * pid is always present (it used to depend on HAVE_PIPE) because the
 * functions that use it refer to it unconditionally.
 */
typedef struct {
    char isRunning;	/* non-zero in the parent once the child was forked */
    FILE *input;	/* parent: the reading end of the pipe, or NULL */
    FILE *output;	/* child: the writing end of the pipe, or NULL */
    int pid;		/* parent: process id of the child; 0 if none */
    char *inputFile;	/* file holding the list of names (-f), or NULL */
    int optind;		/* how far we've got processing arguments */
    int argc;		/* number of filename arguments in argv */
    char **argv;	/* the filename arguments */
} t_ThingStream;

/* t_Thing -- one document waiting to be indexed; getThingToIndex()
 * returns a pointer to one of these for each "add" line it reads.
 *
 * NOTE(review): getThingToIndex() stores the first name field of an
 * "add" line in FileName and the second in DocumentName, which looks
 * like the opposite of the two descriptions below -- confirm which of
 * them is intended.
 */
typedef struct {
    /* what the user gave */
    char *DocumentName;

    /* where we actually found it */
    char *FileName;

    /* the number parsed from the type field of the "add" line */
    int FileType;
} t_Thing;

/* getThingToIndex -- fetch the next document description from the
 * findFileDaemon.
 *
 * Lines are read from findFileDaemon->input.  Each one is either
 *     warn<tab>message
 * which is reported with Error(E_WARN) and skipped, or
 *     add<tab>type<tab>name<tab>location
 * which is turned into a t_Thing.  Empty lines, and lines that start
 * with neither keyword, are skipped silently; malformed lines are
 * reported as protocol errors and skipped.
 *
 * Returns a pointer to a static t_Thing, overwritten by the next
 * call, or NULL when there is no daemon, no input stream, or the
 * input has reached EOF (in which case the stream is closed too).
 *
 * The strings in the result point into the line buffer, which we take
 * over with LQU_StealReadLineBuffer() and free on the following call.
 */
PRIVATE t_Thing *
getThingToIndex(findFileDaemon)
    t_ThingStream *findFileDaemon;
{
    static t_Thing Result;
    static char *SavedLine = (char *) 0; /* buffer behind Result */
    char *Line;
    int len;

    if (!findFileDaemon || !findFileDaemon->isRunning) {
	return (t_Thing *) NULL;
    }

    if (!findFileDaemon->input) {
	return (t_Thing *) NULL;
    }

    /* The 0 in the call to LQU_fReadLine() prevents it from interpreting
     * a # at the start of a line as beginning a comment.
     * If LQUF_IGNSPACE were to be used instead, LQU_fReadLine would also
     * elide leading & trailing spaces from the input.
     */
    while ((len = LQU_fReadLine(findFileDaemon->input, &Line, 0)) != -1) {
	char *p;
	char *firstName;
	int FileType;

	if (!Line || !*Line) {
	    continue;
	}

	if (len < 6) {
	    Error(E_WARN|E_INTERNAL|E_BUG,
		"protocol error: findFileDaemon returned [%s]",
		Line
	    );
	    continue;
	}

	/* the daemon can pass warnings through to us so that we can
	 * report them -- since it's a separate process, its own
	 * warnings would be randomly interspersed with ours, making
	 * them hard to read.
	 */
	if (STRNCMP(Line, "warn\t", 5) == 0) {
	    /* "warn\t" is 5 bytes long, so the text starts at [5] */
	    Error(E_WARN, "%s", &Line[5]);
	    continue;
	}

	if (STRNCMP(Line, "add\t", 4) != 0) {
	    continue;
	}

	/* The format is
	 * add<tab>type<tab>name<tab>location\n
	 * where type is an integer offset into the filter table;
	 * name is the filename as given by the user;
	 * location is the full pathname where we found the file
	 */
	FileType = 0;
	p = &Line[4];
	/* <ctype.h> functions need an unsigned char value */
	while (isdigit((unsigned char) *p)) {
	    FileType = (FileType * 10) + (*p - '0');
	    p++;
	}

	/* Anything but a tab here -- including the end of the
	 * line -- means the whole line is bad, so go on to the next.
	 */
	if (*p != '\t') {
	    Error(E_WARN|E_INTERNAL|E_BUG,
		"findFileDaemon protocol error after type in [%s]",
		Line
	    );
	    continue;
	}

	/* ASSERT: *p == \t */

	p++;
	firstName = p;
	while (*p && *p != '\t') {
	    p++;
	}

	if (*p != '\t') {
	    Error(E_WARN|E_INTERNAL|E_BUG,
		"findFileDaemon protocol error after fn in [%s]",
		Line
	    );
	    continue;
	}

	/* Check before we chop the line up, so that the message can
	 * still show all of it.
	 */
	if (p[1] == '\0') {
	    Error(E_WARN|E_INTERNAL|E_BUG,
		"findFileDaemon protocol error (empty docname) in [%s]",
		Line
	    );
	    continue;
	}

	*p = '\0';
	++p;

	/* Only now that the line is known to be good do we touch
	 * the static result.
	 * NOTE(review): the comments on t_Thing suggest that these two
	 * names might be meant the other way round -- confirm.
	 */
	Result.FileType = FileType;
	Result.FileName = firstName;
	Result.DocumentName = p;

	if (SavedLine) {
	    /* It's left over from last time we were called */
	    efree(SavedLine);
	}
	/* save the line, and free it later: */
	SavedLine = LQU_StealReadLineBuffer();

	return &Result;
    }

    /* reached EOF */

    /* close the file descriptor, so we don't get a broken pipe signal */
    (void) fclose(findFileDaemon->input);
    findFileDaemon->input = (FILE *) NULL;

    /* signal EOF by returning NULL: */
    return (t_Thing *) NULL;
}

/* destroyFindFileDaemon -- shut down the findFileDaemon child.
 *
 * Closes our end of the pipe, sends the child SIGKILL and then calls
 * wait() until the child has been collected.  The fate of every child
 * collected on the way is traced.  When this returns normally after a
 * kill, findFileDaemon->pid and ->isRunning are both zero.
 *
 * A NULL argument is reported as a fatal internal error.
 * Nothing is done if the daemon is not marked as running, and nothing
 * beyond closing the pipe if no child process id was recorded.
 */
PRIVATE void
destroyFindFileDaemon(findFileDaemon)
    t_ThingStream *findFileDaemon;
{
    int status;

    if (!findFileDaemon) {
	Error(E_BUG|E_FATAL|E_INTERNAL,
	    "%s: %d: destroyFindFileDaemon(NULL)", __FILE__, __LINE__
	);
	return; /* in case Error() comes back */
    }

    if (!findFileDaemon->isRunning) {
	return;
    }

    if (findFileDaemon->input) {
	(void) fclose(findFileDaemon->input);
	findFileDaemon->input = (FILE *) NULL;
    }

    if (findFileDaemon->pid <= 0) {
	/* already done */
	return;
    }

    /* kill() takes the process first and the signal second */
    (void) kill(findFileDaemon->pid, SIGKILL);

    for (;;) {
	int thePid;
	char *name;

	status = 0;
	thePid = wait(&status);

	if (thePid == -1) {
	    /* no more children, so ours has already gone... */
	    Error(E_WARN, "findFileDaemon went away silently...");
	    findFileDaemon->pid = 0;
	    findFileDaemon->isRunning = 0;
	    return;
	}

	if (thePid == findFileDaemon->pid) {
	    name = "findFile daemon";
	} else {
	    name = "unknown child process";
	}

	/* Decode the status by hand, as the traditional layout:
	 * low byte 0177 means stopped; low byte 0 means exit(), with
	 * the exit code in the next byte up; otherwise the low 7 bits
	 * are the fatal signal and the 0200 bit marks a core dump.
	 */
	if ((status & 0377) == 0177) {
	    /* a child was suspended -- we don't care.
	     * probably it's being traced or debugged.
	     */
	    if (thePid == findFileDaemon->pid) {
		Error(E_BUG|E_WARN,
		    "destroyFindFileDaemon: daemon %d is stopped!", thePid
		);
		continue;
	    }
	} else if ((status & 0377) == 0) {
	    /* it died by calling exit */
	    LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG,
		"%s: %d: exit %d",
		name, thePid, (status >> 8) & 0377
	    );
	} else {
	    /* it died from a signal; only here can the core-dump
	     * bit be set, so this is where we mention it.
	     */
	    char *msg = "";

	    if (status & 0200) {
		msg = " (memory image saved to \"core\" for debugging)";
	    }

	    LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG,
		"%s: %d: killed by signal %d%s",
		name, thePid, status & 0177, msg
	    );
	}

	if (thePid == findFileDaemon->pid) {
	    findFileDaemon->pid = 0;
	    findFileDaemon->isRunning = 0;
	    break;
	}
    } /* forever */
}

PRIVATE t_ThingStream *
startFindFileDaemon(db, Options, argc, argv, inputFile)
    t_LQTEXT_Database *db;
    t_lqdbOptions *Options;
    int argc;
    char *argv[];
    char *inputFile;
{
    static t_ThingStream Me;
    int forkedOK = 0;
    int io[2];
    *pidp = -1;
    t_FileInfo *FileInfo;

    Me.pid = 0;
    Me.input = 0;
    Me.isRunning = 0;
    Me.inputFile = inputFile;
    Me.argc = argc;
    Me.argv = argv:
	/* optind is used to record how far we've got processing arguments */
    Me.optind = 0;

    /* There are two strategies hidden here.
     * (1) If we can do forking (e.g. we're on Unix), we
     *     start up a separate process, the findFileDaemon.
     * The findFileDaemon looks at each filename we're given,
     * finds it somewhere in the database document path, and tries to
     * determine its file type by opening it and inspecting it, or by
     * using its suffix.
     * Whenever it suceeds, it writes out the follwing:
     *     the file type (an integer)
     *     the filename as given
     *     the filename actually found
     * onto a pipe that the parent addfile process is reading.
     *
     * The parent calls getThingToIndex() repeatedly to read this
     * information and index the file.  This strategy means that while
     * the findFileDaemon is waiting for an open() or stat() or whatever,
     * the parent can be doing indexing -- and vice versa.
     *
     * The parent has to do a single open on each file, but the inode will
     * usually be in the cache, unless the files are really big, in which
     * case the extra overhead is insignificant anyway.
     *
     * (2) if there is no forking available (e.g. on Windows NT), we
     * simply save state in the struct we return, and do all the work in
     * getThingToIndex(), returning the same object as before.
     */

#ifdef HAVE_FORK
    if (pipe(io) == -1) {
	Error(E_WARN|E_SYS, "startFindFileDaemon: couldn't make pipe");

	/* no point making a fork now... */
	forkedOK = 0;
    } else {
	Me.pid = fork();

	if (Me.pid == -1) {
	    Error(E_WARN|E_SYS, "startFindFileDaemon: couldn't fork");
	    forkedOK = 0;
	} else if (Me.pid != 0) {
	    /* parent: we've started the child successfully: */
	    Me.isRunning = 1;

	    /* we'll read from one end of the pipe... */
	    Me.input = fdopen(io[0], "r");
	    Me.output = (FILE *) NULL;

	    /* ... and close the end of the pipe that you write into: */
	    (void) close(fd[1]);

	    /* Me now encapsulates everything we need. */
	    return &Me;
	} else {
	    /* child process */
	    forkedOK = 1;

	    /* The child doesn't want the reading end of the pipe */
	    (void) close(io[0]);
	    Me.input = (FILE *) NULL;
	    Me.output = fdopen(io[1], "w");

	    /* fall through and continue */
	}
    }

    if (InputFile) {
	FILE *fp;
	char *Line;

	if (Name[0] == '-' && Name[1] == '\0') {
	    fp = stdin;
	} else {
	    fp = LQU_fEopen(E_FATAL, Name, "list of files to add", "r");
	}

	while (LQU_fReadLine(fp, &Line, 0) != -1) {
	    if (forkedOK) {
		FileInfo = oneFileNameForDaemon(db, InputFile);
		if (FileInfo) {
		    fprintf(Me.output,
			"%d\t%s\t%s\n",
			FileInfo->FilterType,
			Line,
			FileInfo->Name;
		    );
		}
	    } else {
		AddFile(db, Line);
	    }
	}

	if (fp != stdin) {
	    fclose(fp);
	}
    } else {
	int optind;

	for (optind = 0; optind < argc; ++optind) {
	    if (forkedOK) {
		oneFileName(db, InputFile);
	    } else {
		AddFile(db, Line);
	    }
	}
    }

    if (forkedOK) {
	/* We are the child process, and we've finished.
	 * The parent will kill us eventually, but in the
	 * mean-time, let's sleep.
	 * This is because if the child dies first, the parent on
	 * some systems may get a broken pipe message.
	 * TODO: trap SIG_PIPE??
	 * So we wait meekly and humbly to be executed...
	 *
	 * Note that we won't get a chance to close the database -- so
	 * it's fortunate that we have not yet asked for write access...
	 */
	for (;;) {
	    (void) sleep(32760); /* wait 8 hours or so, */
	    /* I am using 32760 in case some systems only allow a
	     * 16-bit argument here...
	     */
	}
    }
}


int
main(argc, argv)
    int argc;
    char *argv[];
{
    extern int getopt();
    extern char *optarg;
    extern int optind;
    extern int MaxWordsInCache; /* see wordtable.c */

    int c;
    int ErrorFlag = 0;
    int DoNothing = 0;
    char *InputFile = (char *) 0;
    t_LQTEXT_Database *db;
    t_lqdbOptions *Options;
    FILE *findFileDaemon;

#ifdef MALLOCTRACE
    malloc_debug(2);
#endif

    progname = argv[0]; /* retain the full path at first */

#ifdef M_MXFAST
    (void) mallopt(M_MXFAST, 6); /* i.e. typical word length with \0 */
    /* may need to comment mallopt() out entirely for BSD -- use ifndef.
     * seems to work under SunOS, though.
     * It says "Allocate 100 or so chunks of this size at a time, and whenever
     * I ask for this much or less, give me one of the chunks".
     */
#endif
    Options = LQT_InitFromArgv(argc, argv);

    while ((c = getopt(argc, argv, "w:f:H:M:xVZz:")) != -1) {
	switch (c) {
	case 'M':
	    if (!LQU_cknatstr(optarg)) {
		Error(E_FATAL|E_USAGE|E_XHINT,
		    "-M must be given a number >= 0, not \"%s\"",
		    optarg
		);
	    }
	    SetDumpThresh(Options, atoi(optarg));
	    break;
	case 'H':
	    if (!LQU_cknatstr(optarg)) {
		Error(E_FATAL|E_USAGE|E_XHINT,
		    "-H must be given a hash table size >= 1, not \"%s\"",
		    optarg
		);
	    }
	    SetHashSize(Options, atoi(optarg));
	    break;
	case 'w':
	    if (!LQU_cknatstr(optarg)) {
		Error(E_FATAL|E_USAGE|E_XHINT,
		    "-w must be given a number >= 0, not \"%s\"",
		    optarg
		);
	    }
	    MaxWordsInCache = atoi(optarg);
	    break;
	case 'Z':
	case 'z':
	    break; /* work done in SetDefault() */
	case 'V':
	    fprintf(stderr, "%s: Release: %s\n", progname, LQTEXTREVISION);
	    fprintf(stderr, "%s: Revision: %s\n", progname, Version);
	    DoNothing = 1;
	    break;
	case 'f':
	    if (InputFile) {
		Error(E_USAGE|E_XHINT|E_FATAL,
		    "only one -f option allowed; use -xv for explanation"
		);
	    }
	    InputFile = optarg;
	    break;
	case 'x':
	    ErrorFlag = (-1);
	    break;
	default:
	case '?':
	    ErrorFlag = 1;
	}
    }

    if ((progname = strrchr(progname, '/')) != (char *) NULL) {
	++progname; /* step over the last / */
    } else {
	progname = argv[0];
    }

    if (ErrorFlag > 0) {
	fprintf(stderr, "use %s -x or %s -xv for an explanation.\n",
							progname, progname);
	exit(1);
    } else if (ErrorFlag < 0) { /* -x was used */
	fprintf(stderr, "%s -- add files to an lq-text retrieval database\n",
								    progname);

	fputs("Options are:\n\
	-f file -- read the list of files to index from \"file\"\n\
	-M n	-- try not to flush cache entries with n or more entries\n\
	-w n	-- dump the word-cache every n words\n\
\n\
", stderr);

	LQT_PrintDefaultUsage(Options);

	if (LQT_TraceFlagsSet(LQTRACE_VERBOSE)) {
	    /* used -v or -t1 */
	    fprintf(stderr,
		"\n\
    Any remaining arguments are taken to be file names.  The current\n\
DOCPATH (%s) is searched for the files,\n\
and they are read and added to the index.\n\
If you use the -f option, you should not give filename\n\
arguments on the command line, although you can use \"-f -\" to read the\n\
list of files from standard input, one per line.\n\
Setting (with -w) the size of the cache may dramatically\n\
improve performance.  Systems with memory larger than the data can try -w0.\n\
See %s(1) for more information.\n",
		(char *) LQT_GetOption(Options, "file search path"),
		progname
	    );
	}
	exit(0);
    }

#ifdef WIDINBLOCK
# ifdef ASCIITRACE
    /* remind people to recompile... */
    Error(E_WARN, "**** Compiled with -DWIDINBLOCK for debugging ****");
    /* remind them a lot... */
    Error(E_WARN, "**** Compiled with -DWIDINBLOCK for debugging ****");
    sleep(5);
    Error(E_WARN, "**** Compiled with -DWIDINBLOCK for debugging ****");
    sleep(5);
    Error(E_WARN, "**** Compiled with -DWIDINBLOCK for debugging ****");
    sleep(5);
    Error(E_WARN, "**** Compiled with -DWIDINBLOCK for debugging ****");
# else
    /* don't allow -DWIDINBLOCK without -DASCIITRACE */
    Error(E_BUG, "Compiled with -DWIDINBLOCK but not -DASCIITRACE!");
    syntax error; this prevents compilation here;
# endif /* ASCIITRACE */
#endif

    if (DoNothing) {
	if (optind < argc) {
	    Error(E_WARN|E_XHINT,
		"%d extra argument%s ignored...",
		argc - optind,
		argc - optind == 1 ? "" : "%s"
	    );
	}
	exit(0);
    }

    /* some checking first: */
    if (InputFile && optind < argc) {
	    Error(E_FATAL|E_USAGE|E_XHINT,
		"cannot give filenames after -f %s",
		InputFile
	    );
	}
    }

    /* OK, now open the database;
     * We specify O_CREAT so that we will create a new database if
     * there isn't one there already.  As a sanity check, 
     * LQT_InitFromArgv() has already checked that there is a
     * config.txt file in the database directory, so it's a plausible
     * place in which to create a new database if we have to.
     * We will end up creating a few files (depending on how
     * lq-text was compiled) in that directory -- typically anywhere from
     * five up to about a dozen files.
     */
    db = LQT_OpenDatabase(Options, O_RDWR|O_CREAT, 0664);

    /* The filter table is not initialised by default, in order to avoid
     * linking in all the filter code.  Only part of it is initialised;
     * just enough for read-only access.  So we need to initialise the rest,
     * because we will be using input filters:
     */
    LQT_InitFilterTable(db);

    /* arrange to catch interrupts */
    dbForSignalHandler = db;
    lqSetSignals(SignalHandler);

    /* We create a FindFileDaemon object; this may be implemented as
     * a separate thread or a separate process entirely on some systems.
     * We hand it all of our remaining filename arguments to process.
     */
    findFileDaemon = startFindFileDaemon(
	db, Options, argc - optind, &argv[optind], InputFile
    );

    /* Even though we specified O_RDWR, we need to ask explicitly for
     * write access.  The modes you give to LQT_OpenDatabase are saved
     * for future use, and LQT_OpenDatabase checks that you could get
     * the requested access if you tried, but doesn't guarantee that you
     * have it.  This is so that multiple database writers could be
     * supported, although they aren't right now, and also so that you
     * can switch between read & write modes.
     */
    LQT_ObtainWriteAccess(db);

    while ((thing = getThingToIndex(findFileDaemon))) {
	AddThing(db, thing);
	if (SignalFlag) {
	    Error(E_WARN,
		"Caught signal at level %d, dumping cache",
		SignalFlag
	    );
	    DumpCache(db, DUMP_SYNC);
	    destroyFindFileDaemon(findFileDaemon);
	    LQT_CloseDatabase(db);
	    exit(1);
	}
    }

    destroyFindFileDaemon(findFileDaemon);

#ifndef MALLOCTRACE
    /* don't bother recaiming storage if we're about to exit, unless we
     * want to check for memory leaks afterwards.
     */
    DumpCache(db, DUMP_SYNC|DUMP_NOFREE);
#else
    DumpCache(db, DUMP_SYNC);
#endif

    LQT_CloseDatabase(db);

#ifdef MALLOCTRACE
    (void) fprintf(stderr, "%s: Malloctrace: checking...\n", progname);
    malloc_verify();
    (void) fprintf(stderr, "%s: Malloc Map\n", progname);
    mallocmap();
#endif

#ifdef WIDINBLOCK
# ifdef ASCIITRACE
    /* remind people again to recompile... */
    Error(E_WARN, "Reminder: Compiled with -DWIDINBLOCK for debugging ****");
    Error(E_WARN, "Reminder: Compiled with -DWIDINBLOCK for debugging ****");
    Error(E_WARN, "Reminder: Compiled with -DWIDINBLOCK for debugging ****");
# else
    /* don't allow -DWIDINBLOCK without -DASCIITRACE */
    Error(E_BUG, "Compiled with -DWIDINBLOCK but not -DASCIITRACE!");
    syntax error; this prevents compilation here;
# endif /* ASCIITRACE */
#endif

    return 0;
}

/* FileNameToFileInfoIfNoDaemon -- build a t_FileInfo for one file
 * without the help of a findFileDaemon.
 *
 * db		the open database
 * FileName	the name of the document, as supplied
 * thingStream	may be NULL; if it has an output stream, an unknown
 *		file type is reported there as a "warn" line instead of
 *		being traced
 *
 * Returns a newly allocated t_FileInfo, to be released with
 * LQT_DestroyFileInfo(), or NULL if the file can't be found, is
 * empty, or is of an unknown type.
 */
PRIVATE t_FileInfo *
FileNameToFileInfoIfNoDaemon(db, FileName, thingStream)
    t_LQTEXT_Database *db;
    char *FileName;
    t_ThingStream *thingStream;
{
    struct stat StatBuf;
    char *doc;
    t_FileInfo *FileInfo;

    /* This routine is called if we don't have a findFileDaemon.
     * I'd like to coalesce this with oneFileNameForDaemon() really,
     * but I need to do some profiling first and it's easier to profile them
     * this way.
     */
    if ((doc = LQT_FindAndStatFile(db, FileName, &StatBuf)) == (char *) 0) {
	Error(E_WARN, "Can't find document \"%s\"", FileName);
	return (t_FileInfo *) 0;
    }

    if (StatBuf.st_size == 0L) {
	LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG,
	    "%s empty -- not indexed",
	    FileName
	);
	return (t_FileInfo *) 0;
    }

    /* Allocate Structure */
    FileInfo = (t_FileInfo *) emalloc("MakeFileInfo", sizeof(t_FileInfo));

    /* Although not always necessary, call emalloc here so that a
     * FileInfo can always be deleted with LQT_DestroyFileInfo()
     */
    FileInfo->Name = emalloc(
	"MakeFileInfo.Name",
	(unsigned)(strlen(FileName) + 1)
    );
    (void) strcpy(FileInfo->Name, FileName);

    /* Other bits to set: */

    FileInfo->Date = StatBuf.st_mtime;
    FileInfo->FileSize = StatBuf.st_size;
    FileInfo->Stream = 0;

    /* file type */
    FileInfo->FilterType = LQT_GetFilterType(db, FileInfo, &StatBuf);
    if (FileInfo->FilterType < 0) {
	if (thingStream && thingStream->output) {
	    /* The reader takes one message per line, so the newline
	     * matters.
	     */
	    fprintf(thingStream->output,
		"warn\t%s unknown file type -- not indexed\n",
		FileName
	    );
	} else {
	    LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG,
		"%s unknown file type -- not indexed",
		FileName
	    );
	}
	LQT_DestroyFileInfo(db, FileInfo);
	return (t_FileInfo *) 0;
    }

    FileInfo->FID = 0; /* unknown */

    /* This replaces the modification time stored above with the
     * current time.
     */
    FileInfo->Date = (long) time((long *) 0); /* it's a time_t on BSD */

    FileInfo->Stream = 0L;
    return FileInfo;
}

PRIVATE void
oneFileNameForDaemon(db, FileName, thingStream)
    t_LQTEXT_Database *db;
    char *FileName;
    t_ThingStream *thingStream;
{
    struct stat StatBuf;
    char *doc;
    int FilterType;

    /* This function is called by the child findFileDaemon process.
     * its job is merely to try and open each file in turn that will
     * be indexed.
     */
    if ((doc = LQT_FindFile(db, FileName, &StatBuf)) == (char *) 0) {
	fprintf(thingStream->output,
	    "warn\tCan't find document \"%s\"\n",
	    FileName
	);
	return;
    }

    if (StatBuf.st_size == 0L) {
	fprintf(thingStream->output,
	    "warn\t%s empty -- not indexed",
	    FileName
	);
	return;
    }

    /* file type */
    FilterType = LQT_GetFilterType(db, FileInfo, &StatBuf);
    if (FilterType < 0) {
	fprintf(thingStream->output,
	    "warn\t%s unknown file type -- not indexed",
	    FileName
	);
	return;
    }

    /* OK, we have the information we need */
    fprintf(thingStream->output,
	"add\t%d\t%s\t%s\n",
	FilterType,
	FileName,
	doc
    );
}

/* AddThing -- index one document described by a t_Thing.
 *
 * Returns 0 on success, and -1 if no t_FileInfo could be made for the
 * document, if a signal has been received, or if Thing is NULL (which
 * is also reported as a fatal internal error).
 */
PRIVATE int
AddThing(db, Thing)
    t_LQTEXT_Database *db;
    t_Thing *Thing;
{
    t_FileInfo *theFileInfo;

    if (!Thing) {
	Error(E_FATAL|E_BUG|E_INTERNAL,
	    "%s: %d: AddThing: Attempt to add Null Thing",
	    __FILE__, __LINE__
	);
	return -1; /* in case Error() comes back */
    }

    /* A t_Thing has only the members DocumentName, FileName and
     * FileType, so call LQT_MakeFileInfo() in the same way as
     * AddFile() does.
     * NOTE(review): Thing->FileType, which the daemon has already
     * worked out, is not passed on here -- confirm whether
     * LQT_MakeFileInfo() ought to be given it.
     */
    theFileInfo = LQT_MakeFileInfo(db, Thing->FileName);

    if (theFileInfo == (t_FileInfo *) 0) {
	return -1;
    }

    AddStream(db, theFileInfo);
    LQT_SaveFileInfo(db, theFileInfo);
    LQT_DestroyFileInfo(db, theFileInfo);

    if (SignalFlag) {
	return -1;
    }

    return 0;
}

PRIVATE int
AddFile(db, Name)
    t_LQTEXT_Database *db;
    char *Name;
{
    t_FileInfo *theFileInfo;

    if (!Name || !*Name) {
	return -1;
    }

    if ((theFileInfo = LQT_MakeFileInfo(db, Name)) == (t_FileInfo *) 0) {
	return -1;
    }

    AddStream(db, theFileInfo);
    LQT_SaveFileInfo(db, theFileInfo);
    LQT_DestroyFileInfo(db, theFileInfo);

    if (SignalFlag) {
	return -1;
    }

    return 0;
}

PRIVATE void
AddStream(db, FileInfo)
    t_LQTEXT_Database *db;
    t_FileInfo *FileInfo;
{
    /* I have to mark the last word in the block.
     * I do that by marking the previous word if it was in a differant block
     * than the current one.
     */
    t_WordInfo *WordInfo;
    t_WordInfo *LastWord = 0;

    while (SignalFlag <= 1) {
	/* needs more than one signal to quit in the middle of a file */

	WordInfo = LQT_ReadWordFromFileInfo(
	    db,
	    FileInfo,
	    LQT_READWORD_IGNORE_COMMON
	);

	if (WordInfo == (t_WordInfo *) 0) {
	    break;
	} else {
	    if (LastWord) {
		if (LastWord->WordPlace.BlockInFile !=
					    WordInfo->WordPlace.BlockInFile) {
		    LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
		}
		AddWord(db, LastWord);
	    }

	    LastWord = WordInfo;

	}
    }

    if (SignalFlag > 1) {
	Error(E_WARN|E_MULTILINE,
	    "Signal received during processing of %s",
	    FileInfo->Name
	);
	Error(E_WARN|E_MULTILINE|E_LASTLINE,
	    "That and other files may be incomplete..."
	);
	return;
    }

    if (LastWord) {
	/* it's the last in the file, so it is also the last in the block */
	LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
	AddWord(db, LastWord);
    }

    if (SignalFlag <= 1 && LQT_TraceFlagsSet(LQTRACE_VERBOSE|LQTRACE_DEBUG)) {
	LQT_Trace(LQTRACE_VERBOSE|LQTRACE_DEBUG,
	    "%d: %s: indexed, %s\n",
	    FileInfo->FID,
	    FileInfo->Name,
	    LQT_GetFilterName(db, FileInfo)
	);
    }
}
