/* lqunoverlap.c -- Copyright 1995, 1996 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. * * $Id: lqunoverlap.c,v 1.2 1996/07/01 21:37:26 lee Exp $ * * lqunoverlap, part of Liam Quin's text retrieval package... * * coalesce overlapping matches in input. * */ #include "error.h" #include "globals.h" /* defines and declarations for database filenames */ #include /* stderr, also for fileinfo.h */ #include #ifdef HAVE_SYSV_FCNTL_H # include #endif #ifdef HAVE_FCNTL_H #include #endif #include #ifdef HAVE_STRING_H # include #else # include #endif #include "emalloc.h" /* for efree() */ #include "fileinfo.h" /* for wordinfo.h */ #include "wordinfo.h" #include "pblock.h" #include "phrase.h" #include "lqutil.h" #include "lqtrace.h" #include "liblqtext.h" /** functions used before they're defined within this file: **/ /** **/ static char *Revision = "@(#) $Id: lqunoverlap.c,v 1.2 1996/07/01 21:37:26 lee Exp $"; char *progname = "$Revision: 1.2 $"; int SilentMode = 0; /* don't print matches if set to one */ static int MinimumNumberOfWordsInRankedPhrase = 3; typedef struct s_OneFile { char *Name; struct s_OneFile *Next; } t_OneFile; static t_OneFile *FilesToProcess; static t_OneFile **LastFile; static void AddOneFile(theName) char *theName; { t_OneFile *F; F = (t_OneFile *) emalloc("remembering file name", sizeof(t_OneFile)); F->Name = theName; F->Next = (t_OneFile *) 0; if (!LastFile) { LastFile = &FilesToProcess; } while (*LastFile) { LastFile = &(*LastFile)->Next; } *LastFile = F; LastFile = &F->Next; } static void ProcessFiles(); static void AddOneMatchString( #ifdef HAVE_PROTO char *theString #endif ); int main(argc, argv) int argc; char *argv[]; { extern int optind, getopt(); extern char *optarg; int ch; int ErrorFlag = 0; char *InputFile = 0; progname = argv[0]; LQT_InitFromArgv(argc, argv); while ((ch = getopt(argc, argv, "Zz:f:lNr:xVv")) != EOF) { switch (ch) { case 'z': case 'Z': break; /* done by LQT_InitFromArgv(); */ case 'V': fprintf(stderr, "%s version %s\n", progname, Revision); break; case 'f': AddOneFile(optarg); break; case 'l': break; /* list mode is the default */ case 'r': /* ignored for compat. with lqrank*/ break; case 'x': ErrorFlag = (-1); break; case '?': ErrorFlag = 1; } } if (ErrorFlag) { fprintf(stderr, "Usage: %s [options] [filename [...]]\n", progname); fprintf(stderr, "%s: options are:\n", progname); fputs("-f file -- the named file contains matches to print\n\ \n", stderr); LQT_PrintDefaultUsage(); exit( ErrorFlag > 0 ? 1 : 0); /* 0 means -x was used */ } while (optind < argc) { AddOneFile(argv[optind]); ++optind; } if (!FilesToProcess) { AddOneFile("-"); } ProcessFiles(); } #define STDIN_NAME "[standard input]" static void ProcessFiles() { t_OneFile *thisFile; for (thisFile = FilesToProcess; thisFile; thisFile = thisFile->Next) { FILE *f; int NeedClose; if (STREQ(InputFile, "-")) { f = stdin; NeedClose = 0; thisFile->Name = STDIN_NAME; /* for better error messages */ } else { f = LQU_fEopen(E_FATAL|E_SYS, InputFile, "List of phrases", "r"); NeedClose = 1; } while (LQU_fReadLine(f, &theLine, LQUF_NORMAL) >= 0) { if (theLine && *theLine) { AddOneMatchString(theLine); } } if (NeedClose) { if (fclose(f) == EOF) { Error(E_WARN|E_SYSTEM, "couldn't close file \"%s\"", thisFile->Name ); } } } } #define MATCHES_PER_LUMP 30 typedef struct s_MatchLump { t_MatchStart Matches[MATCHES_PER_LUMP]; struct s_MatchLump *Next; short MatchesUsed; } t_MatchLump; typedef struct s_ResultsForOneFile { t_FID FID; t_MatchLump *Lumps; struct s_ResultsForOneFile *Next; } t_ResultsForOneFile; static void AddOneMatchString(theString) char *theString; { t_MatchStart *Start; /* convert the string to a match */ /* find the right file */ /* see if the match overlaps an existing one */ /* yes -- combine them */ /* no -- allocate a new match and save it */ }