/* lqphrase.c -- Copyright 1989, 1990, 1994, 1995 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. * * $Id: lqphrase.c,v 1.12 2001/05/31 03:50:13 liam Exp $ * * lqphrase, part of Liam Quin's text retrieval package... * * lqphrase is intended to be an example of one way to use the programming * interface to lq-text. * * The idea is quite simple: * Simply take a phrase p, held in a string (char *p), and call * t_Phrase *Phrase = LQT_StringToPhrase(p); * The result, if not null, contains only one interesting thing at this * point: * Phrase->ModifiedString * is the canonical version of p -- with common and short words removed. * for example, * p = "The boy sat down in His Boat and playd with his toes."; * might result in Phrase->ModifiedString containing * "[*the] boy sat down [in] [*his] boat [*and] [?playd] with [*his] toe" * Common words are marked with a *, and unknown words with ?. * [NOTE: modifiedPhrase is no longer supported] * * An attempt may have been made to reduce plurals. * * Since this phrase contains a word not in the database (playd), it will * never match anything. As a result, it is a good idea to print this string * (possibly massaging it first) so users can see what is going on. If you * have it, the curses-based "lqtext" does this. * * If we change "playd" to "played", the above string is equivalent to * "[*the] boy sat down [xx] [*the] boat [*the] played with [*the] toe" * In other words, all common words are equivalent. The package remembers * that one or more common words were skipped, and also that one or more * lumps of letters too small to make up a word were skipped. * Compare also h/wordrules.h for more info, and liblqtext/Phrase.c for * the code that actually does the matching. 
* * Now, having sorted that out, we have our canonical string (and lots of * other things) in Phrase, so we can now call * LQT_MakeMatches(Phrase); * This will return the number of matches (*NOT* the number of files) for * the given ModifiedPhrase in the database. * This can take several seconds, so again, it can be worth printing out * the modified string as soon as it is available, so the user is looking at * that whilst LQT_MakeMatches is working! * * Now we have done LQT_MakeMatches, we can march along the linked list of * pointers to linked lists of arrays of matches. See MatchOnePhrase() below. * * As an optimisation, LQT_MakeMatchesWhere() lets us call a function to * print (in our case) each match as it is found. This is slower overall, * because of the extra function calls, but not much slower, and the results * start to appear much sooner, so it feels much faster. * * Now, each match currently gives us * t_FID FID; Files are numbered from 1 in the database * unsigned long BlockInFile; -- the block in the file * unsigned char WordInBlock; -- the word in the block * unsigned char StuffBefore; -- the amount of leading garbage * unsigned char Flags, including (see wordrules.h): * * WPF_WASPLURAL The word... 
ended in s * WPF_UPPERCASE ...Started with a capital letter * WPF_POSSESSIVE ...ended in 's * WPF_ENDEDINING ...ended in ing * WPF_LASTWASCOMMON the previous word was common * WPF_LASTHADLETTERS we skipped some letters to get here * WPF_LASTINBLOCK I'm the last word in this block * */ #include "error.h" #include "globals.h" /* defines and declarations for database filenames */ #include /* stderr, also for fileinfo.h */ #include #ifdef HAVE_SYSV_FCNTL_H # include #endif #ifdef HAVE_FCNTL_H #include #endif #ifdef HAVE_STRING_H # include #else # include #endif #ifdef HAVE_STDLIB_H # include #else # include #endif #include "emalloc.h" /* for efree() */ #include "fileinfo.h" /* for wordinfo.h */ #include "wordinfo.h" #include "pblock.h" #include "phrase.h" #include "lqutil.h" #include "liblqtext.h" #include "lqtrace.h" /** System calls and functions... **/ /** Unix system calls used in this file: **/ extern void exit(); /** Unix Library Functions used: **/ /** lqtext library functions: **/ /** functions used before they're defined within this file: **/ PRIVATE void MatchOnePhrase( #ifdef HAVE_PROTO t_LQTEXT_Database *db, char *Phrase #endif ); /** **/ static char *Revision = "@(#) $Id: lqphrase.c,v 1.12 2001/05/31 03:50:13 liam Exp $"; char *progname = "lqphrase"; int SilentMode = 0; /* don't print matches if set to one */ int main(argc, argv) int argc; char *argv[]; { extern int optind, getopt(); extern char *optarg; int ch; int ErrorFlag = 0; char *InputFile = 0; t_lqdbOptions *Options; t_LQTEXT_Database *db = 0; progname = argv[0]; Options = LQT_InitFromArgv(argc, argv); while ((ch = getopt(argc, argv, "Zz:af:hNpr:slxVv")) != EOF) { switch (ch) { case 'z': case 'Z': break; /* done by LQT_InitFromArgv(); */ case 'V': fprintf(stderr, "%s version %s\n", progname, Revision); break; case 'f': InputFile = optarg; break; case 'l': break; /* list mode is the default */ case 'r': /* ignored for compat. 
with lqrank*/ break; case 's': SilentMode = 1; break; case 'x': ErrorFlag = (-1); break; case '?': ErrorFlag = 1; } } if (ErrorFlag) { fprintf(stderr, "Usage: %s [options] \"phrase\" [...]\n", progname); fprintf(stderr, "%s: options are:\n", progname); fputs("\ -l -- list mode, suitable for lqshow (the default)\n\ -s -- silent mode; exit status indicates success of matching\n\ \n", stderr); LQT_PrintDefaultUsage(Options); exit( ErrorFlag > 0 ? 1 : 0); /* 0 means -x was used */ } db = LQT_OpenDatabase(Options, O_RDONLY, 0); if (!db || LQT_ObtainReadOnlyAccess(db) < 0) { Error(E_FATAL, "couldn't open lq-text database"); } if (InputFile) { FILE *f; char NeedClose = 1; char *theLine; if (STREQ(InputFile, "-")) { f = stdin; NeedClose = 0; } else { f = LQU_fEopen(E_FATAL|E_SYS, InputFile, "List of phrases", "r"); } while (LQU_fReadLine(f, &theLine, LQUF_NORMAL) >= 0) { if (theLine && *theLine) { MatchOnePhrase(db, theLine); } } if (NeedClose) { (void) fclose(f); } } while (optind < argc) { MatchOnePhrase(db, argv[optind++]); } if (SilentMode) { /* if we got to here we didn't find anything */ exit(1); } return 0; } static int gotOne = 0; static int ExitSilently(db, Phrase, Match) t_LQTEXT_Database *db; t_Phrase *Phrase; t_Match *Match; { ++gotOne; return LQMATCH_QUIT; } typedef struct s_Rank { t_FID FID; unsigned long Sum; } t_Rank; PRIVATE void MatchOnePhrase(db, Phrase) t_LQTEXT_Database *db; char *Phrase; { t_Phrase *P; if (!Phrase || !*Phrase) { /* ignore an empty phrase */ return; } if ((P = LQT_StringToPhrase(db, Phrase)) == (t_Phrase *) 0) return; if (SilentMode) { (void) LQT_MakeMatchesWhere(db, P, ExitSilently); if (gotOne) { exit(0); } } else { (void) LQT_MakeMatchesWhere(db, P, LQT_PrintAndRejectOneMatch); } LQT_DestroyPhrase(db, P); }