/* test speed of readword */ static char *Version = "@(#) $Id: readword.c,v 1.3 2001/05/31 03:50:55 liam Exp $"; #include "globals.h" /* defines and declarations for database filenames */ #include "error.h" #include #include #include #include #ifdef HAVE_FCNTL_H # ifdef HAVE_SYSV_FCNTL_H # include # endif # include #endif #ifdef HAVE_STRING_H # include #else # include #endif #ifdef HAVE_STDLIB_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #include "fileinfo.h" #include "wordinfo.h" #include "wordrules.h" #include "emalloc.h" #include "addfile.h" #include "lqutil.h" #include "revision.h" #include "liblqtext.h" #include "lqtrace.h" #include "filter.h" #include /** Functions within this file that need declaring: **/ PRIVATE void AddStream( #ifdef HAVE_PROTO t_LQTEXT_Database *, t_FileInfo *FileInfo #endif ); PRIVATE void AddFrom( #ifdef HAVE_PROTO t_LQTEXT_Database *, char *Name #endif ); /* Symbol Table Interface */ static void PrintWord( #ifdef HAVE_PROTO t_LQTEXT_Database *, t_WordInfo *WordInfo #endif ); PRIVATE void AddFile( #ifdef HAVE_PROTO t_LQTEXT_Database *, char *Name #endif ); /**/ char *progname = "@(#) $Id: readword.c,v 1.3 2001/05/31 03:50:55 liam Exp $"; static int SignalFlag = 0; int main(argc, argv) int argc; char *argv[]; { extern int getopt(); extern char *optarg; extern int optind; t_LQTEXT_Database *db; t_lqdbOptions *Options; int c; int ErrorFlag = 0; int DoNothing = 0; char *InputFile = (char *) 0; progname = argv[0]; /* retain the full path at first */ Options = LQT_InitFromArgv(argc, argv); while ((c = getopt(argc, argv, "w:f:H:M:xVZz:")) != -1) { switch (c) { case 'Z': case 'z': break; /* work done in SetDefault() */ case 'V': fprintf(stderr, "%s: Release: %s\n", progname, LQTEXTREVISION); fprintf(stderr, "%s: Revision: %s\n", progname, Version); DoNothing = 1; break; case 'f': if (InputFile) { Error(E_USAGE|E_XHINT|E_FATAL, "only one -f option allowed; use -xv for explanation" ); } InputFile = optarg; break; case 'x': ErrorFlag = (-1); break; default: case '?': ErrorFlag = 1; } } if ((progname = strrchr(progname, '/')) != (char *) NULL) { ++progname; /* step over the last / */ } else { progname = argv[0]; } if (ErrorFlag > 0) { fprintf(stderr, "use %s -x or %s -xv for an explanation.\n", progname, progname); exit(1); } else if (ErrorFlag < 0) { /* -x was used */ fprintf(stderr, "%s -- read words\n", progname); LQT_PrintDefaultUsage(Options); exit(0); } if (DoNothing) { if (optind < argc) { Error(E_WARN|E_XHINT, "%d extra argument%s ignored...", argc - optind, argc - optind == 1 ? "" : "%s" ); } exit(0); } if (!(db = LQT_OpenDatabase(Options, O_RDONLY, 0))) { Error(E_FATAL, "couldn't open database for reading."); } LQT_InitFilterTable(db); if (InputFile) { if (optind < argc) { Error(E_FATAL|E_USAGE|E_XHINT, "cannot give filenames after -f %s", InputFile ); } AddFrom(db, InputFile); } else for (; optind < argc; ++optind) { AddFile(db, argv[optind]); } LQT_CloseDatabase(db); return 0; } static void AddFrom(db, Name) t_LQTEXT_Database *db; char *Name; { FILE *fp; char *Line; if (Name[0] == '-' && Name[1] == '\0') { fp = stdin; } else { fp = LQU_fEopen(E_FATAL, Name, "list of files to add", "r"); } while (LQU_fReadLine(fp, &Line, LQUF_NORMAL) > 0) { /* Note: * LQU_fReadFile will silently swallow blank lines. * if we use LQUF_NORMAL it will swallow lines that start with a #, * but we don't want that here! */ AddFile(db, Line); } if (fp != stdin) { (void) fclose(fp); } } PRIVATE void AddFile(db, Name) t_LQTEXT_Database *db; char *Name; { t_FileInfo *theFileInfo; t_FID FID; if (!Name || !*Name) { return; } if ((FID = LQT_NameToFID(db, Name)) == (t_FID) 0) { return; } if ((theFileInfo = LQT_FIDToFileInfo(db, FID)) == (t_FileInfo *) 0) { return; } theFileInfo->Stream = LQT_MakeInput(db, theFileInfo); AddStream(db, theFileInfo); LQT_DestroyFileInfo(db, theFileInfo); return; } PRIVATE void AddStream(db, FileInfo) t_LQTEXT_Database *db; t_FileInfo *FileInfo; { /* I have to mark the last word in the block. * I do that by marking the previous word if it was in a differant block * than the current one. */ char *Base; char *Start, *End; t_WordInfo *WordInfo; t_WordInfo *LastWord = 0; if (!FileInfo->FileSize) { struct stat s; if (fstat(fileno(FileInfo->Stream), &s) < 0) { Error(E_WARN|E_SYS, "Can't get size of %s", FileInfo->Name); return; } FileInfo->FileSize = s.st_size; } #ifndef MAP_FILE # define MAP_FILE 0 #endif Base = mmap( 0, FileInfo->FileSize, PROT_READ, MAP_FILE|MAP_SHARED, fileno(FileInfo->Stream), 0 ); if (Base == (caddr_t) -1) { Error(E_WARN|E_SYS, "can't mmap input for %s", FileInfo->Name); return; } /* reset the word-reading routine */ (void) LQT_ReadWordFromStringPointer( db, (char **) NULL, (char **) NULL, (char *) NULL, 0 ); /* add the words in this file, one at a time. * We are always one word behind, because when ReadWord * finds punctuation after a word, it sets the flag in the * previous word's WordPlace... so we have to leave it in place * to get set! */ Start = Base; End = &Base[FileInfo->FileSize]; LastWord = (t_WordInfo *) 0; while (SignalFlag <= 1) { /* needs more than one signal to quit in the middle of a file */ WordInfo = LQT_ReadWordFromStringPointer( db, &Start, (char **) NULL, End, LQT_READWORD_IGNORE_COMMON ); if (WordInfo == (t_WordInfo *) NULL) { break; } else { WordInfo->WordPlace.FID = FileInfo->FID; if (LastWord) { LastWord->WordPlace.FID = FileInfo->FID; PrintWord(db, LastWord); } LastWord = WordInfo; } } if (LastWord) { /* ensure that the WPF_LASTINBLOCK flag is not set */ LastWord->WordPlace.Flags &= ~WPF_LASTINBLOCK; LastWord->WordPlace.FID = FileInfo->FID; PrintWord(db, LastWord); LastWord = (t_WordInfo *) 0; } (void) munmap(Base, FileInfo->FileSize); } PRIVATE void PrintWord(db, Word) t_LQTEXT_Database *db; t_WordInfo *Word; { /* print enough information to allow the word to be indexed */ printf("%d\t%*.*s\t%ld\t%ud\t%lu\t%lu\n", (int) Word->Length, (int) Word->Length, (int) Word->Length, Word->Word, (unsigned long) Word->WordPlace.Flags, (unsigned int) Word->WordPlace.StuffBefore, Word->WordPlace.BlockInFile, Word->WordPlace.WordInBlock ); }