/* Phrase.c -- Copyright 1989, 1994, 1995, 1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/*
 * Deal with (WID, FID, Offfset) triples
 * Liam Quin, September 1989
 *
 * $Id: phrase.c,v 1.39 96/07/04 20:15:29 lee Exp $
 *
 */

#include "error.h"
#include "globals.h" /* defines and declarations for database filenames */

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <sys/types.h>
#ifdef HAVE_FCNTL_H
# ifdef HAVE_SYSV_FCNTL_H
#  include <sys/stat.h>
# endif
# include <fcntl.h>
#endif

#include <ctype.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#else
# include <malloc.h>
#endif

#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "pblock.h"
#include "phrase.h"
#include "wordrules.h"
#include "emalloc.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "lqtrace.h"

/** Unix system calls that need to be declared: **/
/** Unix/C Library Functions: **/
/** lqtext functions: **/
/** functions within this file that need forward declarations **/

/** **/

static t_FID *FilesInShortestEntry = 0;
static t_FID *FilesInShortestEntryPointer;
static long PlacesAccepted;
#if 0
static long FilesInShortestEntryDeleted = 0;
#endif

PRIVATE int ContinuesMatch(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    t_PhraseItem *PreviousWord,
    t_PhraseItem *Word,
    register t_WordPlace *First,
    t_WordPlace *Prev,
    register t_WordPlace *New,
    t_Position Position
#endif
);

PRIVATE int
AcceptWordFunc(db, WID, WordPlace)
    t_LQTEXT_Database *db;
    t_WID WID;
    t_WordPlace *WordPlace;
{
    t_FID LastPlausible = (*FilesInShortestEntryPointer);

    while (*FilesInShortestEntryPointer < WordPlace->FID) { /* not there yet */
	if  (*FilesInShortestEntryPointer == (t_FID) 0) {
	    return 0;
	}
	/* delete the skipped file so we have fewer files next time round,
	 * although the same number of entries in the array.
	 * It's hard to compress the array without introducing an
	 * O(nfile^2) behaviour, slowing matching down somewhat.
	 */
	if (*FilesInShortestEntryPointer != LastPlausible) {
	    *FilesInShortestEntryPointer = LastPlausible;
	}
	++FilesInShortestEntryPointer;
    }

    if (*FilesInShortestEntryPointer > WordPlace->FID) {
	/* gone past it */
	return 0;
    } else if (*FilesInShortestEntryPointer == WordPlace->FID) {
	++PlacesAccepted;
	return 1;
    } else {
	/* got to the end! */
	return 0;
    }
}

/* <Function>
 *   <Name>LQT_MakeMatchesWhere
 *   <Class>Retrieval/Matching, Retrieval/Phrases
 *   <Purpose>
 *      <P>Matches the given phrase, and returns the number of successful
 *	matches.  The given AcceptFunction is called for each match; it
 *	must return one of the following flags as defined in
 *	<h>phrase.h</h>: either LQMATCH_ACCEPT, which adds the match to the
 *	result, or LQMATCH_REJECT, which does not add the match to the result.
 *	In addition, either of these flags may be combined (using bitwise or)
 *	with LQMATCH_QUIT, in which case LQT_MakeMatchesWhere will return
 *	the result collected so far and abandon further processing, or
 *	LQMATCH_NEXT_FILE, in which case LQT_MakeMatchesWhere will not
 *	call the AcceptFunction again until a match is found in a document
 *	with a different File Identifier (FID).</P>
 *	<P>A NULL AcceptFunction pointer is equivalent to one that always
 *	returns LQMATCH_ACCEPT, except much more efficient.</P>
 *   <Returns>
 *      The number of matches accepted.
 *	All matches that are accepted are stored in the given Phrase object.
 *   <SeeAlso>
 *      LQT_StringToPhrase
 * </Function>
 */
API long
LQT_MakeMatchesWhere(db, Phrase, AcceptFunction)
    t_LQTEXT_Database *db;
    t_Phrase *Phrase;
    int (*AcceptFunction)(
#ifdef HAVE_PROTO
	t_LQTEXT_Database *,
	t_Phrase *,
	t_Match *
#endif
    );
{
    /* Each word has a pointer (SearchIndex) to the last Word Place
     * that was examined.  This enables an O(NumberOfWords) search instead
     * of O(NumberOfWords * NumberOfWords) search.
     */
    unsigned long PIFW; /* Place In First Word */
    t_MatchList **MLPP = &(Phrase->Matches);
    t_Match **MPP;
    t_Match **OldMatch = (t_Match **) 0;
    t_WordPlace *pp;
    t_PhraseItem *Word;
    t_PhraseItem *PreviousWord = 0;
    long Result = 0L;
    t_PhraseItem *LeastWord;
    int HowGood;
    long LengthOfFileArray = 0L;
    t_FID PendingFID = 0;

    if (!Phrase) {
	return 0L;
    }

    LQT_ResetPhraseMatch(db, Phrase);
    /* Each iteration over this list either produces a match or rejects a
     * possible phrase starting place.
     */

    LQT_Trace(LQTRACE_MATCH_PHRASE,
	"match phrase \"%s\"",
	Phrase->ModifiedString
    );

    /* A phrase with garbage words can't match anything */
    if (Phrase->HasUnknownWords && db->PhraseMatchLevel != PCM_AnyCase) {
	return 0L;
    }

    /* Find the word in the phrase with least matches: */
    LeastWord = Phrase->Words;
    for (Word = Phrase->Words; Word; Word = Word->Next) {

	if (Word->Word->NumberOfWordPlaces <
					LeastWord->Word->NumberOfWordPlaces) {
	    LeastWord = Word;
	}
    }

    /* for efficiency, let's see which files we have to fetch */
    if (FilesInShortestEntry) {
	efree((char *) FilesInShortestEntry);
    }

    FilesInShortestEntry = (t_FID *) emalloc(
	"FIDS in Phrase",
	sizeof(t_FID) * (LeastWord->Word->NumberOfWordPlaces + 1)
	/* allow +1 for trailing 0 */
    );

    /* check we have the postings: */
    if (LeastWord->Word->WordPlacesInHere<LeastWord->Word->NumberOfWordPlaces){
	t_WordInfo *W = LeastWord->Word; /* less indirection! */

	if (W->WordPlaces) {
	    (void) efree((char *) W->WordPlaces);
	}

	W->WordPlaces = LQT_GetWordPlaces(
	    db,
	    W->WID,
	    W->WordPlaceStart,
	    (unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
	    W->Offset,
	    &W->NumberOfWordPlaces
	);
	W->WordPlacesInHere = W->NumberOfWordPlaces;
    }

    /* now complete the list of files that are possible candidates */
    {
	register long i, j;
	t_WordInfo *W = LeastWord->Word; /* less indirection! */

	FilesInShortestEntry[i = 0] = W->WordPlaces[0].FID;
	for (j = 1; j < W->NumberOfWordPlaces; j++) {
	    if (W->WordPlaces[j].FID != FilesInShortestEntry[i]) {
		i++;
		FilesInShortestEntry[i] = W->WordPlaces[j].FID;
	    }
	}
	FilesInShortestEntry[++i] = (t_FID) 0;
	LengthOfFileArray = i + 1;
	FilesInShortestEntry = (t_FID *) erealloc(
	    (char *) FilesInShortestEntry,
	    (unsigned int) (LengthOfFileArray * sizeof(t_FID))
	);
    }

    /* Ensure that the matches have been read */
    for (Word = Phrase->Words; Word; Word = Word->Next) {
	if (Word->Word->WordPlacesInHere < Word->Word->NumberOfWordPlaces) {
	    t_WordInfo *W = Word->Word; /* less indirection! */

	    if (W->WordPlaces) {
		(void) efree((char *) W->WordPlaces);
	    }

	    FilesInShortestEntryPointer = FilesInShortestEntry;
	    PlacesAccepted = 0L;
	    W->WordPlaces = LQT_GetWordPlacesWhere(
		db,
		W->WID,
		W->WordPlaceStart,
		(unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
		W->Offset,
		&W->NumberOfWordPlaces,
		AcceptWordFunc
	    );
#ifdef ASCIITRACE
	    LQT_Trace(LQTRACE_MATCH_PHRASE,
		"%s: %ld/%ld wanted\n",
		W->Word,
		PlacesAccepted,
		W->NumberOfWordPlaces
	    );
#endif
	    W->WordPlacesInHere = W->NumberOfWordPlaces = PlacesAccepted;

	    if (!PlacesAccepted) {
		return (long) 0;
	    }

#if 0
	    /* spot trailing unwanted documents: */
	    if (*FilesInShortestEntryPointer) {
		if (*++FilesInShortestEntryPointer) {
		    *FilesInShortestEntryPointer = (t_FID) 0;
		    FilesInShortestEntryDeleted += (
			LengthOfFileArray +1 -
			(FilesInShortestEntryPointer - FilesInShortestEntry)
		    );
		    /* (+1 for the trailing zero) */
		}
	    }

	    if (Word->Next && FilesInShortestEntryDeleted > 1) {
		register t_FID *WhereWeAreAt;
		register t_FID *This;

		WhereWeAreAt = This = FilesInShortestEntry;

		while (*This) {
		    if (*This == This[1]) {
			t_FID Bad = (*This);

			while (*This == Bad) {
			    This++;
			}
			continue;
		    }

		    if (This != WhereWeAreAt) {
			*(WhereWeAreAt++) = *(This++);
		    } else {
			++WhereWeAreAt;
			++This;
		    }
		}

		*WhereWeAreAt = (t_FID) 0;
		LengthOfFileArray = (WhereWeAreAt - FilesInShortestEntry) + 1;
		FilesInShortestEntry = (t_FID *) erealloc(
		    FilesInShortestEntry,
		    LengthOfFileArray * sizeof(t_FID)
		);
	    }
#endif
	}
    }

    /* For each match in the first word in the phrase: */
    for (PIFW = 0; PIFW < Phrase->Words->Word->NumberOfWordPlaces; PIFW++) {
	t_WordPlace *LastTryWord = (t_WordPlace *) 0;

	/* The idea is that the next two loops are are likely to reduce
	 * considerably the number of places we have to consider in the
	 * case that the first word in the phrase has a lot of matches
	 * and there is a subsequent word with relatively few matches.
	 * Experiments suggest that this is fairly common.
	 *
	 * This is still a nearly (i.e. slightly-better-than) linear
	 * algorithm w.r.t the total number of matches in all of the
	 * words added up.  Note that I alter LeastWord->SearchIndex in
	 * one of the two loops that follow, so when WordPlaces from that
	 * word are considered, we don't have to look at any twice.
	 *
	 * In order to do better, one would have to be able to avoid
	 * looking at some or (better!) most of the WordPlaces.
	 *
	 * For example, not fetching so many from disk:
	 * if we didn't do the fetches until we needed to, and we gave
	 * LQT_GetWordPlaces a minimum FID to look for, we might be able
	 * to reduce things by (say) 15%.
	 * If all of the FIDS were stored separately, we would not
	 * have to look at the (Block, Word, Flags, StuffBefore) stuff at
	 * all, and that would be much faster.  One way to do that might be
	 * to store the list of FIDs with the word (as now), and perhaps
	 * some flags and the count of words/fid, and to store the rest
	 * in a per-file data structure.
	 *
	 * That would be a major, major hack...
	 * 					... sigh.
	 */

	while (LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID <
		    Phrase->Words->Word->WordPlaces[PIFW].FID) {
	    if (++(LeastWord->SearchIndex) >=
					LeastWord->Word->NumberOfWordPlaces) {
		goto GiveUp;
	    }
	}

	while (Phrase->Words->Word->WordPlaces[PIFW].FID <
		LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID) {
	    if (++PIFW >= Phrase->Words->Word->NumberOfWordPlaces) {
		goto GiveUp;
	    }
	}

	/* The following comment tells Sabre_C not to moan about "if (0)" */
	/*SUPPRESS558*/
	if (0) {
GiveUp:     break;
	}

	MPP = (t_Match **) 0;
	pp = &Phrase->Words->Word->WordPlaces[Phrase->Words->SearchIndex=PIFW];
	/* When we have a partially completed match,
	 * TryWord (declared below) will point to the WordPlace currently
	 * being considered to see if it extends the partial match;
	 * LastTryWord points to the previous WordPlace in the match.
	 */

	/* For each word in the phrase: */
	for (Word = Phrase->Words; Word; Word = Word->Next) {
	    int GotOne = 0;

	    /* For each occurrence of that word: */
	    for (; Word->SearchIndex < Word->Word->NumberOfWordPlaces;
							++Word->SearchIndex) {
		register t_WordPlace *TryWord =
		    &Word->Word->WordPlaces[Word->SearchIndex];
		
		if (!LastTryWord) {
		    LastTryWord = TryWord;
		}

		/** So:
		 ** | int PIFW = each match in the first word in the phrase
		 ** | t_WordPlace *pp = each match in the phrase
		 **    | t_PhraseItem *Word = each word in the phrase
		 **	   | unsigned SearchIndex = each match of that word
		 **	   | t_WordPlace *TryWord = each occurrence of that word
		 **
		 ** Hence, we are comparing pp and TryWord, hoping that each time
		 ** round the (Word) loop we will advance TryWord.
		 ** Once we have decided that TryWord and pp relate to the
		 ** same file and that TryWord is no earlier than pp in the
		 ** file, we must then check that TryWord is advancing the
		 ** chain by comparing it to the previous element in the
		 ** list (LastTryWord).
		 **
		 ** When we break from this inner list, we must either have
		 ** eliminated this particular (PIFW) as starting a match-
		 ** chain, or have decided that we have extended the
		 ** current match chain (by setting GotOne).
		 **/

		if (LastTryWord == TryWord) { /* same word! */
		    HowGood = LQTp_CheckFlags(
			db,
			Word->Word,
			TryWord,
			    (Word == Phrase->Words) ? e_First : (
				( (Word->Next) ? e_Middle : e_Last)
			    )
			);
		} else if (pp->FID == TryWord->FID) {
		    HowGood = ContinuesMatch(
			db,
			PreviousWord,
			Word,
			pp,
			LastTryWord,
			TryWord,
			(Word == Phrase->Words) ? e_First : (
			    ( (Word->Next) ? e_Middle : e_Last)
			)
		    );
		} else { /* different FIDs */
		    HowGood = (pp->FID > TryWord->FID) ? -1 : 1;
		}

		switch (HowGood) {
		case 0:
		    /* G O T C H A !!!! */
		    /* extend the HitList, since it's OK so far. */

		    /* allocate a new match: */
		    if (!MPP) {
			*MLPP = (t_MatchList *)
			    emalloc("MatchList entry for LQT_MakeMatches",
			    sizeof(t_MatchList)
			);
			(*MLPP)->Match = (t_Match *) 0;
			OldMatch = MPP = &((*MLPP)->Match);
			MLPP = &(*MLPP)->Next;
			*MLPP = (t_MatchList *) 0;
		    }

		    *MPP = (t_Match *) emalloc("Phrase Match", sizeof(t_Match));
		    (*MPP)->WID = Word->Word->WID;
		    (*MPP)->Where = TryWord;
		    (*MPP)->Next = (t_Match *) 0;
		    MPP = &(*MPP)->Next;
		    GotOne++;
		    break;
		case 1: /* gone too far */
#ifdef ASCIITRACE
		    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
			t_WordInfo WW;
			
			WW = *(Word->Word);

			if (LastTryWord == TryWord) {
			    /* LQT_GenerateWordFromRoot() returns a pointer
			     * to a static buffer,
			     * so I have to use two printf() calls here.
			     */
			    fprintf(stderr,
				"Reject(%s (%d) != ",
				LQT_GenerateWordFromRoot(db,
				    &WW,
				    WW.WordPlace.Flags
				),
				WW.WordPlace.Flags
			    );
			    fprintf(stderr, "%s (%d)) [flags]\n",
				LQT_GenerateWordFromRoot(
				    db, &WW, TryWord->Flags
				),
				TryWord->Flags
			    );
			} else {
			    fprintf(stderr,
				"Reject(%s) -- too far, FIDS %ld %ld\n",
				LQT_GenerateWordFromRoot(
				    db,
				    &WW,
				    WW.WordPlace.Flags
				),
				pp->FID,
				TryWord->FID
			    );
			}
		    }
#endif
		    break;
		case -1:
		    continue; /* not there yet */
		default:
		    Error(E_FATAL|E_INTERNAL,
			"%s: %d HowGood == %d",
			__FILE__, __LINE__,
			HowGood
		    );
		}

		/* Remember where we got up to... so that we can extend
		 * the list when we start looking at the next word.
		 */
		LastTryWord = TryWord;

#ifdef ASCIITRACE
		if (GotOne && LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		    t_WordInfo WW;
		    
		    WW = *(Word->Word);
		    /* LQT_GenerateWordFromRoot() returns a pointer to a
		     * static buffer
		     */
		    fprintf(stderr, "Partial match for %s ",
			LQT_GenerateWordFromRoot(
			    db,
			    &WW,
			    Word->Word->WordPlace.Flags
			)
		    );
		    fprintf(stderr, "(%s,%lu,%lu) FID %lu)\n",
			LQT_GenerateWordFromRoot(db, &WW, TryWord->Flags),
			TryWord->BlockInFile, TryWord->WordInBlock,
			TryWord->FID
		    );
		}
#endif
		/* If we got to here, we extended the list, which is fine;
		 * otherwise, if we hit a continue, we try to carry on
		 * looking at matches of this word, and if we hit a break
		 * before we set "GotOne", we give up on this match
		 * altogether.
		 */
		break;
	    } /* For each occurrence of that word: */

	    if (!GotOne) {
		t_Match *MP;
		/* This word isn't here, so neither is the phrase found
		 * in this file starting here.
		 */

		if (MPP) {
		    for (MP = (*OldMatch); MP != (t_Match *) 0; /*void*/) {
			t_Match *Next = MP->Next;

			efree((char *) MP);
			MP = Next;
		    }
		    *OldMatch = (t_Match *) 0;
		}

		break;
	    } else if (Word->Next == (t_PhraseItem *) 0) {
		/* If we've reached the end of the phrase, i.e. if
		 * Word->Next is zero, we have successfully added a new
		 * phrase!
		 */

		if (PendingFID) {
		    if (PendingFID >
			Word->Word->WordPlaces[Word->SearchIndex].FID
		    ) {
			goto reject;
		    } else {
			PendingFID = 0;
		    }
		}

		if (AcceptFunction) {
		    int action = (*AcceptFunction)(db, Phrase, *OldMatch);

		    if (action & LQMATCH_NEXT_FILE) {
			PendingFID =
			    Word->Word->WordPlaces[Word->SearchIndex].FID + 1;
			action |= ~LQMATCH_NEXT_FILE;
		    }
		    switch (action & (~LQMATCH_QUIT)) {
		    case LQMATCH_ACCEPT: /* OK */
			Result++;
#ifdef ASCIITRACE
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "OK, Result now %d", Result
			);
#endif
			break;
		    case LQMATCH_REJECT:
		    reject:
			/* free the match structure */
#ifdef ASCIITRACE
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "AcceptFunc rejected match, still %d",
			    Result
			);
#endif
			if (MPP) {
			    t_Match *MP = (*OldMatch);

			    while (MP != (t_Match *) 0) {
				t_Match *Next = MP->Next;

				efree((char *) MP);
				MP = Next;
			    }
			}

			*OldMatch = (t_Match *) 0;
			break;
		    default:
			Error(E_FATAL|E_BUG,
"LQT_MakeMatchesWhere: AcceptFunction returned illegal action %d",
			    action
			);
		    }
		    if (action & LQMATCH_QUIT) {
#ifdef ASCIITRACE
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "AcceptFunc returned LQMATCH_QUIT, giving up at %d",
			    Result
			);
#endif
			LQT_ResetPhraseMatch(db, Phrase);
			return Phrase->NumberOfMatches = Result;
		    }
		} else { /* no AcceptFunction, so accept automatically */
		    ++Result;
#ifdef ASCIITRACE
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"Result now %d\n", Result
		    );
#endif
		}
	    }

	    PreviousWord = Word;

	} /* end for (each word in the phrase) */
    } /* end (for each FID/Offset pair in the first word */
    LQT_ResetPhraseMatch(db, Phrase);
    return Phrase->NumberOfMatches = Result;
}

/* <Function>
 *   <Name>LQT_MakeMatches
 *   <Class>Retrieval/Matching, Retrieval/Phrases
 *   <Purpose>
 *      This is equivalent to LQT_MakeMatchesWhere with a null AcceptFunction,
 *	and is provided for convenience.
 *   <SeeAlso>
 *	LQT_StringToPhrase
 *      LQT_MakeMatchesWhere
 * </Function>
 */
API long
LQT_MakeMatches(thedb, Phrase)
    t_LQTEXT_Database *thedb;
    t_Phrase *Phrase;
{
    /* Call LQT_MakeMatchesWhere with a NULL callback function: */
    return LQT_MakeMatchesWhere(thedb, Phrase, (int (*)())0);
}

PRIVATE int
ContinuesMatch(db, PreviousWord, Word, First, Prev, New, Position)
    t_LQTEXT_Database *db;
    t_PhraseItem *PreviousWord;
    t_PhraseItem *Word;
    register t_WordPlace *First;
    t_WordPlace *Prev;
    register t_WordPlace *New;
    t_Position Position;
{
    register t_WordInfo *QueryWord = Word->Word;

    /* Return Value is
     *  -1 --- if New occurs before Prev (and thus isn't part of the match)
     *  0  --- if it's the next word in the match
     *  +1 --- if we've gone past it
     * Note: you can use these values in a switch() if you want.
     */

#ifdef ASCIITRACE
    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
	LQT_Trace(LQTRACE_MATCH_PHRASE,
	    "ContinuesMatch: %*.*s [f 0%o %ld %lu] fby %*.*s [f 0%o %ld %lu]",
	    PreviousWord ? PreviousWord->Word->Length : 0,
	    PreviousWord ? PreviousWord->Word->Length : 0,
	    PreviousWord ? PreviousWord->Word->Word : "",
	    Prev->Flags, Prev->BlockInFile, Prev->WordInBlock,
	    Word->Word->Length, Word->Word->Length,
	    Word->Word->Word,
	    New->Flags,
	    New->BlockInFile, New->WordInBlock
	);
    }
#endif

    /* First check we are looking at the right file:
     * Have we gone far enough?
     */
    if (New->FID < First->FID) {
	return -1; /* not far enough */
    } else if (New->FID > First->FID) {
	return 1; /* too far */
    } else if (Prev == New) {
	return 0;
    }

    /* Hey everybody, they're the same!
     * That means that this might be a candidate for a MATCH!!!!
     */
    
    /* if (SimplyAnywhereWillDo) { OK; break; } */

    /* Clearly later words in the phrase can't be in earlier
     * blocks...
     * Note that the converse isn't true: if the phrase spans a block
     * boundary, the earlier word will be in an earlier block than the
     * next one.
     */
    if (New->BlockInFile < First->BlockInFile) {
	/* Although we are in the right file, we have not
	 * yet reached the correct offset.
	 */
	return -1;
    } 

    /* If we get to here,
     * . we are in the right file
     * . we are at least as far into the file as the start
     *   of the phrase
     */

    /* Now check that we are a reasonable distance past
     * the preceding word (checking that we are not on the first
     * match in the list, of course):
     */
    if (New->BlockInFile < Prev->BlockInFile) {
	/* not gone far enough */
	return -1;
    }
    if (New->BlockInFile > Prev->BlockInFile + 1) {
	/* If they are more than one block apart, I
	 * don't believe them to be part of a phrase!
	 */
	return 1;
    }

    if (New->BlockInFile == Prev->BlockInFile) {
	long DistanceApartInQuery = 1;

	if (PreviousWord && PreviousWord->Word) {
	    DistanceApartInQuery = Word->Word->WordPlace.WordInBlock -
				PreviousWord->Word->WordPlace.WordInBlock;
	}

	/* If they are in the same block, one must be
	 * exactly one word beyond the other.  I don't
	 * think they can ever be the same, unless there
	 * is a bug somewhere!  (Query expansion and anaphoristic
	 * reference handling might change that in the future, though!)
	 *
	 * However, if common words were skipped in the query,
	 * there may be a gap.
	 *
	 * In principle, it's possible for a "common word" -- e.g. a number --
	 * to span a whole block, and then we can't match words on both
	 * sides of it.  Variable sized blocks will cure this, as then a
	 * phrase will never span a block.
	 */
	if (New->WordInBlock <= Prev->WordInBlock) {
	    /* too early in the block */
	    return -1;
	}
	switch (db->PhraseMatchLevel) {
	case PCM_AnyCase:
	    if (New->WordInBlock > 
				Prev->WordInBlock + DistanceApartInQuery + 2) {
		return 1; /* gone too far */
		/* We allow a few words slop in this case, though... */
	    }
	    break; /* clearly OK */
	case PCM_SameCase:
	case PCM_HalfCase:
	default:
	    if (New->WordInBlock > Prev->WordInBlock + DistanceApartInQuery) {
#ifdef ASCIITRACE
	    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		LQT_Trace(LQTRACE_MATCH_PHRASE,
		    "ContinuesMatch: reject: WIB: %lu !fby %lu",
		    Prev->WordInBlock, New->WordInBlock
		);
	    }
#endif
		return 1; /* gone too far */
	    }
	}
    } else {
	int MaxWordInBlock = 0;

	switch (db->PhraseMatchLevel) {
	case PCM_AnyCase:
	    MaxWordInBlock = 1;
	    break; /* clearly OK */
	case PCM_SameCase:
	case PCM_HalfCase:
	default:
	    MaxWordInBlock = 0;
	}

	/* If there is a block boundary between the two words,
	 * the 2nd word should be word zero in its block.
	 * If we are allowing approx. matches, we'll let an extra
	 * word slip in there.
	 */
	if (New->WordInBlock > MaxWordInBlock) {
	    /* the previous word was at the end of a block,
	     * but this one isn't at the start!
	     */
#ifdef ASCIITRACE
	    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		LQT_Trace(LQTRACE_MATCH_PHRASE,
		    "ContinuesMatch: reject: WIB > %d: %ld.%lu !fby %ld.%lu",
		    MaxWordInBlock,
		    Prev->BlockInFile, Prev->WordInBlock,
		    New->BlockInFile, New->WordInBlock
		);
	    }
#endif
	    return 1; /* gone too far */
	}

	/* they are in adjacent blocks */
	if (!(Prev->Flags & WPF_LASTINBLOCK)) {
	    /* there is another word between them, so
	     * we have gone too far.
	     * I went to a lot of effort in addfile.c to
	     * mantain that flag, just for this!
	     */
#ifdef ASCIITRACE
	    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		LQT_Trace(LQTRACE_MATCH_PHRASE,
		    "ContinuesMatch: reject: !LAST: %ld.%lu !fby %ld.%lu",
		    Prev->BlockInFile, Prev->WordInBlock,
		    New->BlockInFile, New->WordInBlock
		);
	    }
#endif
	    return 1;
	}
    }
    
    /* Now we check that the word matches the given word
     * -- in other words, that possessive/plural/case
     *    is correct if required.  Do this later as it is
     *    relatively expensive I expect, and we will not
     *    usually care about case.
     *
     * Since the word is in the right place, if it fails here there
     * is no point in looking at the next word in this block!
     */

    return LQTp_CheckFlags(db, QueryWord, New, Position);
}

LIBRARY int
LQTp_CheckFlags(db, QueryWord, New, Position)
    t_LQTEXT_Database *db;
    t_WordInfo *QueryWord;
    t_WordPlace *New;
    t_Position Position;
{
    int StuffBeforeInQuery, StuffBeforeInNew;

#ifdef ASCIITRACE
    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
	LQT_Trace(LQTRACE_MATCH_PHRASE,
	    "LQTp_CheckFlags %*.*s (%s Qf=0%ocmp f=0%o",
	    QueryWord->Length, QueryWord->Length, QueryWord->Word,
	    Position == e_First ? 
		"First" : ( (Position == e_Middle ) ? "Middle" : "Last" ),
	    QueryWord->WordPlace.Flags, New->Flags
	);
    }
#endif

    /* Phrase matching levels currently have silly names, sorry. */
    switch (db->PhraseMatchLevel) {

    default: /* defensive! */
	Error(E_FATAL|E_INTERNAL|E_BUG,
	    "%d %s: unknown phrase match level %d",
	    __LINE__, __FILE__,
	    db->PhraseMatchLevel
	);
    case PCM_AnyCase:
	break; /* clearly OK */
    case PCM_SameCase: /* precise matching */
	StuffBeforeInQuery = LQT_WPF_STUFF_BEFORE(&QueryWord->WordPlace);
	if (StuffBeforeInQuery > 1 || Position != e_First) {
	    int Difference;

	    StuffBeforeInNew = LQT_WPF_STUFF_BEFORE(New);

	    Difference = StuffBeforeInQuery - StuffBeforeInNew;
	    if (Difference < -2 || Difference > 4) {
		/* allow stuff before to be too small on the first word;
		 * if Difference > 0, the query has more space &| punctuation
		 * at the start than the indexed text, so this isn't a
		 * match.
		 */
		if (Position != e_First || Difference > 0) {
#ifdef ASCIITRACE
		    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "reject %*.*s: stuff before Q %d Try %d",
			    QueryWord->Length, QueryWord->Length,
			    QueryWord->Word,
			    StuffBeforeInQuery,
			    StuffBeforeInNew
			);
		    }
#endif
		    return -1; /* give up on this match */
		}
	    }
	}

	/** for things before the current word, be "heuristic" if
	 ** it's the first word in the phrase, as the user can't have
	 ** known about the context of the putative match, only its contents.
	 **/

	/* Skipped common words: */
	if (Position == e_First) {
	    /* It's the first word in the query, so only check for things
	     * before this word if they were specified:
	     */
	    if ((LQT_WPF_LASTHADLETTERS(&QueryWord->WordPlace)) &&
				!(LQT_WPF_LASTHADLETTERS(New))) {
#ifdef ASCIITRACE
		    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "reject %*.*s: last had letters(q but not w)",
			    QueryWord->Length, QueryWord->Length,
			    QueryWord->Word
			);
		    }
#endif
		return -1; /* give up on this match */
	    }

	    if (LQT_WPF_LASTWASCOMMON(&QueryWord->WordPlace) &&
			    !LQT_WPF_LASTWASCOMMON(New)) {
#ifdef ASCIITRACE
		    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "reject %*.*s: skipped common q but not w",
			    QueryWord->Length, QueryWord->Length,
			    QueryWord->Word
			);
		    }
#endif
		return -1;
	    }

	    if (Position != e_Last) {
		if ((LQT_WPF_NEXTHASPUNCT(&QueryWord->WordPlace)) &&
				    !(LQT_WPF_NEXTHASPUNCT(New))) {
#ifdef ASCIITRACE
		    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
			LQT_Trace(LQTRACE_MATCH_PHRASE,
			    "reject %*.*s: next has punct q but not w",
			    QueryWord->Length, QueryWord->Length,
			    QueryWord->Word
			);
		    }
#endif
		    return -1; /* give up on this match */
		}
	    }

	} else if (Position == e_Last) {
	    if (LQT_WPF_NEXTHASPUNCT(&QueryWord->WordPlace) &&
				!LQT_WPF_NEXTHASPUNCT(New)) {
#ifdef ASCIITRACE
		if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"reject %*.*s: NEXTHASPUNCT q but not w",
			QueryWord->Length, QueryWord->Length,
			QueryWord->Word
		    );
		}
#endif
	    }

	    if (LQT_WPF_NEXTISCOMMON(&QueryWord->WordPlace) &&
		!LQT_WPF_NEXTISCOMMON(New)
	    ) {
#ifdef ASCIITRACE
		if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {

		    StuffBeforeInNew = LQT_WPF_STUFF_BEFORE(New);
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"reject %*.*s: NEXTISCOMMON w but not not q",
			QueryWord->Length, QueryWord->Length,
			QueryWord->Word,
			StuffBeforeInQuery,
			StuffBeforeInNew
		    );
		}
#endif
		return -1;
	    }
	} else { /* middle of query */
	    if (LQT_WPF_NEXTHASPUNCT(New) !=
			LQT_WPF_NEXTHASPUNCT(&QueryWord->WordPlace)) {
#ifdef ASCIITRACE
		if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"reject %*.*s: NEXTHASPUNCT w != q",
			QueryWord->Length, QueryWord->Length,
			QueryWord->Word
		    );
		}
#endif
		return -1;
	    }

	    if (LQT_WPF_NEXTISCOMMON(New) !=
			LQT_WPF_NEXTISCOMMON(&QueryWord->WordPlace)) {
#ifdef ASCIITRACE
		if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		    StuffBeforeInNew = LQT_WPF_STUFF_BEFORE(New);
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"reject %*.*s: NEXTISCOMMON w != q",
			QueryWord->Length, QueryWord->Length,
			QueryWord->Word,
			StuffBeforeInQuery,
			StuffBeforeInNew
		    );
		}
#endif
		return -1;
	    }
	}

	/* All the non-context-sensitive flags at once: */
	if ((QueryWord->WordPlace.Flags & (WPF_WASPLURAL|WPF_UPPERCASE)) != 
			(New->Flags & (WPF_UPPERCASE|WPF_WASPLURAL))) {
	    /* The cases are different, no good */
#ifdef ASCIITRACE
	    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		LQT_Trace(LQTRACE_MATCH_PHRASE,
		    "reject %*.*s: plural/case q != w",
		    QueryWord->Length, QueryWord->Length,
		    QueryWord->Word
		);
	    }
#endif
	    return -1; /* give up on this match */
	}

	/* Finally, the least reliable, so always heuristic: */
	if ((LQT_WPF_POSSESSIVE(&QueryWord->WordPlace)) &&
				!(LQT_WPF_POSSESSIVE(New))) {
#ifdef ASCIITRACE
	    if (LQT_TraceFlagsSet(LQTRACE_MATCH_PHRASE)) {
		LQT_Trace(LQTRACE_MATCH_PHRASE,
		    "reject %*.*s: possessive q but not w",
		    QueryWord->Length, QueryWord->Length,
		    QueryWord->Word
		);
	    }
#endif
	    return -1; /* give up on this match */
	}

	break;
    case PCM_HalfCase:
	/* In this case, we are lax about things, but if the
	 * user typed  plural/possessive/capital, we only
	 * match one with the same attribute.
	 */
	if ((LQT_WPF_UPPERCASE(&QueryWord->WordPlace)) &&
				    !(LQT_WPF_UPPERCASE(New))) {
#ifdef ASCIITRACE
	LQT_Trace(LQTRACE_MATCH_PHRASE, "Reject [uppercase]");
#endif
	    return -1; /* give up on this match */
	}

	/* plurals: this should be separate */
	if ((LQT_WPF_WASPLURAL(&QueryWord->WordPlace)) &&
	    !(LQT_WPF_WASPLURAL(New))) {
#ifdef ASCIITRACE
	    LQT_Trace(LQTRACE_MATCH_PHRASE, "Reject [plural]");
#endif
	    return -1; /* give up on this match */
	}

	/* Now, what about skipped common words? */
	if ((LQT_WPF_NEXTISCOMMON(&QueryWord->WordPlace)) &&
				!(LQT_WPF_NEXTISCOMMON(New))) {
#ifdef ASCIITRACE
	    LQT_Trace(LQTRACE_MATCH_PHRASE, "Reject [next-is-common]");
#endif
	    return -1; /* give up on this match */
	}

	/* Stuff before, if given, must be present: */
	StuffBeforeInQuery = LQT_WPF_STUFF_BEFORE(&QueryWord->WordPlace);
	if (StuffBeforeInQuery > 1) {
	    int Difference;

	    StuffBeforeInNew = LQT_WPF_STUFF_BEFORE(New);
	    Difference = StuffBeforeInQuery - StuffBeforeInNew;

	    if (Difference < -2 || Difference > 4) {
		/* allow stuff before to be too small on the first word;
		 * if Difference > 0, the query has more space &| punctuation
		 * at the start than the indexed text, so this isn't a
		 * match.
		 */
		if (Position != e_First || Difference > 0) {
#ifdef ASCIITRACE
		    LQT_Trace(LQTRACE_MATCH_PHRASE,
			"Reject [Stuff Before %d != Q%d]",
			StuffBeforeInNew,
			StuffBeforeInQuery
		    );
#endif
		    return -1; /* give up on this match */
		}
	    }
	}

	if (LQT_WPF_NEXTHASPUNCT(&QueryWord->WordPlace) &&
				   !LQT_WPF_NEXTHASPUNCT(New)) {
#ifdef ASCIITRACE
	    LQT_Trace(LQTRACE_MATCH_PHRASE,
		"Reject [punctuation %s %o != %s Q%o]",
		(LQT_WPF_NEXTHASPUNCT(New)) ?  "yes" : "no",
		New->Flags,
		(LQT_WPF_NEXTHASPUNCT(&QueryWord->WordPlace)) ? "yes" : "no",
		QueryWord->WordPlace.Flags
	    );
#endif
	    return -1; /* give up on this match */
	}

	if (LQT_WPF_POSSESSIVE(&QueryWord->WordPlace) &&
				   !LQT_WPF_POSSESSIVE(New)) {
#ifdef ASCIITRACE
	    LQT_Trace(LQTRACE_MATCH_PHRASE, "Reject [possessive]");
#endif
	    return -1; /* give up on this match */
	}
	break;
    }

    /* If we got here...
     * we couldn't find anything wrong.
     */
    
#ifdef ASCIITRACE
    LQT_Trace(LQTRACE_MATCH_PHRASE, "Accept");
#endif
    return 0; /* It's all OK! */
}