/*-
 * Copyright (c) 1980, 1983 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that: (1) source distributions retain this entire copyright
 * notice and comment, and (2) distributions including binaries display
 * the following acknowledgement:  ``This product includes software
 * developed by the University of California, Berkeley and its contributors''
 * in the documentation or other materials provided with the distribution
 * and in all advertising materials mentioning features or use of this
 * software. Neither the name of the University nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* Note: I have modified this file as follows:
 * Liam
 *
 * $Id: lqsort.c,v 1.8 1996/06/11 22:07:00 lee Exp $
 *
 * $Log: lqsort.c,v $
 * Revision 1.8  96/06/11  22:07:00  lee
 * BIF is a long now.
 * 
 * Revision 1.7  1996/05/15  22:59:02  lee
 * api change, plus ? bugfix?
 *
 * Revision 1.6  94/03/04  11:53:31  lee
 * fix core dump...
 * 
 * Revision 1.5  94/02/26  14:53:47  lee
 * API change
 * 
 * Revision 1.4  92/02/15  06:43:40  lee
 * declared qst() as void.
 * 
 * Revision 1.3  92/02/15  05:34:45  lee
 * Added a macro that doesn't work...
 * 
 * Revision 1.2  92/01/31  00:14:55  lee
 * gcc warnings; added an INLINE.
 * 
 * Revision 1.1  91/06/16  18:08:39  lee
 * Initial revision
 * 
 */


/*
 * SCCS identification string of the Berkeley qsort.c that this file was
 * modified from.  It is only compiled when LIBC_SCCS is defined and lint
 * is not.
 */
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)qsort.c	5.7 (Berkeley) 5/17/90";
#endif /* LIBC_SCCS and not lint */

#include "error.h"
#include <stdio.h>
#include <sys/types.h>
#include "globals.h"
#include "liblqtext.h"
#include "fileinfo.h"
#include "wordplace.h"

/*
 * qsort.c:
 * Our own version of the system qsort routine which is faster by an average
 * of 25%, with lows and highs of 10% and 50%.
 * The THRESHold below is the insertion sort threshold, and has been adjusted
 * for records of size 48 bytes.
 * The MTHREShold is where we stop finding a better median.
 */

/*
 * Both thresholds are counted in records.  LQT_SortWordPlaces multiplies
 * them by the record size to get the byte counts thresh and mthresh that
 * qst() compares partition sizes against.
 */
#define		THRESH		4		/* partitions smaller than this are left to the final insertion sort */
#define		MTHRESH		6		/* partitions at least this big get a median-of-three pivot */

#if 0
#define qcmp(F1,F2) wpqcmp((t_WordPlace *)F1, (t_WordPlace *)F2)
/* this is commented out because it dumps core sometimes... */

/* There's a function for this below, if you have problems. */
#define wpqcmp(F1,F2) \
    (((F1)->FID == (F2)->FID) ? ( \
	((F1)->BlockInFile == (F2)->BlockInFile) ? \
	    ((F1)->WordInBlock - (F2)->WordInBlock) : \
	    ((F1)->BlockInFile - (F2)->BlockInFile) ) : \
	((F1)->FID - (F2)->FID))

#else

/* I'll leave this in case the macro version above makes some compilers
 * explode...
 */
/*
 * qcmp:
 * Three-way comparison of two word places for the sort routines below.
 * Orders by FID, then by BlockInFile, then by WordInBlock, and returns
 * -1, 0 or 1 as F1 sorts before, the same as, or after F2.
 *
 * Each field is compared directly rather than subtracted: a difference
 * that is narrowed to int (or taken between unsigned values) can wrap
 * and come out with the wrong sign, which hands the sort an inconsistent
 * ordering.  Callers only ever look at the sign of the result.
 *
 * This deliberately stays an old-style (unprototyped) definition, because
 * the callers in this file pass char pointers into the record array.
 */
static int
qcmp(F1, F2)		/* the comparison routine */
    register t_WordPlace *F1, *F2;
{
    /* primary key: the file the word occurs in */
    if (F1->FID != F2->FID) {
	return (F1->FID < F2->FID) ? -1 : 1;
    }

    /* secondary key: block within that file */
    if (F1->BlockInFile != F2->BlockInFile) {
	return (F1->BlockInFile < F2->BlockInFile) ? -1 : 1;
    }

    /* last key: word within that block */
    if (F1->WordInBlock != F2->WordInBlock) {
	return (F1->WordInBlock < F2->WordInBlock) ? -1 : 1;
    }

    return 0;
}
#endif

/*
 * Sort parameters shared between LQT_SortWordPlaces and qst(), all
 * measured in chars.  LQT_SortWordPlaces assigns them before it calls
 * qst(); qst() only reads them.
 */
static  int		qsz;			/* size of each record */
static  int		thresh;			/* THRESHold in chars (qsz * THRESH) */
static  int		mthresh;		/* MTHRESHold in chars (qsz * MTHRESH) */

/*
 * qsort (the entry point in this file is LQT_SortWordPlaces):
 * First, set up some global parameters for qst to share.  Then, quicksort
 * with qst(), and then a cleanup insertion sort ourselves.  Sound simple?
 * It's not...
 */
/* Forward declaration of the quicksort pass, which is defined after its
 * caller.  NOTE(review): PRIVATE is defined in a project header and
 * presumably expands to static, matching the definition -- confirm.
 */
PRIVATE void qst();

/* <Function>
 *   <Name>LQT_SortWordPlaces
 *   <Class>Database/Update, Database/Words
 *   <Purpose>
 *	Sorts the given WordPlace array in place using Quicker Sort.
 *	A WordPlace array must be sorted in ascending order by FID, then
 *	by Block In File, then by Word Within Block, in order to be
 *	written to the database.  Since this is exactly the order generated
 *	by reading files one at a time from beginning to end, this
 *	routine is not currently used.
 *	The db argument is not used by this routine.
 *	Arrays of zero or one elements are returned untouched.
 *   <Notes>
 *	Buggy, I think.
 *	The sort parameters are kept in file-scope static variables that
 *	are set on every call.
 * </Function>
 */
API void
LQT_SortWordPlaces(db, NumberOfWordPlaces, WordPlaces)
    t_LQTEXT_Database *db;
    unsigned long NumberOfWordPlaces;
    t_WordPlace *WordPlaces;
{
	char *base = (char *) WordPlaces;	/* the array, viewed as bytes */
	register char c, *i, *j, *lo, *hi;
	char *min, *max;

	if (NumberOfWordPlaces <= 1) return;
#if 0 /* untested */
	if (NumberOfWordPlaces == 2) {
	    if (qcmp(&WordPlaces[0], &WordPlaces[1]) > 0) {
		t_WordPlace tmp = WordPlaces[0]; /* struct copy */
		WordPlaces[0] = WordPlaces[1];
		WordPlaces[1] = tmp;
	    }
	    return;
	}
#endif

	/* Publish the record size and the byte thresholds for qst(). */
	qsz = sizeof(t_WordPlace);
	thresh = qsz * THRESH;
	mthresh = qsz * MTHRESH;
	max = base + NumberOfWordPlaces * qsz;	/* one past the last record */
	if (NumberOfWordPlaces >= THRESH) {
		qst(base, max);
		hi = base + thresh;
	} else {
		hi = max;
	}
	/*
	 * First put smallest element, which must be in the first THRESH, in
	 * the first position as a sentinel.  This is done just by searching
	 * the first THRESH elements (or the first n if n < THRESH), finding
	 * the min, and swapping it into the first position.
	 */
	for (j = lo = base; (lo += qsz) < hi; ) {
		if (qcmp(j, lo) > 0) {
			j = lo;
		}
	}
	if (j != base) {
		/* swap j into place, one char at a time */
		for (i = base, hi = base + qsz; i < hi; ) {
			c = *j;
			*j++ = *i;
			*i++ = c;
		}
	}
	/*
	 * With the sentinel in place, run the insertion sort.  For each
	 * remaining element, min, from [1] to [n-1], walk hi backwards to
	 * the slot where min belongs, i.e. just after the nearest earlier
	 * element that does not compare greater than min.
	 *
	 * The sentinel normally stops the walk, but the walk is also bounded
	 * by base so that it can never read in front of the array, even if
	 * the comparison were to order elements inconsistently.
	 */
	for (min = base + qsz; min < max; min += qsz) {
		for (hi = min; hi > base && qcmp(hi - qsz, min) > 0; hi -= qsz) {
			/* void */;
		}
		if (hi != min) {
			/*
			 * Move min down to hi, shifting the elements in
			 * between up by one record.  This is done one char
			 * column at a time: lo visits each char of min, and
			 * the inner loop slides that column up.
			 */
			for (lo = min + qsz; --lo >= min; ) {
				c = *lo;
				for (i = j = lo; (j -= qsz) >= hi; i = j)
					*i = *j;
				*i = c;
			}
		}
	}
}

/*
 * qst:
 * Do a quicksort
 * First, find the median element, and put that one in the first place as the
 * discriminator.  (This "median" is just the median of the first, last and
 * middle elements).  (Using this median instead of the first element is a big
 * win).  Then, the usual partitioning/swapping, followed by moving the
 * discriminator into the right place.  Then, figure out the sizes of the two
 * partitions, do the smaller one recursively and the larger one via a repeat
 * of this code.  Stopping when there are fewer than THRESH elements in a
 * partition and cleaning up with an insertion sort (in our caller) is a huge
 * win.
 * All data swaps are done in-line, which is space-losing but time-saving.
 * (And there are only three places where this is done).
 *
 * base points at the first char of the partition and max at the char just
 * past its last record.  The record size and the two thresholds are read
 * from the file-scope variables qsz, thresh and mthresh, which the caller
 * must have set.  Nothing is returned; the records are rearranged in place.
 */

static void
qst(base, max)
    char *base, *max;
{
    register char c, *i, *j, *jj;
    register int ii;
    char *mid, *tmp;
    /*
     * Partition sizes in chars.  These hold pointer differences, so they
     * are long rather than int: an int is too narrow once the array is
     * larger than INT_MAX chars.
     */
    long lo, hi;

    /*
     * At the top here, lo is the number of characters of elements in the
     * current partition.  (Which should be max - base).
     * Find the median of the first, last, and middle element and make
     * that the middle element.  Set j to largest of first and middle.
     * If max is larger than that guy, then it's that guy, else compare
     * max with loser of first and take larger.  Things are set up to
     * prefer the middle, then the first in case of ties.
     */
    lo = max - base;		/* number of elements as chars */
    do {
	mid = i = base + qsz * ((lo / qsz) >> 1);
	if (lo >= mthresh) {
	    j = (qcmp((jj = base), i) > 0 ? jj : i);
	    if (qcmp(j, (tmp = max - qsz)) > 0) {
		/* switch to first loser */
		j = (j == jj ? i : jj);
		if (qcmp(j, tmp) < 0) {
		    j = tmp;
		}
	    }
	    if (j != i) {
		/* swap the chosen median into the middle slot */
		ii = qsz;
		do {
		    c = *i;
		    *i++ = *j;
		    *j++ = c;
		} while (--ii);
	    }
	}
	/*
	 * Semi-standard quicksort partitioning/swapping
	 * i scans up from base and j scans down from the last record; mid
	 * always points at the discriminator, which moves when it takes
	 * part in a swap.
	 */
	for (i = base, j = max - qsz; ; ) {
	    /* skip records on the left that are already in place */
	    while (i < mid && qcmp(i, mid) <= 0) {
		i += qsz;
	    }
	    while (j > mid) {
		if (qcmp(mid, j) <= 0) {
		    /* already on the correct side */
		    j -= qsz;
		    continue;
		}
		tmp = i + qsz;	/* value of i after swap */
		if (i == mid) {
		    /* j <-> mid, new mid is j */
		    mid = jj = j;
		} else {
		    /* i <-> j */
		    jj = j;
		    j -= qsz;
		}
		goto swap;
	    }
	    if (i == mid) {
		/* both scans have met at the discriminator: done */
		break;
	    } else {
		/* i <-> mid, new mid is i */
		jj = mid;
		tmp = mid = i;	/* value of i after swap */
		j -= qsz;
	    }
swap:
	    /* exchange the records at i and jj, one char at a time */
	    ii = qsz;
	    do	{
		c = *i;
		*i++ = *jj;
		*jj++ = c;
	    } while (--ii);
	    i = tmp;
	}
	/*
	 * Look at sizes of the two partitions, do the smaller
	 * one first by recursion, then do the larger one by
	 * making sure lo is its size, base and max are updated
	 * correctly, and branching back.  But only repeat
	 * (recursively or by branching) if the partition is
	 * of at least size THRESH.
	 */
	i = (j = mid) + qsz;
	if ((lo = j - base) <= (hi = max - i)) {
	    /* left part is the smaller: recurse on it, loop on the right */
	    if (lo >= thresh) {
		qst(base, j);
	    }
	    base = i;
	    lo = hi;
	} else {
	    /* right part is the smaller: recurse on it, loop on the left */
	    if (hi >= thresh) {
		qst(i, max);
	    }
	    max = j;
	}
    } while (lo >= thresh);
}