/*- * Copyright (c) 1980, 1983 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that: (1) source distributions retain this entire copyright * notice and comment, and (2) distributions including binaries display * the following acknowledgement: ``This product includes software * developed by the University of California, Berkeley and its contributors'' * in the documentation or other materials provided with the distribution * and in all advertising materials mentioning features or use of this * software. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ /* Note: I have modified this file as follows: * Liam * * $Id: lqsort.c,v 1.8 1996/06/11 22:07:00 lee Exp $ * * $Log: lqsort.c,v $ * Revision 1.8 96/06/11 22:07:00 lee * BIF is a long now. * * Revision 1.7 1996/05/15 22:59:02 lee * api change, plus ? bugfix? * * Revision 1.6 94/03/04 11:53:31 lee * fix core dump... * * Revision 1.5 94/02/26 14:53:47 lee * API change * * Revision 1.4 92/02/15 06:43:40 lee * declared qst() as void. * * Revision 1.3 92/02/15 05:34:45 lee * Added a macro that doesn't work... * * Revision 1.2 92/01/31 00:14:55 lee * gcc warnings; added an INLINE. * * Revision 1.1 91/06/16 18:08:39 lee * Initial revision * */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)qsort.c 5.7 (Berkeley) 5/17/90"; #endif /* LIBC_SCCS and not lint */ #include "error.h" #include #include #include "globals.h" #include "liblqtext.h" #include "fileinfo.h" #include "wordplace.h" /* * qsort.c: * Our own version of the system qsort routine which is faster by an average * of 25%, with lows and highs of 10% and 50%. * The THRESHold below is the insertion sort threshold, and has been adjusted * for records of size 48 bytes. * The MTHREShold is where we stop finding a better median. */ #define THRESH 4 /* threshold for insertion */ #define MTHRESH 6 /* threshold for median */ #if 0 #define qcmp(F1,F2) wpqcmp((t_WordPlace *)F1, (t_WordPlace *)F2) /* this is commented out because it dumps core sometimes... */ /* There's a function for this below, if you have problems. */ #define wpqcmp(F1,F2) \ (((F1)->FID == (F2)->FID) ? ( \ ((F1)->BlockInFile == (F2)->BlockInFile) ? \ ((F1)->WordInBlock - (F2)->WordInBlock) : \ ((F1)->BlockInFile - (F2)->BlockInFile) ) : \ ((F1)->FID - (F2)->FID)) #else /* I'll leave this in case the macro version above makes some compilers * explode... */ static int qcmp(F1, F2) /* the comparison routine */ register t_WordPlace *F1, *F2; { if (F1->FID != F2->FID) { return F1->FID - F2->FID; } else { long diff; if (F1->BlockInFile != F2->BlockInFile) { diff = F1->BlockInFile - F2->BlockInFile; } else { diff = F1->WordInBlock - F2->WordInBlock; } if (diff < 0) return -1; if (diff > 0) return 1; return 0; } } #endif static int qsz; /* size of each record */ static int thresh; /* THRESHold in chars */ static int mthresh; /* MTHRESHold in chars */ /* * qsort: * First, set up some global parameters for qst to share. Then, quicksort * with qst(), and then a cleanup insertion sort ourselves. Sound simple? * It's not... */ PRIVATE void qst(); /* * LQT_SortWordPlaces * Database/Update, Database/Words * * Sorts the given WordPlace array using Quicker Sort * to the in-memory stop list, to be ignored by LQT_ReadWord. * A WordPlace array must be sorted in ascending order by FID, then * by Block In File, then by Word Within Block, in order to be * written to the database. Since this is exactly the order generated * by reading files one at a time from beginning to end, this * routine is not currently used. * * Buggy, I think. * */ API void LQT_SortWordPlaces(db, NumberOfWordPlaces, WordPlaces) t_LQTEXT_Database *db; unsigned long NumberOfWordPlaces; t_WordPlace *WordPlaces; { char *base = (char *) WordPlaces; register char c, *i, *j, *lo, *hi; char *min, *max; if (NumberOfWordPlaces <= 1) return; #if 0 /* untested */ if (NumberOfWordPlaces == 2) { if (qcmp(&WordPlaces[0], &WordPlaces[1]) > 0) { t_WordPlace tmp = WordPlaces[0]; /* struct copy */ WprdPlaces[0] = WordPlaces[1]; WordPlaces[1] = tmp; } return; } #endif qsz = sizeof(t_WordPlace); thresh = qsz * THRESH; mthresh = qsz * MTHRESH; max = base + NumberOfWordPlaces * qsz; if (NumberOfWordPlaces >= THRESH) { qst(base, max); hi = base + thresh; } else { hi = max; } /* * First put smallest element, which must be in the first THRESH, in * the first position as a sentinel. This is done just by searching * the first THRESH elements (or the first n if n < THRESH), finding * the min, and swapping it into the first position. */ for (j = lo = base; (lo += qsz) < hi; ) if (qcmp(j, lo) > 0) j = lo; if (j != base) { /* swap j into place */ for (i = base, hi = base + qsz; i < hi; ) { c = *j; *j++ = *i; *i++ = c; } } /* * With our sentinel in place, we now run the following hyper-fast * insertion sort. For each remaining element, min, from [1] to [n-1], * set hi to the index of the element AFTER which this one goes. * Then, do the standard insertion sort shift on a character at a time * basis for each element in the frob. */ for (min = base; (hi = min += qsz) < max; ) { while (qcmp(hi -= qsz, min) > 0) /* void */; if ((hi += qsz) != min) { for (lo = min + qsz; --lo >= min; ) { c = *lo; for (i = j = lo; (j -= qsz) >= hi; i = j) *i = *j; *i = c; } } } } /* * qst: * Do a quicksort * First, find the median element, and put that one in the first place as the * discriminator. (This "median" is just the median of the first, last and * middle elements). (Using this median instead of the first element is a big * win). Then, the usual partitioning/swapping, followed by moving the * discriminator into the right place. Then, figure out the sizes of the two * partions, do the smaller one recursively and the larger one via a repeat of * this code. Stopping when there are less than THRESH elements in a partition * and cleaning up with an insertion sort (in our caller) is a huge win. * All data swaps are done in-line, which is space-losing but time-saving. * (And there are only three places where this is done). */ static void qst(base, max) char *base, *max; { register char c, *i, *j, *jj; register int ii; char *mid, *tmp; int lo, hi; /* * At the top here, lo is the number of characters of elements in the * current partition. (Which should be max - base). * Find the median of the first, last, and middle element and make * that the middle element. Set j to largest of first and middle. * If max is larger than that guy, then it's that guy, else compare * max with loser of first and take larger. Things are set up to * prefer the middle, then the first in case of ties. */ lo = max - base; /* number of elements as chars */ do { mid = i = base + qsz * ((lo / qsz) >> 1); if (lo >= mthresh) { j = (qcmp((jj = base), i) > 0 ? jj : i); if (qcmp(j, (tmp = max - qsz)) > 0) { /* switch to first loser */ j = (j == jj ? i : jj); if (qcmp(j, tmp) < 0) { j = tmp; } } if (j != i) { ii = qsz; do { c = *i; *i++ = *j; *j++ = c; } while (--ii); } } /* * Semi-standard quicksort partitioning/swapping */ for (i = base, j = max - qsz; ; ) { while (i < mid && qcmp(i, mid) <= 0) { i += qsz; } while (j > mid) { if (qcmp(mid, j) <= 0) { j -= qsz; continue; } tmp = i + qsz; /* value of i after swap */ if (i == mid) { /* j <-> mid, new mid is j */ mid = jj = j; } else { /* i <-> j */ jj = j; j -= qsz; } goto swap; } if (i == mid) { break; } else { /* i <-> mid, new mid is i */ jj = mid; tmp = mid = i; /* value of i after swap */ j -= qsz; } swap: ii = qsz; do { c = *i; *i++ = *jj; *jj++ = c; } while (--ii); i = tmp; } /* * Look at sizes of the two partitions, do the smaller * one first by recursion, then do the larger one by * making sure lo is its size, base and max are update * correctly, and branching back. But only repeat * (recursively or by branching) if the partition is * of at least size THRESH. */ i = (j = mid) + qsz; if ((lo = j - base) <= (hi = max - i)) { if (lo >= thresh) { qst(base, j); } base = i; lo = hi; } else { if (hi >= thresh) { qst(i, max); } max = j; } } while (lo >= thresh); }