/* wcregex.c -- Copyright 1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 *
 * wcregex.c -- convert wildcards to regexps, and match them
 * 
 * $Id: wcregex.c,v 1.2 1996/07/01 23:27:40 lee Exp $
 *
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"
#include <stdio.h>

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#else
#include <strings.h>
#endif
#include <sys/types.h>

#include "fileinfo.h"
#include "smalldb.h"
#include "wordindex.h"
#include "wordinfo.h"
#include "numbers.h"
#include "emalloc.h"
#include "wordrules.h" /* max word length */
#include "pblock.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "lqtrace.h"

/** regular expression matching code **/

#include <hsregex.h> /* Henry Spencer's regex header, renamed by Liam */

/* A prepared wildcard: LQT_PrepareWildCardForMatching allocates one of
 * these and returns it cast to unsigned char *; the matcher and the
 * finish routine cast it back.
 */
typedef struct {
    /* the regular expression text built from the wildcard,
     * NUL-terminated, in its own emalloc'd buffer
     */
    unsigned char *Expression;
    /* the result of running regcomp() over Expression,
     * also separately emalloc'd
     */
    regex_t *CompiledExpression;
} t_WildCardPatternObject;

/* LQT_PrepareWildCardForMatching -- turn a wildcard into a compiled regexp
 *
 * WildCard, Length:
 *	the wildcard text; exactly Length bytes are read from it.
 *	`*' is translated to `.*' and `?' to `.'; every other byte is
 *	matched literally.  The result is anchored with `^' and `$'.
 * PrefixLengthp:
 *	must not be null.  On return it holds the number of bytes of
 *	WildCard that come before the first `*' or `?' (zero if the
 *	wildcard starts with one), or the whole length if WildCard
 *	contains neither.
 * db:
 *	not used here.
 *
 * Returns a newly allocated t_WildCardPatternObject, cast to
 * unsigned char *; LQT_FinishWildCardAfterMatching releases it.
 * A regcomp() failure is reported with Error(E_FATAL).
 */
API unsigned char *
LQT_PrepareWildCardForMatching(db, WildCard, Length, PrefixLengthp)
    t_LQTEXT_Database *db;
    unsigned char *WildCard;
    int Length;
    int *PrefixLengthp;
{
    register unsigned char *p, *q;
    t_WildCardPatternObject *PatternObject;
    int SeenWildCard = 0;
    int i;

    if (!PrefixLengthp) {
	Error(E_FATAL|E_INTERNAL,
	    "LQT_PrepareWildCardForMatching: null PrefixLengthp not allowed"
	);
    }

    PatternObject = (t_WildCardPatternObject *) emalloc("WildcardObject",
	sizeof(t_WildCardPatternObject)
    );

    q = PatternObject->Expression = (unsigned char *) emalloc(
	"WildCard Expression",
	Length * 2 + 3
	/* times two for the worst case, x --> \x (or * --> .*) for
	 * every character; plus three for adding ^ and $ and \0
	 */
    );
    PatternObject->CompiledExpression = (regex_t *) emalloc("Compiled regex",
	sizeof(regex_t)
    );

    *q++ = '^';

    *PrefixLengthp = 0;

    for (p = WildCard; p - WildCard < Length; p++) {
	switch (*p) {
	case '*':
	case '?':
	    /* Use an explicit flag, not a zero prefix length, to notice
	     * the first wildcard: a wildcard at offset zero gives a
	     * legitimate prefix length of zero that later wildcards
	     * must not overwrite.
	     */
	    if (!SeenWildCard) {
		SeenWildCard = 1;
		*PrefixLengthp = (int) (p - WildCard);
	    }
	    *q++ = '.';
	    if (*p == '*') {
		*q++ = '*';
	    }
	    continue;
	case '^':
	case '.':
	case '[':
	case '$':
	case '\\':
	    /* These are the only characters (apart from `*', handled
	     * above) that are special in a basic regular expression.
	     * ( ) | + and { must NOT be quoted: the pattern is compiled
	     * with REG_BASIC, where they are already ordinary and where
	     * a backslash in front of ( ) or { makes them operators.
	     */
	    *q++ = '\\';
	    break;
	default:
	    break;
	}
	*q++ = (*p);
    }
    *q++ = '$';
    *q = '\0';

    if (!SeenWildCard) {
	/* unlikely, this would be a bug, as we shouldn't get here with
	 * a constant string; the whole string is then the prefix.
	 */
	*PrefixLengthp = (int) (p - WildCard);
    }

    i = regcomp(
	PatternObject->CompiledExpression,
	(char *) PatternObject->Expression,
	REG_BASIC | /* no enhanced syntax */
	REG_NOSUB /* we only need to match, not to substitute */
    );

    if (i != 0) {
	Error(E_FATAL,
"LQT_PrepareWildCardForMatching: regexp compile failed (E%d) for \"%s\"",
	    i,
	    PatternObject->Expression
	);
    }

    return (unsigned char *) PatternObject;
}

API void
LQT_FinishWildCardAfterMatching(db, Argument)
    t_LQTEXT_Database *db;
    unsigned char *Argument;
{
    t_WildCardPatternObject *PatternObject;

    PatternObject = (t_WildCardPatternObject *) Argument;
    regfree(PatternObject->CompiledExpression);
    efree((char *) PatternObject->CompiledExpression);
    efree((char *) PatternObject->Expression);
    efree((char *) PatternObject);
}


/* LQT_MatcherForWildCards -- test one string against a prepared wildcard
 *
 * String, StringLength:
 *	the candidate; regexec() is told (with REG_STARTEND) to look
 *	only at the first StringLength bytes.
 * Pattern, PrefixLength:
 *	when PrefixLength is greater than zero, the first PrefixLength
 *	bytes of Pattern are compared against String before the regexp
 *	is tried.
 * Argument:
 *	the object returned by LQT_PrepareWildCardForMatching.
 * db and PatternLength are not used here.
 *
 * Returns
 *	0 if the regexp matches;
 *	1 if it does not, or if String is shorter than the prefix;
 *	the negative strncmp() result if String compares less than
 *	    the prefix;
 *	LQT_WIDMATCH_FAILED if String compares greater than the prefix,
 *	    or if regexec() reports an error (which is also logged).
 */
API int
LQT_MatcherForWildCards(
    db,
    String,
    StringLength,
    Pattern, 
    PatternLength,
    PrefixLength,
    Argument
)
    t_LQTEXT_Database *db;
    unsigned char *String;
    int StringLength;
    unsigned char *Pattern;
    int PatternLength;
    int PrefixLength;
    unsigned char *Argument;
{
    t_WildCardPatternObject *Prepared = (t_WildCardPatternObject *) Argument;
    regmatch_t Range[2];
    int Status;

    if (PrefixLength > 0) {
	int Order;

	if (StringLength < PrefixLength) {
	    return 1;
	}

	Order = strncmp((char *) String, (char *) Pattern, PrefixLength);

	if (Order > 0) {
	    return LQT_WIDMATCH_FAILED; /* give up, there are no more */
	}
	if (Order < 0) {
	    return Order;
	}
	/* the prefix is the same, so go on to the regexp */
    }

    /* with REG_STARTEND, the first element delimits the text to search */
    Range[0].rm_so = 0;
    Range[0].rm_eo = StringLength;

    Status = regexec(
	Prepared->CompiledExpression,
	String,
	0, /* no submatch positions wanted */
	Range,
	REG_STARTEND
    );

    if (Status == REG_NOMATCH) {
	return 1;
    }
    if (Status == 0) {
	return 0;
    }

    Error(E_WARN|E_INTERNAL,
	"regexec returned %d, wildcard error", Status
    );
    return LQT_WIDMATCH_FAILED;
}