/* wcregex.c -- Copyright 1996 Liam R. E. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 *
 * wcregex.c -- convert wildcards to regexps, and match them
 *
 * $Id: wcregex.c,v 1.2 1996/07/01 23:27:40 lee Exp $
 *
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

/* NOTE(review): the names of the system headers below were lost from the
 * copy of this file that was reviewed; they have been restored from the
 * surrounding HAVE_* guards -- please confirm against the repository.
 */
#include <stdio.h>
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <sys/types.h> /* NOTE(review): best guess at the lost name -- confirm */

#include "fileinfo.h"
#include "smalldb.h"
#include "wordindex.h"
#include "wordinfo.h"
#include "numbers.h"
#include "emalloc.h"
#include "wordrules.h" /* max word length */
#include "pblock.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "lqtrace.h"

/** regular expression matching code **/

/* Henry Spencer's regex header, renamed by Liam.
 * NOTE(review): the renamed header's file name was lost from the reviewed
 * copy; <regex.h> is a placeholder -- restore the project's own name.
 */
#include <regex.h>

/* State carried between the Prepare, Matcher and Finish calls.
 * Callers receive it as an opaque (unsigned char *) handle.
 */
typedef struct {
    unsigned char *Expression;	/* regexp text built from the wildcard */
    regex_t *CompiledExpression;	/* the same expression after regcomp() */
} t_WildCardPatternObject;

/* LQT_PrepareWildCardForMatching -- turn a wildcard into a compiled regexp.
 *
 * The wildcard is rewritten as an anchored regular expression:
 *     *  becomes  .*
 *     ?  becomes  .
 * and every other character that is a regexp operator is preceded by a
 * backslash so that it matches itself.
 *
 * db            -- not used by this function
 * WildCard      -- the wildcard text; Length bytes are read
 * Length        -- number of bytes of WildCard to convert
 * PrefixLengthp -- must not be null; receives the number of leading bytes
 *                  of WildCard that precede the first * or ?, or Length
 *                  if the wildcard contains neither
 *
 * Returns a pointer to a newly allocated t_WildCardPatternObject, cast to
 * (unsigned char *); hand it to LQT_MatcherForWildCards() as Argument and
 * release it with LQT_FinishWildCardAfterMatching().
 *
 * Calls Error() with E_FATAL if PrefixLengthp is null or if the generated
 * expression cannot be compiled.
 */
API unsigned char *
LQT_PrepareWildCardForMatching(db, WildCard, Length, PrefixLengthp)
    t_LQTEXT_Database *db;
    unsigned char *WildCard;
    int Length;
    int *PrefixLengthp;
{
    unsigned char *p, *q;
    t_WildCardPatternObject *PatternObject;
    int SeenWildCard = 0;
    int ErrorCode;

    if (!PrefixLengthp) {
	Error(E_FATAL|E_INTERNAL,
	    "LQT_PrepareWildCardForMatching: null PrefixLengthp not allowed"
	);
    }

    PatternObject = (t_WildCardPatternObject *) emalloc(
	"WildcardObject",
	sizeof(t_WildCardPatternObject)
    );

    q = PatternObject->Expression = (unsigned char *) emalloc(
	"WildCard Expression",
	Length * 2 + 3
	/* times two for the worst case, x --> \x for every character;
	 * plus three for adding ^ and $ and \0
	 */
    );

    PatternObject->CompiledExpression = (regex_t *) emalloc(
	"Compiled regex",
	sizeof(regex_t)
    );

    *q++ = '^';
    *PrefixLengthp = 0;

    for (p = WildCard; p - WildCard < Length; p++) {
	switch (*p) {
	case '*':
	case '?':
	    /* Only the first wildcard ends the constant prefix.  A
	     * separate flag is needed because a wildcard at offset zero
	     * gives a prefix length of zero, which must not be mistaken
	     * for "no wildcard seen yet".
	     */
	    if (!SeenWildCard) {
		SeenWildCard = 1;
		*PrefixLengthp = (int) (p - WildCard);
	    }
	    *q++ = '.';
	    if (*p == '*') {
		*q++ = '*';
	    }
	    continue;
	case '^':
	case '.':
	case '[':
	case '$':
	case '(':
	case ')':
	case '|':
	case '+':
	case '{': /* don't need to quote the } it seems */
	case '\\':
	    *q++ = '\\';
	    /* fall through */
	default:
	    break;
	}
	*q++ = (*p);
    }

    *q++ = '$';
    *q = '\0';

    if (!SeenWildCard) {
	/* unlikely, this would be a bug, as we shouldn't get here with
	 * a constant string
	 */
	*PrefixLengthp = (int) (p - WildCard);
    }

    /* The characters escaped above are the operators of POSIX *extended*
     * regular expressions.  In a basic expression \( \) and \{ are
     * themselves the operators, so compiling with the basic syntax would
     * reject or misread any wildcard containing a literal ( ) or {.
     */
    ErrorCode = regcomp(
	PatternObject->CompiledExpression,
	(char *) PatternObject->Expression,
	REG_EXTENDED |	/* the syntax the quoting above was written for */
	REG_NOSUB	/* we only need to match, not to substitute */
    );

    if (ErrorCode != 0) {
	Error(E_FATAL,
	    "LQT_PrepareWildCardForMatching: regexp compile failed (E%d) for \"%s\"",
	    ErrorCode,
	    PatternObject->Expression
	);
    }

    return (unsigned char *) PatternObject;
}

/* LQT_FinishWildCardAfterMatching -- release a prepared wildcard.
 *
 * db       -- not used by this function
 * Argument -- the value returned by LQT_PrepareWildCardForMatching();
 *             a null pointer is ignored
 *
 * Frees the compiled expression, the expression text and the pattern
 * object itself; Argument must not be used afterwards.
 */
API void
LQT_FinishWildCardAfterMatching(db, Argument)
    t_LQTEXT_Database *db;
    unsigned char *Argument;
{
    t_WildCardPatternObject *PatternObject;

    if (!Argument) {
	return; /* nothing was prepared, so there is nothing to free */
    }

    PatternObject = (t_WildCardPatternObject *) Argument;

    regfree(PatternObject->CompiledExpression);
    efree((char *) PatternObject->CompiledExpression);
    efree((char *) PatternObject->Expression);
    efree((char *) PatternObject);
}

/* LQT_MatcherForWildCards -- test one string against a prepared wildcard.
 *
 * db            -- not used by this function
 * String        -- the candidate; StringLength bytes are matched
 * StringLength  -- number of bytes of String to consider
 * Pattern       -- the original wildcard text; only its first PrefixLength
 *                  bytes are examined here
 * PatternLength -- not used by this function
 * PrefixLength  -- the constant-prefix length reported by
 *                  LQT_PrepareWildCardForMatching(); when it is zero the
 *                  prefix comparison is skipped
 * Argument      -- the value returned by LQT_PrepareWildCardForMatching()
 *
 * Returns:
 *     0                     String matches the wildcard
 *     1                     String does not match (or is shorter than
 *                           the prefix)
 *     a negative value      String compares less than the prefix
 *     LQT_WIDMATCH_FAILED   String compares greater than the prefix
 *                           ("give up, there are no more"), or regexec()
 *                           reported an error, which is also logged as a
 *                           warning
 */
API int
LQT_MatcherForWildCards(
    db,
    String,
    StringLength,
    Pattern,
    PatternLength,
    PrefixLength,
    Argument
)
    t_LQTEXT_Database *db;
    unsigned char *String;
    int StringLength;
    unsigned char *Pattern;
    int PatternLength;
    int PrefixLength;
    unsigned char *Argument;
{
    t_WildCardPatternObject *PatternObject;
    int i;
    regmatch_t pmatch[2];

    /* Cheap test first: compare the constant prefix before running the
     * regular expression matcher.
     */
    if (PrefixLength > 0) {
	if (StringLength < PrefixLength) {
	    return 1;
	}

	i = strncmp((char *) String, (char *) Pattern, PrefixLength);
	if (i > 0) {
	    return LQT_WIDMATCH_FAILED; /* give up, there are no more */
	}
	if (i < 0) {
	    return i;
	}
	/* assert: i == 0 */
    }

    PatternObject = (t_WildCardPatternObject *) Argument;

    /* With REG_STARTEND the matcher reads the subject's extent from
     * pmatch[0] instead of looking for a terminating \0.
     */
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = StringLength;

    i = regexec(
	PatternObject->CompiledExpression,
	(char *) String,
	0, /* nmatch array used for substitutions */
	pmatch,
	REG_STARTEND
    );

    if (i == REG_NOMATCH) {
	return 1;
    }
    if (i == 0) {
	return 0;
    }

    Error(E_WARN|E_INTERNAL,
	"regexec returned %d, wildcard error",
	i
    );
    return LQT_WIDMATCH_FAILED;
}