/* ctype.c -- Copyright 1996 Liam R. E. Quin. * All Rights Reserved. * This code is NOT in the public domain. * See the file COPYRIGHT for full details. */ /* ctype -- character types * Liam Quin, June 1996 and later... * * $Id: ctype.c,v 1.4 2001/05/31 03:50:13 liam Exp $ */ #include "error.h" #include "globals.h" /* defines and declarations for database filenames */ #include #include #include #include #ifdef HAVE_STRING_H # include #else # include #endif #ifdef HAVE_STDLIB_H # include #else # include #endif #include /* comment out if you don't have it */ #include "fileinfo.h" #include "wordinfo.h" #include "wordrules.h" #include "emalloc.h" #include "chartype.h" #include "liblqtext.h" #include "lqtrace.h" /** System calls and library routines used in this file: **/ /** System calls: **/ /** Library Functions: **/ #ifndef toupper extern int toupper( # ifdef HAVE_PROTO int theChar # endif ); #endif #ifndef tolower extern int tolower( # ifdef HAVE_PROTO int theChar # endif ); #endif /** Functions within this file that need declaring: **/ /**/ /* * LQT_InitialiseCharacterTypes * Database/Defaults, Database/Words, Language/Stemming * *

Initialises the tables used to determine whether a given character is part of a word or not.

*

This function is called automatically by LQT_OpenDatabase().

* * Returns nothing (the function is declared void). *
*/
/* Implementation notes for LQTp_InitialiseCharacterTypes:
 *
 * db is the database whose ctypeTable, upperTable and lowerTable are
 * filled in for character codes 0..255.  Its "locale" option, its
 * IndexNumbers field and its ConfigurationFile name are read.
 * Nothing is returned.
 *
 * When LC_CTYPE is defined, setlocale() is called before the tables are
 * built, using the "locale" option if there is one and the environment's
 * locale ("") otherwise; the isupper()/islower()/... tests below are made
 * after that call.
 */
LIBRARY void
LQTp_InitialiseCharacterTypes(db)
    t_LQTEXT_Database *db;
{
    register int i, value;
    char *newLocale;
    char *PreferredLocale = LQT_GetOption(&db->Options, "locale");

#ifdef LC_CTYPE
    if (PreferredLocale) {
        newLocale = setlocale(LC_CTYPE, PreferredLocale);
        if (!newLocale) {
            /* NOTE(review): E_FATAL presumably does not return -- confirm
             * against error.h; if it does return, the E_WARN below is
             * also issued.
             */
            Error(E_FATAL,
                "%s: invalid LC_CTYPE locale \"%s\"; check $LANG is OK",
                db->ConfigurationFile,
                PreferredLocale
            );
        }
    } else {
        newLocale = setlocale(LC_CTYPE, "");
    }

    if (newLocale) {
        if (!PreferredLocale) {
            /* used only as the label in the trace message below */
            PreferredLocale = "not found in config file";
        }
        LQT_Trace(LQTRACE_DEBUG|LQTRACE_VERBOSE,
            "Locale (%s) set to %s",
            PreferredLocale,
            newLocale
        );
    } else {
        Error(E_WARN, "Locale not set; check $LANG and $LC_CTYPE");
    }
#endif

    /* Start from empty tables; a zero in upperTable/lowerTable is used
     * below to mean "no mapping stored yet".
     */
    for (i = 0; i < 256; i++) {
        db->ctypeTable[i] = 0;
        db->upperTable[i] = 0;
        db->lowerTable[i] = 0;
    }

    if (db->IndexNumbers == 0) {
        db->ctypeTable[LQT_DIGIT_TO_IGNORE] |=
            (LQT_C_ISDIGIT|LQT_C_STARTS_WORD);
    }

    /* this should really support reading an external character table. */
    for (i = 0; i < 256; i++) {
        value = 0;

        if (isupper(i)) {
            value |= LQT_C_ISUPPER;
            db->lowerTable[i] = tolower(i);
        } else if (islower(i)) {
            value |= LQT_C_ISLOWER;
            db->upperTable[i] = toupper(i);
        }

        /* toupper(some_non_alphabetical_character) could return
         * anything, but it might as well return its argument...
         *
         * same for tolower()...
         *
         * only set these if they are not already set, so as not
         * to overwrite asymmetrical values...
         */
        if (!db->upperTable[i]) {
            db->upperTable[i] = i;
        }
        if (!db->lowerTable[i]) {
            db->lowerTable[i] = i;
        }

        if (isdigit(i)) {
            value |= LQT_C_ISDIGIT;
        }

        if (ispunct(i)) {
            value |= LQT_C_ISPUNCT;
        }

        if (isalpha(i)) {
            value |= LQT_C_ISALPHA;
        }

        /* starts word */
        if (LQT_StartsWord(db, i)) {
            value |= LQT_C_STARTS_WORD;
        }

        /* a word character when surrounded by other word chars */
        if (LQT_OnlyWithinWord(db, i)) {
            value |= LQT_C_ONLY_WITHIN_WORD;
        }

        /* only in the word if at the end of the word */
        if (LQT_EndsWord(db, i)) {
            value |= LQT_C_ENDS_WORD;
        }

        /* OR rather than assign: every entry was zeroed above, so this is
         * the same as an assignment except for LQT_DIGIT_TO_IGNORE, whose
         * flags preset for IndexNumbers == 0 a plain assignment would
         * throw away.
         */
        db->ctypeTable[i] |= value;
    }

    /* Applied after the loop so that the loop cannot undo them. */
    db->ctypeTable[LQT_CHAR_TO_IGNORE] |= LQT_C_ISLOWER;
    db->ctypeTable[LQT_CHAR_TO_IGNORE] |= LQT_C_ISALPHA;
    db->lowerTable[LQT_CHAR_TO_IGNORE] = LQT_CHAR_TO_IGNORE;
    db->upperTable[LQT_CHAR_TO_IGNORE] = LQT_CHAR_TO_IGNORE;
}