/* TroffFilter.c -- Copyright 1994, 1996 Liam R. Quin.
 * All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* $Id: TroffFilter.c,v 1.7 96/08/14 16:52:33 lee Exp $
 */

/* Filter for nroff, troff, groff, sqtroff files.
 * See FilterMain and wordrules.h for more info.
 *
 */

#ifdef SYSV
 extern int _filbuf(), _flsbuf(); /* for lint! */
#endif

#include "globals.h"
#include "error.h"

#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
#include <sys/types.h> /* for liblqutil */

#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif

#ifdef HAVE_STDLIB
# include <stdlib.h>
#endif

#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif

#include "wordrules.h"
#include "emalloc.h"
#include "lqutil.h"
#include "liblqtext.h"
#include "filter.h"

/** C Library functions that need to be declared: **/
/** Functions in this file that need to be declared **/

/* Output modes understood by OutputChar():
 *   PUTMODE_PRINT  -- the character is written unchanged;
 *   PUTMODE_IGNORE -- alphanumeric characters are replaced by
 *                     LQT_CHAR_TO_IGNORE, everything else is written
 *                     unchanged.
 */
#define PUTMODE_IGNORE	1
#define PUTMODE_PRINT	0

/* Forward declaration: doDelim() calls LQFpReadOneCharacter() before
 * its definition appears in this file.  The parameter list is only
 * spelled out when the compiler is known to accept prototypes
 * (HAVE_PROTO); otherwise this is an old-style declaration.
 */
PRIVATE int LQFpReadOneCharacter(
#ifdef HAVE_PROTO
    t_LQTEXT_Database *db,
    FILE *inputFile,
    char *fileName,
    FILE *OutputFile
#endif
);

/** **/

/* Reset to 0 at the start of every LQF_Troff_Copy() call; nothing else
 * in this file reads or writes it.
 */
PRIVATE int InWord = 0;

/* LQF_Troff_Copy -- filter a troff-family document.
 *
 * Copies InputFile to OutputFile one byte at a time.  Each byte comes
 * from LQFpReadOneCharacter(), which writes the replacement text for
 * troff escapes to OutputFile itself and reports that by returning 0;
 * such zero returns are therefore skipped rather than written.
 *
 * Name is passed straight through to the reader.
 *
 * Always returns 0; I/O errors are not reported yet.
 */
LIBRARY int
LQF_Troff_Copy(db, InputFile, Name, OutputFile)
    t_LQTEXT_Database *db;
    FILE *InputFile;
    char *Name;
    FILE *OutputFile;
{
    int Byte;

    InWord = 0;

    for (;;) {
	Byte = LQFpReadOneCharacter(db, InputFile, Name, OutputFile);

	if (Byte == EOF) {
	    break;
	}

	if (Byte == 0) {
	    /* the reader has already written whatever was needed */
	    continue;
	}

	putc(Byte, OutputFile);
    }

    return 0; /* TODO: error return */
}

/* The "(" that introduces a two-character name, as in \(em or \*(xx.
 * NOTE(review): presumably given a name so that an unbalanced literal
 * parenthesis does not appear throughout the code -- confirm.
 */
#define OPEN_PAREN '('

/* OutputChar -- write one character to OutputFile under the given mode.
 *
 * With Mode == PUTMODE_PRINT the character is written as it stands.
 * With any other mode (PUTMODE_IGNORE) a letter or digit is replaced by
 * LQT_CHAR_TO_IGNORE; all other characters are still written unchanged.
 * Exactly one byte is written in every case.
 */
PRIVATE void
OutputChar(ch, Mode, OutputFile)
    int ch;
    int Mode;
    FILE *OutputFile;
{
    /* isalnum() is only consulted when we are not in print mode */
    int Hide = (Mode != PUTMODE_PRINT) && isalnum(ch);

    if (Hide) {
	putc(LQT_CHAR_TO_IGNORE, OutputFile);
	return;
    }

    putc(ch, OutputFile);
}

/* doDelim -- copy a delimited escape argument, e.g. the 'xxx' of \h'xxx'
 * or the [xxx] of \*[xxx].
 *
 * Delim is the opening delimiter, which the caller has already read.
 * It is written first (a single quote is written as ':' because a
 * quote can otherwise occur inside a word), and then input is copied
 * through OutputChar() in the given Mode until the closing delimiter or
 * a newline is seen.  The closing delimiter for '[' is ']'; any other
 * delimiter closes itself.  The terminating character is written too,
 * again with ':' standing in for a single quote.
 *
 * Input is read with LQFpReadOneCharacter(), so escapes nested inside
 * the argument are rewritten in the usual way.
 *
 * Returns 0 once the argument has been closed, or EOF if the input ran
 * out first.
 */
PRIVATE int
doDelim(db, inputFile, Delim, Mode, fileName, OutputFile)
    t_LQTEXT_Database *db;
    FILE *inputFile;
    int Delim;
    int Mode;
    char *fileName;
    FILE *OutputFile;
{
    int ch;

    if (Delim == '\'') {
	putc(':', OutputFile); /* because ' can be within a word */
    } else {
	putc(Delim, OutputFile);
    }

    if (Delim == '[') {
	Delim = ']';
    }

    while ((ch = LQFpReadOneCharacter(db, inputFile, fileName, OutputFile)) != EOF) {
	if (ch == 0) {
	    /* A zero return means the reader has already written the
	     * text for a nested escape; there is no character to copy.
	     * Writing the 0 itself would put a stray NUL byte into the
	     * output.
	     */
	    continue;
	}

	if (ch == Delim || ch == '\n') {
	    if (ch == '\'') {
		/* ' can be within a word normally, but not here,
		 * since it's the delimiter character
		 */
		putc(':', OutputFile);
	    } else {
		putc(ch, OutputFile);
	    }
	    return 0;
	}

	OutputChar(ch, Mode, OutputFile);
    }
    return EOF;
}

/* doThingWithDelim -- handle an escape of the form \c'value'.
 *
 * ch is the escape letter; the caller has already written the
 * backslash.  The letter is written, the next input character is taken
 * as the opening delimiter, and the rest of the argument is handed to
 * doDelim().  The argument is copied in PUTMODE_IGNORE when the
 * delimiter is an open parenthesis and in PUTMODE_PRINT otherwise.
 *
 * Returns whatever doDelim() returns, or EOF if there was no delimiter
 * to read.
 */
PRIVATE int
doThingWithDelim(db, ch, inputFile, fileName, OutputFile)
    t_LQTEXT_Database *db;
    int ch;
    FILE *inputFile;
    char *fileName;
    FILE *OutputFile;
{
    int Delim;
    int Mode;

    putc(ch, OutputFile);

    Delim = getc(inputFile);
    if (Delim == EOF) {
	return EOF;
    }

    Mode = (Delim == OPEN_PAREN) ? PUTMODE_IGNORE : PUTMODE_PRINT;

    return doDelim(db, inputFile, Delim, Mode, fileName, OutputFile);
}

/* doThingWithName -- rewrite the name part of an escape: x, (xx or [xxx].
 *
 * The caller has already written the backslash and anything standing
 * for the escape letter.  One byte is written for every byte consumed:
 *
 *   x      a single space;
 *   [xxx]  handed to doDelim() in PUTMODE_IGNORE;
 *   (xx    "(" and a space for the first name character, then, looking
 *          at the character after the name:
 *            - if it is a letter or digit, a space for the second name
 *              character followed by that letter or digit itself;
 *            - otherwise the second name character passed through
 *              OutputChar() in PUTMODE_IGNORE, and the lookahead
 *              character is pushed back onto the input so that a
 *              following escape, or the closing delimiter of an
 *              enclosing argument, is still seen by the reader.
 *
 * Returns 0 when done (or doDelim()'s result for the [xxx] form), and
 * EOF if the input ends part way through; bytes read but not yet
 * written at that point are lost.
 */
PRIVATE int
doThingWithName(db, inputFile, fileName, OutputFile)
    t_LQTEXT_Database *db;
    FILE *inputFile;
    char *fileName;
    FILE *OutputFile;
{
    int ch;
    int First, Second, Next;

    if ((ch = getc(inputFile)) == EOF) {
	return EOF;
    }

    if (ch == '[') {
	return doDelim(db, inputFile, ch, PUTMODE_IGNORE, fileName, OutputFile);
    }

    if (ch != OPEN_PAREN) {
	/* one-character name */
	putc(' ', OutputFile);
	return 0;
    }

    /* two-character name */
    if ((First = getc(inputFile)) == EOF) {
	return EOF;
    }
    putc(OPEN_PAREN, OutputFile);
    putc(' ', OutputFile); /* stands for First */

    if ((Second = getc(inputFile)) == EOF) {
	return EOF;
    }
    if ((Next = getc(inputFile)) == EOF) {
	return EOF;
    }

    if (isalnum(Next)) {
	putc(' ', OutputFile); /* stands for Second */
	putc(Next, OutputFile);
    } else {
	OutputChar(Second, PUTMODE_IGNORE, OutputFile);
	/* Next is not part of the name: let the reader deal with it */
	(void) ungetc(Next, inputFile);
    }
    return 0;
}

/* LQFpReadOneCharacter -- read input up to the next character that the
 * caller has to copy, rewriting troff escapes on the way.
 *
 * Ordinary characters are returned to the caller.  When a backslash
 * escape is met, its replacement text is written directly to
 * OutputFile.  The replacement is meant to occupy exactly as many bytes
 * as the text it replaces, so that words do not move: escape letters,
 * names and sizes that would otherwise run into neighbouring words are
 * replaced by spaces or by LQT_CHAR_TO_IGNORE (written as q in the
 * examples below).
 *
 * Returns:
 *   EOF        at end of input (an escape cut short by end of input may
 *              lose the bytes that were read but not yet written);
 *   0          an escape was written and there is nothing for the
 *              caller to copy -- call again;
 *   otherwise  a character for the caller to write.  This is either an
 *              ordinary input character or the character that followed
 *              an escape.
 *
 * A NUL byte in the input is dropped, since 0 is taken by the
 * "call again" return.
 */
PRIVATE int
LQFpReadOneCharacter(db, inputFile, fileName, OutputFile)
    t_LQTEXT_Database *db;
    FILE *inputFile;
    char *fileName;
    FILE *OutputFile;
{
    int ch;

    while ((ch = getc(inputFile)) != EOF) {

	if (ch != '\\') {
	    if (ch) {
		return ch;
	    }
	    /* NUL in the input: skip it and read more */
	    continue;
	}

	/* Every case below either returns, or breaks out of the switch
	 * so that the loop goes on to read the next character.
	 */
	switch ((ch = getc(inputFile))) {
	    case EOF:
		return EOF;

	    case '\\':
		putc('\\', OutputFile);
		putc('\\', OutputFile);
		break;

	    case '\n':
		/* \ at the end of the line joins the lines together.
		 *
		 * TODO: join the lines together and adjust the characters
		 * so that we don't move the start of any words.
		 * That's a little tricky.
		 */
		putc('\\', OutputFile);
		putc(ch, OutputFile);
		break;

	    case '[':
		/* \[name] */
		putc('\\', OutputFile);
		(void) doDelim(
		    db, inputFile, ch, PUTMODE_IGNORE, fileName, OutputFile
		);
		break;

	    case OPEN_PAREN:
		/* \(xx: doThingWithName() expects to read the "(" itself */
		putc('\\', OutputFile);
		(void) ungetc(OPEN_PAREN, inputFile);
		(void) doThingWithName(db, inputFile, fileName, OutputFile);
		break;

	    case '_':
		putc('\\', OutputFile);
		OutputChar(ch, PUTMODE_IGNORE, OutputFile);
		return 0;

	    case '0':
		/* \0 is written as "\ " */
		putc('\\', OutputFile);
		putc(' ', OutputFile);
		break;

	    /* punctuation escapes, copied as they stand: */
	    case '^': case '`': case '{':  case '|': case '}': case ' ':
	    case '+': case '~': case '#': /* sqtroff and UCB ditroff only */
	    /* unknown escape characters: */
	    case 'C': case 'E': case 'F': case 'G':
	    case 'i': case 'j': case 'J':
	    case 'I': /* Immediate evaluation */
	    case 'K': case 'm': case 'M': case 'N':
	    case 'O': case 'P': case 'q': case 'R':
	    case 'U': case 'V': case 'W': case 'y': case 'Y':
	    default:
		putc('\\', OutputFile); /* TODO swallow the \ */
		putc(ch, OutputFile);
		break;

	    /* self-contained escapes of the form "\c" */
	    case 'a': case 'A': case 'c': case 'd': case 'e': case 'p':
	    case 'r': case 't': case 'u': case 'z':
		{
		    int Next;

		    putc('\\', OutputFile);

		    if ((Next = getc(inputFile)) == EOF) {
			/* drop it */
			return EOF;
		    }

		    if (isalnum(Next)) {
			/* \eword -> "\ word" */
			putc(' ', OutputFile);
			putc(Next, OutputFile);
		    } else if (isspace(Next)) {
			OutputChar(ch, PUTMODE_IGNORE, OutputFile);
			putc(Next, OutputFile);
		    } else {
			putc(LQT_CHAR_TO_IGNORE, OutputFile);
			(void) ungetc(Next, inputFile);
		    }
		    return 0;
		}

	    /* escapes with an argument, \c'value' or \c[value] */
	    case 'B': case 'b': case 'D':
	    case 'H': case 'h':
	    case 'l': case 'L': case 'o':
	    case 'S': /* slant */
	    case 'T': /* not clear which troff variant uses this one */
	    case 'v': case 'w': case 'x': case 'X':
		putc('\\', OutputFile);

		return doThingWithDelim(
		    db, ch, inputFile, fileName, OutputFile
		);

	    /* escapes with a name, \cx or \c[xxxx] or \c(xx */
	    case '*':
	    case 'Q': /* \Q is for sqtroff only, reads a qonfig variable */
	    case 'f': case 'g': case 'k':
		{
		    int Name, Next;

		    putc('\\', OutputFile);

		    if ((Name = getc(inputFile)) == EOF) return EOF;

		    if (Name == OPEN_PAREN || Name == '[') {
			/* doThingWithName() accounts for the name only,
			 * so the escape letter needs a byte of its own
			 * here to keep the output the same length as
			 * the input.
			 */
			putc(LQT_CHAR_TO_IGNORE, OutputFile);
			(void) ungetc(Name, inputFile);
			return doThingWithName(
			    db, inputFile, fileName, OutputFile
			);
		    }

		    if ((Next = getc(inputFile)) == EOF) return EOF;

		    if (LQT_OnlyWithinWord(db, Next) ||
			LQT_StartsWord(db, Next) ||
			isdigit(Next)
		    ) {
			/* \fBbold -> "\  bold" */
			putc(' ', OutputFile); /* the f */
			putc(' ', OutputFile); /* the font name */
			return Next;
		    }

		    putc(LQT_CHAR_TO_IGNORE, OutputFile);
		    putc(Name, OutputFile);
		    (void) ungetc(Next, inputFile);
		    return 0;
		}

	    /* special cases: */
	    case 'n': /* handle optional + or - first */
		{
		    int Next = getc(inputFile);

		    putc('\\', OutputFile);

		    if (Next == EOF) {
			putc('n', OutputFile);
			return EOF;
		    }

		    if (Next == '+' || Next == '-') {
			putc('n', OutputFile);
			putc(Next, OutputFile);
		    } else {
			/* \nx -> "\q ": the ignore character takes the
			 * place of the n (writing both would add a byte)
			 * and doThingWithName() blanks out the name.
			 */
			putc(LQT_CHAR_TO_IGNORE, OutputFile);
			(void) ungetc(Next, inputFile);
		    }
		    return doThingWithName(db, inputFile, fileName, OutputFile);
		}

	    case 's': /* \s[+-][expr], \s[+-](NN, \sNN, \s[+-]N */
		/* It's a little complicated to deal with \s,
		 * because we have to be careful about word counts.
		 * We're going to end up with lots of extra numbers :-(
		 *
		 * What is written depends on what follows the size:
		 *
		 * input     before a word    before anything else
		 * \s6       "\  "            "\q6"
		 * \s12      "\   "           "\q12"
		 * \s(22     "\q(  "          "\q(22"
		 * \s[22]    "\q[22]"         "\q[22]"
		 * \s+6      "\q+ "           "\q+6"
		 * \s+12     "\q+  "          "\q+12"
		 * \s+(22    "\q+(  "         "\q+(22"
		 * \s+[12]   "\q+[12]"        "\q+[12]"
		 *
		 * \s- is the same as \s+, not shown.
		 * "Before a word" includes a following digit or ".",
		 * e.g. in \s6.5 the 6.5 would read as a number.
		 *
		 * \s-2SMALL CAPS\s0 is quite a common construct, so this
		 * does have to be dealt with.
		 *
		 * A first digit of 1, 2 or 3 is taken to start a
		 * two-digit size; 0 and 4-9 stand alone.
		 */
		{
		    int Pending[3];	/* read, but not yet written */
		    int nPending = 0;
		    int Signed = 0;
		    int Next;
		    int i;

		    putc('\\', OutputFile);

		    if ((ch = getc(inputFile)) == EOF) {
			return EOF;
		    }

		    if (ch == '+' || ch == '-') {
			putc(LQT_CHAR_TO_IGNORE, OutputFile); /* the "s" */
			putc(ch, OutputFile); /* the + or - */
			if ((ch = getc(inputFile)) == EOF) {
			    return EOF;
			}
			Signed = 1;
		    }

		    if (ch == '[') {
			/* [nn] is always written out */
			if (!Signed) {
			    putc(LQT_CHAR_TO_IGNORE, OutputFile); /* the "s" */
			}
			(void) doDelim(
			    db,
			    inputFile,
			    ch,
			    PUTMODE_IGNORE,
			    fileName,
			    OutputFile
			);
			return 0;
		    }

		    if (ch == OPEN_PAREN) {
			if (!Signed) {
			    putc(LQT_CHAR_TO_IGNORE, OutputFile); /* the "s" */
			}
			putc(OPEN_PAREN, OutputFile);

			/* the two characters of the size */
			while (nPending < 2) {
			    Pending[nPending] = getc(inputFile);
			    if (Pending[nPending] == EOF) return EOF;
			    nPending++;
			}
		    } else if (isdigit(ch)) {
			if (!Signed) {
			    /* the "s" has not been written yet: it
			     * becomes a space or the ignore character
			     * along with the digits
			     */
			    Pending[nPending++] = LQT_CHAR_TO_IGNORE;
			}
			Pending[nPending++] = ch;

			if (ch >= '1' && ch <= '3') {
			    /* second digit of a two-digit size */
			    Pending[nPending] = getc(inputFile);
			    if (Pending[nPending] == EOF) return EOF;
			    nPending++;
			}
		    } else {
			/* something odd! perhaps someone put a \ in by
			 * mistake.  Leave the character for the next read.
			 */
			if (!Signed) {
			    putc('s', OutputFile); /* the s */
			}
			(void) ungetc(ch, inputFile);
			return 0;
		    }

		    /* Look at what follows the size. */
		    if ((Next = getc(inputFile)) == EOF) return EOF;

		    if (LQT_OnlyWithinWord(db, Next) ||
			LQT_StartsWord(db, Next) ||
			isdigit(Next) ||
			Next == '.'
		    ) {
			/* the size would run into the word: blank it out */
			for (i = 0; i < nPending; i++) {
			    putc(' ', OutputFile);
			}
			return Next;
		    }

		    /* \s12\fP or something, put the size out */
		    for (i = 0; i < nPending; i++) {
			putc(Pending[i], OutputFile);
		    }
		    (void) ungetc(Next, inputFile);
		    return 0;
		}
	}
    }

    return EOF;
}


