#!/usr/local/bin/perl
#
# intersect.pl  -- find intersection of lq phrases
#
# Tom Christiansen 
# tchrist@convex.com
# 8 Aug 91
#
# Note: this probably needs updating, as the match format has changed:
# it now has an extra number at the start (the numbr of words to highlight).
# -- Liam Quin, 1996

$match 		= '-mh';
$lqprog 	= 'lqshow';

sub gensym { 'gensym' . ++$gensym; } 
*choices = &gensym;  # init list of lists

sub usage {
    select(STDERR);
    print <<FINIS;
usage: $0 [-CAB <num>] [-v] [-q]  [-d directory] [-s | -n] [-l | -L] 
	[-mp | -mh | -ma] phrase ... [ -a phrase ... ] ...


-A	lines of context after
-B	lines of context before
-C	lines of context before and after

-s	sort files
-n	sort numerically

-q	use lqkwik instead of lqshow
-v	verbose mode

-l	just list names
-L	just list base names 
-d	set LQTEXTDIR

-m[pha] lqphrase match fuzziness
	    p	precise
	    h	heuristic
	    a	almost anything goes

FINIS

    exit 1;

} 

while ($_ = shift) {
    if    (/^-m[pha]$/) { $match = $_; } 
    elsif (/^-q$/) 	{ $lqprog = 'lqkwik'; } 
    elsif (/^-n$/) 	{ $sort= 'sort -n +2'; }
    elsif (/^-s$/) 	{ $sort= 'sort +2'; }
    elsif (/^-v$/) 	{ $verbose++; }
    elsif (/^-a$/) 	{ *choices = &gensym; } # start a new list 
    elsif (/^-d(.*)$/) 	{ $ENV{'LQTEXTDIR'} = $1 || shift; } 
    elsif (/^-l$/i) {
	$listonly = 1;
	$shortlist = /L/;
    }
    elsif (/^-[AB](\d*)$/) { 
	y/A-Z/a-z/; # gotta pass lower case
	$lqshow_opts .= ' '. $_;
	$lqshow_opts .= ' ' . shift unless $1;
    } 
    elsif (/^-C(\d*)$/) {
	$context = $1 || shift;
	$lqshow_opts = "-a $context -b $context";
    }
    elsif (/^-(.*)/) {
	warn "unknown option: $1\n";
	&usage;
    } 
    else { 
	push(@choices, $_); 
    } 
}

&usage unless @choices;

&read_choices(*need, 'gensym1', 1);

for ($i = 2; $i <= $gensym; $i++) {
    &read_choices(*want, "gensym$i");
    undef %need;
    grep($need{$_}++, @need);
    @need = grep($need{$_}, @want);
}


print STDERR "computing intersection: " if $verbose && $gensym > 1;

for $file (@need) {
    push(@use, $lines{$file}) if $lines{$file};
} 

$TMP = "/tmp/inter$$";
open (TMP, ($sort ? "| $sort " : '') . ">$TMP" );
for (@use) {
    s/^ /0/; # convex printf is broken
    print TMP;
} 
close TMP;

$count = @use;

if ($verbose) {
    print STDERR "total of $count matches\n";
    sleep 1;
}


if ($listonly) {
    open (TMP, "<$TMP");
    while (<TMP>) {
	s/.*\s(\S+)$/$1/;
	$shortlist && s#.*/##;
	print;
    } 
} else {
    unlink $TMP, die "No matches found\n" unless $count;
    $cmd = "$lqprog ";
    $cmd .= "$lqshow_opts " if $lqprog =~ /show/;
    $cmd .= "-f $TMP";
    warn "running: $cmd\n" if $verbose;
    system $cmd;
}

unlink $TMP;

exit;


##############################################################

sub read_choices {
    local(*list, *choices, $init) = @_;
    local($file);
    local($count) = 0;

    @list = ();
    local(@pretty);
    for (@choices) { push (@pretty, "\"$_\""); } 
    local($pretty) = join(', ', @pretty);
    print STDERR "computing $pretty: " if $verbose;

    # don't let the shell get even close to touching my choices
    die "can't fork: $!" unless defined ($pid = open(KID, "-|"));
    unless ($pid) {
	exec 'lqphrase', $match, @choices;
	die "can't exec lqphrase: $!" ;
    }

    while (<KID>) {
	$count++;
	$file = (split)[2];
	push(@list, $file);
	$lines{$file} = $_ if $init || $lines{$file}; # show most restrictive match only
    }
    print STDERR "$count\n" if $verbose;
    close KID;
    die "lqphrase exited badly" if $?;
    die "No matches for $pretty\n" unless $count;
} 

