#! /bin/sh

# e.g. lqbetter "some longish but approximate phrase"


# # FID 1 rank 336
# 2 2334 0 1 /tmp/titles/master-titles
# 3 2334 0 1 /tmp/titles/master-titles
# 4 2334 0 1 /tmp/titles/master-titles

# Each match line has five fields: the number of words of the phrase that
# matched, then the block (or line) number, the word within the block, the
# FID, and the filename.
# The matches from lqsimilar are sorted on the first word that matched;
# we pick out the best match from each block.

# lqbetter_best_per_block
#
# Reads lqsimilar-style match lines on stdin, where field 1 is the number of
# matched words and field 2 is the block number, and writes to stdout the
# single best line of each block.  The result is ordered by match count,
# highest first, and cut off after 100 lines.  Lines starting with "#" are
# dropped.
lqbetter_best_per_block() {
    # Bring the lines of each block together (field 2, numeric) and order
    # them by match count (field 1, numeric) inside the block.  The -k form
    # replaces the obsolete "+1n -2 +0n" key syntax, which current sort
    # implementations no longer accept.
    sort -k 2,2n -k 1,1n |
    awk '
BEGIN {
    # No real block has been seen yet.
    block = -1
}

# Header lines written by lqsimilar carry no match data.
/^#/ {
    next
}

# Another match in the block being collected: it replaces the remembered
# line only when it matched strictly more words.
($2 == block) {
    if ($1 > best) {
        best = $1
        line = $0
    }
    next
}

# First match of a new block: emit the winner of the previous block, then
# start collecting the new one.
{
    if (line != "") {
        print line
        line = ""
    }

    block = $2
    best = $1
    line = $0
}

# Emit the winner of the final block.
END {
    if (line != "") {
        print line
    }
}
' |
    # Highest match count first; keep only the first 100 results.
    sort -nr |
    sed -e 100q
}

# ${1+"$@"} passes the arguments through unchanged and expands to nothing
# at all (rather than one empty word) when the script is run without any.
lqsimilar ${1+"$@"} | lqbetter_best_per_block