#! /bin/sh
#
# lqbetter -- report only the best lqsimilar match from each block.
#
# Usage, e.g.:
#   lqbetter "some longish but approximate phrase"
#
# Every argument is handed to lqsimilar unchanged.  lqsimilar output
# looks like this:
#
#   # FID 1 rank 336
#   2 2334 0 1 /tmp/titles/master-titles
#   3 2334 0 1 /tmp/titles/master-titles
#   4 2334 0 1 /tmp/titles/master-titles
#
# The first number is the number of words matched by the phrase, then
# come the block (or line) number, the word in the block, the FID and
# the filename.
#
# The matches from lqsimilar are sorted on the first word that matched;
# here they are regrouped by block and the best match of each block is
# picked out.  At most 100 lines are printed, highest word count first.

# lqbetter_main ARGS...
#   Runs lqsimilar with ARGS and writes the filtered matches to stdout.
lqbetter_main() {
  # Prefer mawk when it is installed, otherwise use the standard awk;
  # the program below only relies on POSIX awk features.
  lqbetter_awk=awk
  if command -v mawk >/dev/null 2>&1; then
    lqbetter_awk=mawk
  fi

  # Sort keys: block number (field 2) first, then match count (field 1),
  # both numeric, so every block arrives at awk as one contiguous run.
  # The -k form replaces the obsolescent "+1n -2 +0n" key syntax.
  lqsimilar ${@+"$@"} |
    sort -k2,2n -k1,1n |
    "$lqbetter_awk" '
      BEGIN { Block = -1 }

      # Header/comment lines emitted by lqsimilar are dropped.
      /^#/ { next }

      # Same block as the previous line: keep whichever line matched
      # more words.
      ($2 == Block) {
        if ($1 > BestMatch) {
          BestMatch = $1
          Line = $0
        }
        next
      }

      # First line of a new block: flush the winner of the previous
      # block and start tracking this one.
      {
        if (Line != "") {
          print Line
          Line = ""
        }
        Block = $2
        BestMatch = $1
        Line = $0
      }

      # Flush the winner of the final block.
      END {
        if (Line != "") {
          print Line
        }
      }
    ' |
    sort -nr |
    sed -e 100q
}

lqbetter_main ${@+"$@"}