Fix deduplicate for non-printable (and non-ASCII) characters.

This commit is contained in:
Ingo Karkat
2012-01-13 22:44:35 +01:00
parent bf2ca0ed6a
commit fe5cdcb13a
2 changed files with 25 additions and 6 deletions

View File

@@ -1242,15 +1242,14 @@ note: PRIORITY must be anywhere from A to Z."
# We start with an empty hold space on the first line. For each line:
# G - appends newline + hold space to the pattern space
# s/\n/&&/; - double up the first newline so we catch adjacent dups
# /^\([ -~]*\n\).*\n\1/d;
# If the first line (i.e. the new input line) of the hold space
# is printable, and if that same pattern shows up again later as
# an entire line, it's a duplicate. Delete the current pattern space,
# /^\([^\n]*\n\).*\n\1/d;
# If the first line of the pattern space shows up again later as an
# entire line, it's a duplicate. Delete the current pattern space,
# quit this line and move on to the next
# s/\n//; - else, drop the doubled newline
# h; - replace the hold space with the expanded pattern space
# P; - print up to the first newline (that is, the input line)
sed -i.bak -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE"
sed -i.bak -n 'G; s/\n/&&/; /^\([^\n]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE"
newTaskNum=$( sed -n '$ =' "$TODO_FILE" )
deduplicateNum=$(( originalTaskNum - newTaskNum ))
if [ $deduplicateNum -eq 0 ]; then