Fix deduplicate for non-printable (and non-ASCII) characters.

This commit is contained in:
Ingo Karkat
2012-01-13 22:44:35 +01:00
parent bf2ca0ed6a
commit fe5cdcb13a
2 changed files with 25 additions and 6 deletions

View File

@@ -6,7 +6,7 @@ Ensure we can deduplicate items successfully.
' '
. ./test-lib.sh . ./test-lib.sh
cat >> todo.txt<<EOF cat > todo.txt <<EOF
duplicated duplicated
two two
x done x done
@@ -35,4 +35,24 @@ test_todo_session 'deduplicate without duplicates' <<EOF
TODO: No duplicate tasks found TODO: No duplicate tasks found
EOF EOF
cat > todo.txt <<EOF
normal task
a bold action
something else
a bold action
something more
EOF
test_todo_session 'deduplicate with non-printable duplicates' <<EOF
>>> todo.sh deduplicate
TODO: 1 duplicate task(s) removed
>>> todo.sh -p ls
2 a bold action
1 normal task
3 something else
4 something more
--
TODO: 4 of 4 tasks shown
EOF
test_done test_done

View File

@@ -1242,15 +1242,14 @@ note: PRIORITY must be anywhere from A to Z."
# We start with an empty hold space on the first line. For each line: # We start with an empty hold space on the first line. For each line:
# G - appends newline + hold space to the pattern space # G - appends newline + hold space to the pattern space
# s/\n/&&/; - double up the first newline so we catch adjacent dups # s/\n/&&/; - double up the first newline so we catch adjacent dups
# /^\([ -~]*\n\).*\n\1/d; # /^\([^\n]*\n\).*\n\1/d;
# If the first line (i.e. the new input line) of the pattern space # If the first line of the pattern space shows up again later as an
# is printable, and if that same pattern shows up again later as # entire line, it's a duplicate. Delete the current pattern space,
# an entire line, it's a duplicate. Delete the current pattern space,
# quit this line and move on to the next # quit this line and move on to the next
# s/\n//; - else, drop the doubled newline # s/\n//; - else, drop the doubled newline
# h; - replace the hold space with the expanded pattern space # h; - replace the hold space with the expanded pattern space
# P; - print up to the first newline (that is, the input line) # P; - print up to the first newline (that is, the input line)
sed -i.bak -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE" sed -i.bak -n 'G; s/\n/&&/; /^\([^\n]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE"
newTaskNum=$( sed -n '$ =' "$TODO_FILE" ) newTaskNum=$( sed -n '$ =' "$TODO_FILE" )
deduplicateNum=$(( originalTaskNum - newTaskNum )) deduplicateNum=$(( originalTaskNum - newTaskNum ))
if [ $deduplicateNum -eq 0 ]; then if [ $deduplicateNum -eq 0 ]; then