Rework fixed archive deduplication into new deduplicate action.

As per discussion on the mailing list (http://tech.groups.yahoo.com/group/todotxt/message/3775), the automatic deduplication during archiving is unexpected and difficult to enforce in other implementations. Rather, make this a separate (optional) action.
This commit is contained in:
Ingo Karkat
2012-01-13 22:31:08 +01:00
parent 62d78a0034
commit bf2ca0ed6a
3 changed files with 71 additions and 18 deletions

44
todo.sh
View File

@@ -50,6 +50,7 @@ shorthelp()
append|app ITEM# "TEXT TO APPEND"
archive
command [ACTIONS]
deduplicate
del|rm ITEM# [TERM]
depri|dp ITEM#[, ITEM#, ITEM#, ...]
do ITEM#[, ITEM#, ITEM#, ...]
@@ -181,6 +182,9 @@ help()
Runs the remaining arguments using only todo.sh builtins.
Will not call any .todo.actions.d scripts.
deduplicate
Removes duplicate lines from todo.txt.
del ITEM# [TERM]
rm ITEM# [TERM]
Deletes the task on line ITEM# in todo.txt.
@@ -370,21 +374,6 @@ archive()
[ $TODOTXT_VERBOSE -gt 0 ] && grep "^x " "$TODO_FILE"
grep "^x " "$TODO_FILE" >> "$DONE_FILE"
sed -i.bak '/^x /d' "$TODO_FILE"
cp "$TODO_FILE" "$TMP_FILE"
# look for duplicate lines with printable chars (i.e. duplicate tasks)
# and discard the second occurrence
# we start with an empty hold space on the first line. For each line:
# G - appends newline + hold space to the pattern space
# s/\n/&&/; - double up the first new line so we catch adjacent dups
# /^\([ -~]*\n\).*\n\1/d;
# If the first line (i.e. the new input line) of the hold space
# is printable, and if that same pattern shows up again later as
# an entire line, it's a duplicate. Delete the current pattern space,
# quit this line and move on to the next
# s/\n//; - else, drop the doubled newline
# h; - replace the hold space with the expanded pattern space
# P; - print up to the first newline (that is, the input line)
sed -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TMP_FILE" > "$TODO_FILE"
if [ $TODOTXT_VERBOSE -gt 0 ]; then
echo "TODO: $TODO_FILE archived."
fi
@@ -1246,6 +1235,31 @@ note: PRIORITY must be anywhere from A to Z."
cat "$REPORT_FILE"
;;
"deduplicate" )
# Remove every line of todo.txt that exactly repeats an earlier line, keeping
# the first occurrence, then report how many lines were dropped.
# The file is edited in place; sed leaves the previous contents in a backup
# file with a .bak suffix.

# '$ =' prints the number of the last line, i.e. the line count. For an empty
# file sed prints nothing; the arithmetic expansion below treats the resulting
# empty variable as 0.
originalTaskNum=$( sed -n '$ =' "$TODO_FILE" )

# Look for duplicate lines and discard the second occurrence.
# We start with an empty hold space on the first line. For each line:
#   G         - appends newline + hold space to the pattern space
#   s/\n/&&/; - double up the first newline so we catch adjacent dups
#   /^\([^\n]*\n\).*\n\1/d;
#       If the first line of the pattern space (i.e. the new input line)
#       shows up again later as an entire line, it's a duplicate. Delete
#       the current pattern space, quit this line and move on to the next.
#       The line is matched with [^\n]* (anything but a newline) instead of
#       the printable-ASCII range [ -~]*, so tasks containing tabs or
#       non-ASCII characters are deduplicated as well.
#   s/\n//;   - else, drop the doubled newline
#   h;        - replace the hold space with the expanded pattern space
#   P;        - print up to the first newline (that is, the input line)
sed -i.bak -n 'G; s/\n/&&/; /^\([^\n]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE"

newTaskNum=$( sed -n '$ =' "$TODO_FILE" )
deduplicateNum=$(( originalTaskNum - newTaskNum ))
if [ "$deduplicateNum" -eq 0 ]; then
    echo "TODO: No duplicate tasks found"
else
    echo "TODO: $deduplicateNum duplicate task(s) removed"
fi
;;
* )
usage;;
esac