Rework the hard-wired deduplication in archive into a new deduplicate action.

As per the discussion on the mailing list (http://tech.groups.yahoo.com/group/todotxt/message/3775), the automatic deduplication during archiving is unexpected and difficult to enforce in other implementations. Instead, make this a separate (optional) action.
2012-01-13 22:31:08 +01:00
parent 62d78a0034
commit bf2ca0ed6a
3 changed files with 71 additions and 18 deletions
--- a/todo.sh
+++ b/todo.sh
@@ -50,6 +50,7 @@ shorthelp()
 		    append|app ITEM# "TEXT TO APPEND"
 		    archive
 		    command [ACTIONS]
+		    deduplicate
 		    del|rm ITEM# [TERM]
 		    depri|dp ITEM#[, ITEM#, ITEM#, ...]
 		    do ITEM#[, ITEM#, ITEM#, ...]
@@ -181,6 +182,9 @@ help()
 		      Runs the remaining arguments using only todo.sh builtins.
 		      Will not call any .todo.actions.d scripts.

+		    deduplicate
+		      Removes duplicate lines from todo.txt.
+
 		    del ITEM# [TERM]
 		    rm ITEM# [TERM]
 		      Deletes the task on line ITEM# in todo.txt.
@@ -370,21 +374,6 @@ archive()
    [ $TODOTXT_VERBOSE -gt 0 ] && grep "^x " "$TODO_FILE"
    grep "^x " "$TODO_FILE" >> "$DONE_FILE"
    sed -i.bak '/^x /d' "$TODO_FILE"
-    cp "$TODO_FILE" "$TMP_FILE"
-	# look for duplicate lines with printable chars (i.e. duplicate tasks)
-	# and discard the second occurrence
-	# we start with an empty hold space on the first line.  For each line:
-	#   G - appends newline + hold space to the pattern space
-	#   s/\n/&&/; - double up the first new line so we catch adjacent dups
-	#   /^\([ -~]*\n\).*\n\1/d; 
-	#       If the first line (i.e. the new input line) of the hold space
-	#       is printable, and if that same pattern shows up again later as
-	#       an entire line, it's a duplicate.  Delete the current pattern space,
-	#	    quit this line and move on to the next
-	#   s/\n//;   - else, drop the doubled newline
-	#   h;        - replace the hold space with the expanded pattern space
-	#   P;        - print up to the first newline (that is, the input line)
-    sed -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TMP_FILE" > "$TODO_FILE"
    if [ $TODOTXT_VERBOSE -gt 0 ]; then
 	echo "TODO: $TODO_FILE archived."
    fi
@@ -1246,6 +1235,31 @@ note: PRIORITY must be anywhere from A to Z."
    cat "$REPORT_FILE"
    ;;

+"deduplicate" )
+    originalTaskNum=$( sed -n '$ =' "$TODO_FILE" )  # last line number, i.e. line count before deduplication
+
+    # Look for duplicate lines and discard every occurrence after the first.
+    # We start with an empty hold space on the first line.  For each line:
+    #   G - appends newline + hold space to the pattern space
+    #   s/\n/&&/; - double up the first newline so we catch adjacent dups
+    #   /^\([ -~]*\n\).*\n\1/d;
+    #       If the first line of the pattern space (i.e. the new input line)
+    #       matches [ -~]* (printable ASCII), and that same text shows up again
+    #       later as an entire line, it's a duplicate.  Delete the pattern
+    #       space, quit this line and move on to the next.
+    #   s/\n//;   - else, drop the doubled newline
+    #   h;        - replace the hold space with the expanded pattern space
+    #   P;        - print up to the first newline (that is, the input line)
+    sed -i.bak -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TODO_FILE"
+    newTaskNum=$( sed -n '$ =' "$TODO_FILE" )  # line count after deduplication
+    deduplicateNum=$(( originalTaskNum - newTaskNum ))
+    if [ $deduplicateNum -eq 0 ]; then
+        echo "TODO: No duplicate tasks found"
+    else
+        echo "TODO: $deduplicateNum duplicate task(s) removed"
+    fi
+    ;;
+
 * )
    usage;;
 esac