From 62d78a003482a4c4463b0f3ebd2bf5568fe2338f Mon Sep 17 00:00:00 2001
From: Paul Roub <paul@roub.net>
Date: Mon, 9 Jan 2012 10:34:05 -0500
Subject: [PATCH] explained the sed duplicate-removal pattern

---
 tests/t1900-archive.sh |  2 +-
 todo.sh                | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tests/t1900-archive.sh b/tests/t1900-archive.sh
index 5ac9316..630359b 100755
--- a/tests/t1900-archive.sh
+++ b/tests/t1900-archive.sh
@@ -2,7 +2,7 @@
 
 test_description='archive functionality
 
-Ensure we can append items successfully.
+Ensure we can archive items successfully.
 '
 . ./test-lib.sh
 
diff --git a/todo.sh b/todo.sh
index f945fa6..b817735 100755
--- a/todo.sh
+++ b/todo.sh
@@ -371,6 +371,19 @@ archive()
     grep "^x " "$TODO_FILE" >> "$DONE_FILE"
     sed -i.bak '/^x /d' "$TODO_FILE"
     cp "$TODO_FILE" "$TMP_FILE"
+	# look for duplicate lines consisting only of printable ASCII chars
+	# (i.e. duplicate tasks) and discard every occurrence after the first
+	# we start with an empty hold space on the first line.  For each line:
+	#   G - appends newline + hold space to the pattern space
+	#   s/\n/&&/; - double up the first newline so we catch adjacent dups
+	#   /^\([ -~]*\n\).*\n\1/d;
+	#       If the first line (i.e. the new input line) of the pattern space
+	#       is printable, and if that same text shows up again later in the
+	#       pattern space as an entire line, it's a duplicate.  Delete the
+	#       pattern space, stop processing this line and move on to the next
+	#   s/\n//;   - else, drop the doubled newline
+	#   h;        - replace the hold space with the expanded pattern space
+	#   P;        - print up to the first newline (that is, the input line)
     sed -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TMP_FILE" > "$TODO_FILE"
     if [ $TODOTXT_VERBOSE -gt 0 ]; then
 	echo "TODO: $TODO_FILE archived."