explained the sed duplicate-removal pattern

This commit is contained in:
Paul Roub
2012-01-09 10:34:05 -05:00
parent 492d98e50a
commit cd2f585fb6
2 changed files with 14 additions and 1 deletions

13
todo.sh
View File

@@ -361,6 +361,19 @@ archive()
grep "^x " "$TODO_FILE" >> "$DONE_FILE"
sed -i.bak '/^x /d' "$TODO_FILE"
cp "$TODO_FILE" "$TMP_FILE"
# look for duplicate lines with printable chars (i.e. duplicate tasks)
# and keep only the first occurrence, discarding every later copy
# we start with an empty hold space on the first line. For each line:
# G - appends newline + hold space to the pattern space
# s/\n/&&/; - double up the first newline so that adjacent dups are caught too
# /^\([ -~]*\n\).*\n\1/d;
# If the first line of the pattern space (i.e. the new input line)
# is printable, and if that same text shows up again later in the
# pattern space as an entire line (i.e. among the lines already seen),
# it's a duplicate. Delete the pattern space without printing it and
# start over with the next input line
# s/\n//; - else, drop the doubled newline
# h; - replace the hold space with the expanded pattern space
# P; - print up to the first newline (that is, the input line)
sed -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P' "$TMP_FILE" > "$TODO_FILE"
if [ $TODOTXT_VERBOSE -gt 0 ]; then
echo "TODO: $TODO_FILE archived."