#!/bin/sh
# -u: treat unset variables as errors. -f: disable globbing, so unquoted
# expansions of filenames are never expanded as patterns.
set -uf
# Split words on newline and tab only, so filenames containing spaces survive
# unquoted `$(...)` expansions. (The tab comes last because command
# substitution strips trailing newlines.)
IFS="$(printf '\n\t')"
# Export the locale so that child processes see it as well: the checksum
# tool's "OK" status must not be translated (CHECK mode filters on it), and
# find/grep/cut should treat filenames as plain bytes.
LC_ALL="C"
export LC_ALL

# Copyright (C) 2019 Jamie Nguyen
#
# A simple shell script to recursively generate, update and verify checksums
# for files you care about. It's useful for detecting bit rot.
#
# It's written in POSIX shell, but requires GNU coreutils, BusyBox or some
# other collection that includes similar checksum tools.

VERSION=1.1.2

# Defaults; each may be overridden by a command-line option.
COMMAND="sha512sum"
CHECKFILE="./.rotcheck"

APPEND_MODE=0
CHECK_MODE=0
DELETE_MODE=0
UPDATE_MODE=0

IGNORE_MISSING=0
FOLLOW_SYMLINKS=1
VERBOSE=0
WARN_FORMATTING=0
EXCLUDE_HIDDEN=0
FORCE_UPDATE=0

# Print the help text to stdout and exit successfully.
# Globals: VERSION (read)
usage() {
  cat << EOF
rotcheck $VERSION

Usage: rotcheck MODE [OPTIONS]
   or: rotcheck MODE [OPTIONS] -- [DIRECTORY]... [ARBITRARY FIND OPTION]...

Recursively generate, update and verify checksums.

MODES:
  -a    APPEND mode: Record checksums for any files without a checksum
        already. Never modify existing checksums.
  -c    CHECK mode: Check that files checksums are the same.
  -d    DELETE mode: Remove checksums for files that don't exist.
  -u    APPEND-AND-UPDATE mode: Like append-only mode, but also update
        checksums for files with a modification date newer than the
        checksum file. (NB: Also see \`-M\`.)

OPTIONS:
  -b COMMAND    Checksum command to use. Default: sha512sum
  -f FILE       File to store checksums. For relative paths, prefix with "./"
                or the checksum file will be checksummed. Default: ./.rotcheck
  -h            Display this help.
  -n            Don't follow symlinks. The default is to follow symlinks.
  -v            Be more verbose when adding, deleting, changing or verifying
                checksums.
  -w            Warn about improperly formatted checksum lines.
  -x            Exclude all hidden files and directories when generating
                checksums. The default is to include them.
  -M            Use with \`-u\` to update checksums regardless of modification
                time. This is very slow so avoid if possible; try \`touch\`
                instead to bump the modification time of specific files.
                WARNING: The checksums might have changed due to bit rot so
                use this option with care!

  (specific to GNU coreutils >= 8.25)
  -i            Ignore missing files when verifying checksums.

Supported commands:
  GNU coreutils:
    md5sum, sha1sum, sha224sum, sha256sum, sha384sum, sha512sum, b2sum
  BusyBox (applets must be symlinked):
    md5sum, sha1sum, sha256sum, sha512sum, sha3sum
  BSD & macOS (install GNU coreutils):
    gmd5sum, gsha1sum, gsha224sum, gsha256sum, gsha384sum, gsha512sum, gb2sum

Examples:
  # Create checksum file (located at "./.rotcheck"):
  rotcheck -a

  # You've added some new files and need to append some checksums:
  rotcheck -va

  # You've edited some files and need to update the checksums (for files with
  # a modification time newer than the checksum file):
  rotcheck -vu

  # Verify checksums:
  rotcheck -c

  # Search other directories instead of the current directory.
  # WARNING: checksums might get duplicated if mixing relative and absolute
  # paths, or if you change the way you specify directory paths!
  rotcheck -a -- /mnt/archive-2018/ /mnt/archive-2019/

  # Exclude .git folders (these arguments are passed directly to find):
  rotcheck -a -- ! -path '*/\\.git/*'
EOF
  exit 0
}

# Print each argument on its own line, then exit with status 1.
# Arguments: one or more message lines.
fail() {
  printf '%s\n' "$@"
  exit 1
}

# Curiously, I stumbled across a bug in bash-3.0.16 (c. 2004) or older
# where \0177 (DEL) isn't handled properly. See the `find_safe` function below.
# bash-3.1 (c. 2005), dash-0.5.2 (c. 2005), and zsh-3.1 (c. 2000) all work
# and probably others too.
#
# The expansions must be quoted: an unquoted empty expansion turns the test
# into `[ -n ]`, which is always true. The pattern is anchored on the dot so
# that only major versions 0-2 and 3.0.x are rejected.
if [ -n "${BASH+x}" ] && [ -n "${BASH_VERSION+x}" ]; then
  if printf '%s' "${BASH_VERSION:-x}" | grep -qE '^[0-2]\.|^3\.0\.'; then
    fail "bash-3.0.16 and older are broken." \
      "Try bash>=3.1, dash, zsh, or another POSIX shell."
  fi
fi

# Command-line arguments. `getopts` is POSIX, while `getopt` is not.
[ $# -gt 0 ] && [ "$1" = "--help" ] && usage

while getopts ":acdub:f:hinvwxM" opt; do
  case "$opt" in
    a) APPEND_MODE=1 ;;
    c) CHECK_MODE=1 ;;
    d) DELETE_MODE=1 ;;
    u) UPDATE_MODE=1 ;;
    b) COMMAND="$OPTARG" ;;
    f) CHECKFILE="$OPTARG" ;;
    h) usage ;;
    i) IGNORE_MISSING=1 ;;
    n) FOLLOW_SYMLINKS=0 ;;
    v) VERBOSE=1 ;;
    w) WARN_FORMATTING=1 ;;
    x) EXCLUDE_HIDDEN=1 ;;
    M) FORCE_UPDATE=1 ;;
    \?) fail "-$OPTARG: Invalid argument" ;;
    :) fail "-$OPTARG requires an argument" ;;
  esac
done
shift $(($OPTIND - 1))

# A few sanity checks. Exactly one mode must be selected.
MODE=$(($APPEND_MODE + $CHECK_MODE + $DELETE_MODE + $UPDATE_MODE))
if [ "$MODE" -eq 0 ]; then
  fail "Please specify one of -a, -c, -d, or -u." \
    "See \`rotcheck -h\` for help with usage."
elif [ "$MODE" -gt 1 ]; then
  fail "You can only use one of -a, -c, -d, or -u options." \
    "See \`rotcheck -h\` for help with usage."
fi

# CHECK and DELETE operate on an existing checksum file.
if [ "$CHECK_MODE" -eq 1 ] || [ "$DELETE_MODE" -eq 1 ]; then
  if [ ! -f "$CHECKFILE" ]; then
    fail "$CHECKFILE: No such file." \
      "Try running \`rotcheck -a\` first, or see \`rotcheck -h\`."
  fi
fi

# Every mode except DELETE runs the checksum command, so validate it for all
# of them (CHECK mode included) rather than only for -a and -u.
if [ "$DELETE_MODE" -eq 0 ] \
  && ! command -v "$COMMAND" >/dev/null 2>/dev/null; then
  fail "$COMMAND: command not found" \
    "Try specifying a supported command using \`rotcheck -b COMMAND\`." \
    "You may need to install GNU coreutils or BusyBox." \
    "On *BSD, GNU coreutils commands begin with 'g', like 'gsha512sum'." \
    "See \`rotcheck -h\` for help with usage."
fi

# When printing text to terminal, make sure it won't do anything unexpected.
# Arguments: strings to print; control characters and invalid UTF-8 sequences
#            are removed, and a single newline is appended.
printf_sanitized() {
  printf '%s' "$@" | tr -d '[:cntrl:]' | iconv -cs -f UTF-8 -t UTF-8
  printf '\n'
}

# Run the checksum command in verify mode against the checksum file.
# Globals: COMMAND, CHECKFILE, IGNORE_MISSING, WARN_FORMATTING (read);
#          IGNORE, WARN (written)
# Returns: the exit status of the checksum command.
verify_checksums() {
  IGNORE=""
  if [ "$IGNORE_MISSING" -eq 1 ]; then
    IGNORE="--ignore-missing"
  fi
  WARN=""
  if [ "$WARN_FORMATTING" -eq 1 ]; then
    WARN="-w"
  fi
  # $WARN and $IGNORE are deliberately unquoted so that they vanish when empty.
  $COMMAND -c $WARN $IGNORE -- "$CHECKFILE"
}

# Just verify checksums.
if [ "$CHECK_MODE" -eq 1 ]; then
  # Only GNU coreutils supports `--quiet`, so use `grep -v` instead.
  # Unfortunately, pipefail isn't POSIX so to return the exit status from the
  # checksum command, we have to be clever (aka crazy) with file descriptors
  # and subshells instead.
  if [ "$VERBOSE" -eq 1 ]; then
    verify_checksums
    exit $?
  else
    # fd 4: the real stdout, where the filtered report goes.
    # fd 3: a side channel carrying the checksum command's exit status to
    #       the `read` at the end of the outer pipeline.
    exec 4>&1
    (
      exec 3>&1
      (
        # 2>&1 preserves order of stdout/stderr.
        verify_checksums 2>&1; printf '%d' $? 1>&3
      ) | grep -Ev ': OK$' 1>&4
      exec 3>&-
    ) | ( read -r retval; exit $retval ); retval=$?
    exec 4>&-
    exit $retval
  fi
fi

# Delete checksums for files that no longer exist.
if [ "$DELETE_MODE" -eq 1 ]; then
  # Snapshot the filename column before editing, then walk it line by line.
  # Reading whole lines (instead of word-splitting the snapshot) keeps blank
  # or malformed lines in the count, so `i` always names the right line of
  # the file being edited.
  entries="$(cut -d ' ' -f 3- -- "$CHECKFILE")"
  i=1
  while IFS= read -r file; do
    if [ -n "$file" ] && [ ! -f "$file" ]; then
      # `sed -i` isn't POSIX (nor is `mktemp`), so use `ex` instead.
      if ! ex -s -- "$CHECKFILE" << EX_SCRIPT
${i}d
x
EX_SCRIPT
      then
        # Bail immediately; continuing would delete the wrong lines.
        fail "Failed to update checksum file."
      fi

      # Print what checksums were deleted.
      if [ "$VERBOSE" -eq 1 ]; then
        printf '%s' "DELETED: "
        printf_sanitized "$file"
      fi
    else
      # Only increment the line number if we didn't delete a line.
      i=$(($i + 1))
    fi
  done << SNAPSHOT
$entries
SNAPSHOT
  exit $?
fi

# For safety and sanity, ignore all filenames that have control characters
# like newline, tab, delete etc.
# Arguments: optional search paths and/or extra `find` expressions.
# Globals:   FOLLOW_SYMLINKS, EXCLUDE_HIDDEN, CHECKFILE (read);
#            FIND_L, FIND_FOLLOW, FIND_DOT, HIDDEN, first_char (written)
# Outputs:   one matching regular file per line on stdout.
find_safe() {
  FIND_L=""
  FIND_FOLLOW=""
  if [ "$FOLLOW_SYMLINKS" -eq 1 ]; then
    # Old versions of findutils don't have -L. Use it if available.
    if find -L / -maxdepth 0 -type d >/dev/null 2>/dev/null; then
      FIND_L="-L"
    else
      FIND_FOLLOW="-follow"
    fi
  fi

  # POSIX find requires that you specify the search path either first
  # or immediately after -H/-L. Use current directory by default unless
  # user has specified a path.
  FIND_DOT="./"
  if [ $# -gt 0 ]; then
    first_char="$(printf '%s' "$1" | cut -c 1)"
    # Replace search path unless first arg is a non-path `find` option.
    if [ "$first_char" != "-" ] \
      && [ "$first_char" != "!" ] && [ "$first_char" != "(" ]; then
      FIND_DOT=""
    fi
  fi

  HIDDEN=""
  if [ "$EXCLUDE_HIDDEN" -eq 1 ]; then
    HIDDEN='*/\.*'
  fi

  # Build the expression in the positional parameters (local to this
  # function). $FIND_FOLLOW is unquoted so that it vanishes when empty.
  set -- "$@" $FIND_FOLLOW -type f ! -path "$CHECKFILE" ! -path "$HIDDEN"

  # Exclude any name containing one of the control characters \001-\037 or
  # \177 (DEL), given here as octal codes.
  for octal in \
    001 002 003 004 005 006 007 010 011 012 013 014 015 016 017 020 \
    021 022 023 024 025 026 027 030 031 032 033 034 035 036 037 177; do
    set -- "$@" ! -name "$(printf '*%b*' "\\0${octal}")"
  done

  # $FIND_L and $FIND_DOT are unquoted so that they vanish when empty.
  find $FIND_L $FIND_DOT "$@"
}

# List the files whose checksums should be refreshed: every file when -M was
# given, otherwise only files modified more recently than the checksum file.
# Arguments: passed through to find_safe.
find_updated_files() {
  if [ "$FORCE_UPDATE" -eq 1 ]; then
    find_safe "$@"
  else
    find_safe "$@" -newer "$CHECKFILE"
  fi
}

# Print the line number in the checksum file whose filename column is exactly
# "$file", or 0 when there is none.
# Globals: file, CHECKFILE (read)
#
# This function could be replaced entirely with the much simpler:
#   cut -d ' ' -f 3- "$CHECKFILE" | grep -Fxn -- "$file" | cut -d ':' -f 1
# But this function is slightly faster as it avoids passing huge chunks of text
# (ie, the whole checksum file minus the first column) through a pipe.
get_line_number() {
  # Avoid `grep -E` as filename characters might get interpreted (eg, $).
  for l in $(grep -Fn -- "$file" "$CHECKFILE" | cut -d ':' -f 1); do
    # A substring hit is only a candidate; confirm the whole filename matches.
    if sed -n -e "${l}p" -- "$CHECKFILE" \
      | cut -d ' ' -f 3- | grep -Fxq -- "$file" >/dev/null; then
      printf '%d' "$l"
      return 0
    fi
  done
  printf '%d' "0"
}

# Checksum files created from here on are readable by the owner only.
umask 077

# For files with a modification date newer than the checksum file, if there's
# an existing checksum then update it. Otherwise append a new checksum.
#
# Looping over `$(find ...)` is safe here: IFS is newline/tab, globbing is off
# and find_safe never emits a name containing a control character.
if [ "$UPDATE_MODE" -eq 1 ] && [ -f "$CHECKFILE" ]; then
  for file in $(find_updated_files "$@"); do
    line_num="$(get_line_number)"
    if [ "${line_num:-0}" -eq 0 ]; then
      # No checksum yet, so append one.
      if ! $COMMAND -- "$file" >> "$CHECKFILE"; then
        fail "Failed to write to checksum file."
      fi

      # Print what checksums were appended.
      if [ "$VERBOSE" -eq 1 ]; then
        printf '%s' "ADDED: "
        printf_sanitized "$file"
      fi
    else
      old="$(sed -n -e "${line_num}p" -- "$CHECKFILE" | cut -d ' ' -f 1)"
      new="$($COMMAND -- "$file")"

      # Should never happen, but double check these aren't empty:
      if [ -z "$old" ] || [ -z "$new" ]; then
        continue
      fi

      # Compare the stored hash with the hash field of the fresh line.
      if [ "$old" != "${new%% *}" ]; then
        # `sed -i` isn't POSIX (nor is `mktemp`), so use `ex` instead.
        if ! ex -s -- "$CHECKFILE" << EX_SCRIPT
${line_num}c
$new
.
x
EX_SCRIPT
        then
          # Bail immediately if something went wrong.
          fail "Failed to update checksum file."
        fi

        # Print what checksums were changed.
        if [ "$VERBOSE" -eq 1 ]; then
          printf '%s' "CHANGED: "
          printf_sanitized "$file"
        fi
      fi
    fi
  done
fi

# Append checksums for files that have no checksum yet.
if [ "$APPEND_MODE" -eq 1 ] || [ "$UPDATE_MODE" -eq 1 ]; then
  for file in $(find_safe "$@"); do
    # Use fixed-string matching (`grep -F`) throughout: with a regular
    # expression, filename characters such as [ ] * . would be interpreted and
    # a file could fail to match its own line, getting appended again on
    # every run.
    # The first grep isn't strictly needed, but grep+cut+grep is faster
    # than just cut+grep here.
    if [ ! -f "$CHECKFILE" ] || ! grep -F -- "$file" "$CHECKFILE" \
      | cut -d ' ' -f 3- | grep -Fxq -- "$file"; then
      if ! $COMMAND -- "$file" >> "$CHECKFILE"; then
        fail "Failed to write to checksum file."
      fi

      # Print what checksums were appended.
      if [ "$VERBOSE" -eq 1 ]; then
        printf '%s' "ADDED: "
        printf_sanitized "$file"
      fi
    fi
  done
fi