loadout/bin/rotcheck
mediocregopher 1957081c12 Update branch with all changes which could be brought in from private branches
For a while I was keeping a private branch where there were a lot of
non-public things included, and that became the de-facto branch while
this one lagged. This one is now up-to-date, all private stuff is dealt
with via config files which are not committed.
2023-08-13 21:35:17 +02:00

354 lines
12 KiB
Bash
Executable File

#!/bin/sh
# -u: treat unset variables as errors. -f: disable pathname expansion so that
# filenames containing glob characters are never expanded by the shell.
set -uf
# Split unquoted expansions on newline and tab only (never on spaces), so
# filenames with spaces survive `for file in $(...)` loops.
IFS="$(printf '\n\t')"
# Force the C locale for this shell AND for every tool it spawns. Without the
# export, child processes keep the user's locale unless LC_ALL was already in
# the environment; the checksum tool may then print a translated "OK", which
# the `grep -Ev ': OK$'` filter in CHECK mode would fail to hide.
LC_ALL="C"
export LC_ALL
# Copyright (C) 2019 Jamie Nguyen <j@jamielinux.com>
#
# A simple shell script to recursively generate, update and verify checksums
# for files you care about. It's useful for detecting bit rot.
#
# It's written in POSIX shell, but requires GNU coreutils, BusyBox or some
# other collection that includes similar checksum tools.
VERSION=1.1.2
# Defaults; most can be overridden by the command-line options parsed below.
COMMAND="sha512sum"
CHECKFILE="./.rotcheck"
# Exactly one of the four *_MODE flags must end up set to 1.
APPEND_MODE=0
CHECK_MODE=0
DELETE_MODE=0
UPDATE_MODE=0
IGNORE_MISSING=0
FOLLOW_SYMLINKS=1
VERBOSE=0
WARN_FORMATTING=0
EXCLUDE_HIDDEN=0
FORCE_UPDATE=0
# Print the help text (with $VERSION interpolated) to stdout and exit 0.
# The here-document delimiter is unquoted, so backticks are escaped and
# `\\` is written where a single literal backslash should be printed.
usage() {
    cat << EOF
rotcheck $VERSION
Usage: rotcheck MODE [OPTIONS]
or: rotcheck MODE [OPTIONS] -- [DIRECTORY]... [ARBITRARY FIND OPTION]...
Recursively generate, update and verify checksums.
MODES:
-a APPEND mode: Record checksums for any files without a checksum
already. Never modify existing checksums.
-c CHECK mode: Check that files checksums are the same.
-d DELETE mode: Remove checksums for files that don't exist.
-u APPEND-AND-UPDATE mode: Like append-only mode, but also update
checksums for files with a modification date newer than the
checksum file. (NB: Also see \`-M\`.)
OPTIONS:
-b COMMAND Checksum command to use. Default: sha512sum
-f FILE File to store checksums. For relative paths, prefix with "./"
or the checksum file will be checksummed. Default: ./.rotcheck
-h Display this help.
-n Don't follow symlinks. The default is to follow symlinks.
-v Be more verbose when adding, deleting, changing or verifying
checksums.
-w Warn about improperly formatted checksum lines.
-x Exclude all hidden files and directories when generating
checksums. The default is to include them.
-M Use with \`-u\` to update checksums regardless of modification
time. This is very slow so avoid if possible; try \`touch\`
instead to bump the modification time of specific files.
WARNING: The checksums might have changed due to bit rot so
use this option with care!
(specific to GNU coreutils >= 8.25)
-i Ignore missing files when verifying checksums.
Supported commands:
GNU coreutils:
md5sum, sha1sum, sha224sum, sha256sum, sha384sum, sha512sum, b2sum
BusyBox (applets must be symlinked):
md5sum, sha1sum, sha256sum, sha512sum, sha3sum
BSD & macOS (install GNU coreutils):
gmd5sum, gsha1sum, gsha224sum, gsha256sum, gsha384sum, gsha512sum, gb2sum
Examples:
# Create checksum file (located at "./.rotcheck"):
rotcheck -a
# You've added some new files and need to append some checksums:
rotcheck -va
# You've edited some files and need to update the checksums (for files with
# a modification time newer than the checksum file):
rotcheck -vu
# Verify checksums:
rotcheck -c
# Search other directories instead of the current directory.
# WARNING: checksums might get duplicated if mixing relative and absolute
# paths, or if you change the way you specify directory paths!
rotcheck -a -- /mnt/archive-2018/ /mnt/archive-2019/
# Exclude .git folders (these arguments are passed directly to find):
rotcheck -a -- ! -path '*/\\.git/*'
EOF
    exit 0
}
# Print each argument on its own line to stderr, then abort the script with
# exit status 1. Diagnostics go to stderr so they never mix with data that a
# caller may be capturing from stdout.
fail() {
    printf '%s\n' "$@" >&2
    exit 1
}
# Curiously, I stumbled across a bug in bash-3.0.16 (c. 2004) or older
# where \0177 (DEL) isn't handled properly. See the `find_safe` function below.
# bash-3.1 (c. 2005), dash-0.5.2 (c. 2005), and zsh-3.1 (c. 2000) all work
# and probably others too.

# Return 0 when the bash version string "$1" denotes major version 0-2 or a
# 3.0 release, 1 otherwise. The major number must be followed by a dot so
# that two-digit majors (10.x, 20.x, ...) are not mistaken for 1.x or 2.x.
bash_is_too_old() {
    case "$1" in
        [0-2].*|3.0*) return 0 ;;
        *) return 1 ;;
    esac
}
# The expansions are quoted on purpose: an unquoted `[ -n ${BASH+x} ]`
# collapses to `[ -n ]` when the variable is unset, which is always true.
if [ -n "${BASH+x}" ] && [ -n "${BASH_VERSION:-}" ] \
    && bash_is_too_old "$BASH_VERSION"; then
    fail "bash-3.0.16 and older are broken." \
        "Try bash>=3.1, dash, zsh, or another POSIX shell."
fi
# Parse the command line with the POSIX `getopts` builtin (`getopt` is not
# POSIX). `--help` is recognised only as the very first argument because
# getopts has no notion of long options.
if [ $# -gt 0 ] && [ "$1" = "--help" ]; then
    usage
fi
# The leading ':' selects silent error reporting: an unknown option yields
# opt='?', a missing option-argument yields opt=':', and OPTARG holds the
# offending option letter in both cases.
while getopts ":acdub:f:hinvwxM" opt; do
    case "$opt" in
        a) APPEND_MODE=1 ;;
        c) CHECK_MODE=1 ;;
        d) DELETE_MODE=1 ;;
        u) UPDATE_MODE=1 ;;
        b) COMMAND="$OPTARG" ;;
        f) CHECKFILE="$OPTARG" ;;
        h) usage ;;
        i) IGNORE_MISSING=1 ;;
        n) FOLLOW_SYMLINKS=0 ;;
        v) VERBOSE=1 ;;
        w) WARN_FORMATTING=1 ;;
        x) EXCLUDE_HIDDEN=1 ;;
        M) FORCE_UPDATE=1 ;;
        \?) fail "-$OPTARG: Invalid argument" ;;
        :) fail "-$OPTARG requires an argument" ;;
    esac
done
# Drop the parsed options; what remains in "$@" is handed on to `find`.
shift $((OPTIND - 1))
# A few sanity checks.
# Exactly one mode flag must be set; the flags are 0/1 so their sum counts
# how many modes were requested.
MODE=$(($APPEND_MODE + $CHECK_MODE + $DELETE_MODE + $UPDATE_MODE))
if [ "$MODE" -eq 0 ]; then
    fail "Please specify one of -a, -c, -d, or -u." \
        "See \`rotcheck -h\` for help with usage."
elif [ "$MODE" -gt 1 ]; then
    fail "You can only use one of -a, -c, -d, or -u options." \
        "See \`rotcheck -h\` for help with usage."
fi
# CHECK and DELETE operate on an already existing checksum file.
if [ "$CHECK_MODE" -eq 1 ] || [ "$DELETE_MODE" -eq 1 ]; then
    if [ ! -f "$CHECKFILE" ]; then
        fail "$CHECKFILE: No such file." \
            "Try running \`rotcheck -a\` first, or see \`rotcheck -h\`."
    fi
fi
# Every mode except DELETE runs the checksum command, so make sure it
# exists. This is a separate `if` (not part of the chain above) so that CHECK
# mode is covered too instead of failing later with a bare shell error.
if [ "$DELETE_MODE" -ne 1 ] \
    && ! command -v "$COMMAND" >/dev/null 2>/dev/null; then
    fail "$COMMAND: command not found" \
        "Try specifying a supported command using \`rotcheck -b COMMAND\`." \
        "You may need to install GNU coreutils or BusyBox." \
        "On *BSD, GNU coreutils commands begin with 'g', like 'gsha512sum'." \
        "See \`rotcheck -h\` for help with usage."
fi
# Print all arguments, concatenated, followed by a single newline, after
# making them safe for a terminal: `tr` deletes every control character and
# `iconv -c` drops byte sequences that are not valid UTF-8.
# The command substitution cannot lose anything here: the only things it
# strips are trailing newlines and NUL bytes, and `tr` has already removed
# both.
printf_sanitized() {
    printf '%s\n' \
        "$(printf '%s' "$@" | tr -d '[:cntrl:]' | iconv -cs -f UTF-8 -t UTF-8)"
}
# Run the checksum command in check mode (-c) against $CHECKFILE, adding
# `-w` when WARN_FORMATTING is 1 and `--ignore-missing` when IGNORE_MISSING
# is 1. The exit status is that of the checksum command.
verify_checksums() {
    warn_flag=""
    missing_flag=""
    if [ "$WARN_FORMATTING" -eq 1 ]; then
        warn_flag="-w"
    fi
    if [ "$IGNORE_MISSING" -eq 1 ]; then
        missing_flag="--ignore-missing"
    fi
    # The flag variables are intentionally unquoted: an empty flag must
    # disappear instead of being passed as an empty argument.
    $COMMAND -c $warn_flag $missing_flag -- "$CHECKFILE"
}
# Just verify checksums.
# CHECK mode always ends the script: the exit status is the one returned by
# verify_checksums. With -v every result line is shown; otherwise lines
# ending in ": OK" are filtered out.
if [ $CHECK_MODE -eq 1 ]; then
# Only GNU coreutils supports `--quiet`, so use `grep -v` instead.
# Unfortunately, pipefail isn't POSIX so to return the exit status from the
# checksum command, we have to be clever (aka crazy) with file descriptors
# and subshells instead.
if [ $VERBOSE -eq 1 ]; then
verify_checksums
exit $?
else
# fd 4: a copy of the script's real stdout, used by grep further down.
exec 4>&1
(
# Inside this subshell stdout is the pipe feeding the `read` subshell below;
# fd 3 keeps a handle on that pipe so the status can be sent through it.
exec 3>&1
(
# 2>&1 preserves order of stdout/stderr.
# The checksum command's status is written to fd 3 (the outer pipe), not to
# the pipe that grep reads.
verify_checksums 2>&1; printf '%d' $? 1>&3
# grep drops the ": OK" lines and writes the rest to the real stdout (fd 4),
# so nothing but the status number reaches the outer pipe.
) | grep -Ev ': OK$' 1>&4
exec 3>&-
# The reading subshell turns the number back into an exit status; as the last
# command of the pipeline its status is what $? holds afterwards.
) | ( read -r retval; exit $retval ); retval=$?
exec 4>&-
exit $retval
fi
fi
# Delete checksums for files that no longer exist.
# DELETE mode always ends the script. The checksum file is walked line by
# line while `i` tracks the current line number in the (shrinking) file.
if [ "$DELETE_MODE" -eq 1 ]; then
    # Take a snapshot of the filename column first: the checksum file itself
    # is rewritten by `ex` while we iterate.
    recorded_names="$(cut -d ' ' -f 3- -- "$CHECKFILE")"
    i=1
    # Read the snapshot one line at a time instead of word-splitting it.
    # Word splitting silently drops empty fields and splits on tabs, which
    # would desynchronise `i` from the real line number and delete the wrong
    # entries.
    while IFS= read -r file; do
        # Lines without a filename column (blank or malformed) are left alone
        # but still counted. Existing files are kept.
        if [ -z "$file" ] || [ -f "$file" ]; then
            i=$(($i + 1))
            continue
        fi
        # `sed -i` isn't POSIX (nor is `mktemp`), so use `ex` instead.
        # Bail out if the edit fails: continuing would leave `i` pointing at
        # the wrong line for every later deletion.
        if ! printf '%s\n' "${i}d" "x" | ex -s -- "$CHECKFILE"; then
            fail "Failed to update checksum file."
        fi
        # The line number is deliberately not incremented after a deletion:
        # the next entry has moved up into position $i.
        # Print what checksums were deleted.
        if [ "$VERBOSE" -eq 1 ]; then
            printf '%s' "DELETED: "
            printf_sanitized "$file"
        fi
    done << EOF
$recorded_names
EOF
    exit 0
fi
# For safety and sanity, ignore all filenames that have control characters
# like newline, tab, delete etc.
#
# List (one path per line, via `find`) every regular file below the search
# roots, excluding the checksum file itself, optionally hidden files, and
# any path that contains a control character.
# Arguments: optional directories followed by arbitrary `find` expressions;
#            without a leading directory the search starts at "./".
# Globals:   FOLLOW_SYMLINKS, EXCLUDE_HIDDEN, CHECKFILE (read).
find_safe() {
    FIND_L=""
    FIND_FOLLOW=""
    if [ "$FOLLOW_SYMLINKS" -eq 1 ]; then
        # Old versions of findutils don't have -L. Use it if available.
        if find -L / -maxdepth 0 -type d >/dev/null 2>/dev/null; then
            FIND_L="-L"
        else
            FIND_FOLLOW="-follow"
        fi
    fi
    # POSIX find requires that you specify the search path either first
    # or immediately after -H/-L. Use current directory by default unless
    # user has specified a path.
    FIND_DOT="./"
    if [ $# -gt 0 ]; then
        # Replace search path unless first arg is a non-path `find` option.
        case "$1" in
            -*|!*|\(*) ;;
            *) FIND_DOT="" ;;
        esac
    fi
    HIDDEN=""
    [ "$EXCLUDE_HIDDEN" -eq 1 ] && HIDDEN='*/\.*'
    # Append our own expression after the caller's arguments. $FIND_FOLLOW is
    # unquoted so that it vanishes when empty.
    set -- "$@" $FIND_FOLLOW -type f ! -path "$CHECKFILE" ! -path "$HIDDEN"
    # Exclude every path containing a byte in 001-037 or 177 (octal). The
    # test is `-path`, not `-name`: `-name` only looks at the last component,
    # so a file inside a directory whose name contains a newline would still
    # be printed and then be split into bogus paths by the callers' `for`
    # loops. The '*' wrappers are produced inside the command substitution
    # because a bare trailing newline would be stripped by it.
    ctrl_code=1
    while [ "$ctrl_code" -le 31 ]; do
        set -- "$@" ! -path \
            "$(printf '*%b*' "$(printf '\\0%03o' "$ctrl_code")")"
        ctrl_code=$(($ctrl_code + 1))
    done
    set -- "$@" ! -path "$(printf '*%b*' '\0177')"
    find $FIND_L $FIND_DOT "$@"
}
find_updated_files() {
if [ $FORCE_UPDATE -eq 1 ]; then
find_safe "$@"
else
find_safe "$@" -newer "$CHECKFILE"
fi
}
# Print the line number in $CHECKFILE of the entry whose filename column
# (third space-separated field onwards) is exactly the global $file, or 0
# when there is no such entry.
#
# This function could be replaced entirely with the much simpler:
# cut -d ' ' -f 3- "$CHECKFILE" | grep -Fxn -- "$file" | cut -d ':' -f 1
# But this function is slightly faster as it avoids passing huge chunks of text
# (ie, the whole checksum file minus the first column) through a pipe.
get_line_number() {
    # Avoid `grep -E` as filename characters might get interpreted (eg, $).
    # The fixed-string search only yields candidates: it also hits lines
    # where $file is merely a substring, so each hit is re-checked below.
    for candidate in $(grep -Fn -- "$file" "$CHECKFILE" | cut -d ':' -f 1); do
        sed -n -e "${candidate}p" -- "$CHECKFILE" \
            | cut -d ' ' -f 3- \
            | grep -Fxq -- "$file" \
            || continue
        printf '%d' "$candidate"
        return 0
    done
    printf '%d' "0"
}
# Files created from here on (the checksum file in particular) are readable
# and writable by the owner only.
umask 077
# For files with a modification date newer than the checksum file, if there's
# an existing checksum then update it. Otherwise append a new checksum.
if [ "$UPDATE_MODE" -eq 1 ] && [ -f "$CHECKFILE" ]; then
    for file in $(find_updated_files "$@"); do
        line_num="$(get_line_number)"
        if [ "${line_num:-0}" -eq 0 ]; then
            # No checksum yet, so append one. Handle failure and verbosity
            # exactly like APPEND mode does, so a failed write is not
            # silently ignored and `-v` reports the new entry.
            if ! $COMMAND -- "$file" >> "$CHECKFILE"; then
                fail "Failed to write to checksum file."
            fi
            if [ "$VERBOSE" -eq 1 ]; then
                printf '%s' "ADDED: "
                printf_sanitized "$file"
            fi
            continue
        fi
        # Recorded digest (first column of the entry) versus a fresh
        # checksum line for the file as it is now.
        old="$(sed -n -e "${line_num}p" -- "$CHECKFILE" | cut -d ' ' -f 1)"
        new="$($COMMAND -- "$file")"
        # Should never happen, but double check these aren't empty:
        if [ -z "$old" ] || [ -z "$new" ]; then
            continue
        fi
        # Nothing to do when the digest (text before the first space of the
        # fresh line) is unchanged.
        if [ "$old" = "${new%% *}" ]; then
            continue
        fi
        # `sed -i` isn't POSIX (nor is `mktemp`), so use `ex` instead:
        # change line $line_num to the fresh checksum line, then write+quit.
        # Bail immediately if something went wrong.
        if ! printf '%s\n' "${line_num}c" "$new" "." "x" \
            | ex -s -- "$CHECKFILE"; then
            fail "Failed to update checksum file."
        fi
        # Print what checksums were changed.
        if [ "$VERBOSE" -eq 1 ]; then
            printf '%s' "CHANGED: "
            printf_sanitized "$file"
        fi
    done
fi
# Return 0 when the path "$1" already has an entry in $CHECKFILE (some line's
# filename column equals it exactly), non-zero otherwise or when the
# checksum file does not exist yet.
has_checksum() {
    [ -f "$CHECKFILE" ] || return 1
    # Avoid `grep -E` as filename characters might get interpreted (eg, $).
    # The first grep isn't strictly needed, but grep+cut+grep is faster
    # than just cut+grep here. It must use -F as well: treated as a regular
    # expression, a name such as "a[1].txt" would not match its own entry
    # and the file would get a duplicate checksum on every run.
    grep -F -- "$1" "$CHECKFILE" | cut -d ' ' -f 3- | grep -Fxq -- "$1"
}
# Append checksums for files that have no checksum yet.
if [ "$APPEND_MODE" -eq 1 ] || [ "$UPDATE_MODE" -eq 1 ]; then
    for file in $(find_safe "$@"); do
        if has_checksum "$file"; then
            continue
        fi
        if ! $COMMAND -- "$file" >> "$CHECKFILE"; then
            fail "Failed to write to checksum file."
        fi
        # Print what checksums were appended.
        if [ "$VERBOSE" -eq 1 ]; then
            printf '%s' "ADDED: "
            printf_sanitized "$file"
        fi
    done
fi