#!/bin/sh
# Listing metadata: Bash, executable file, 43 lines, 965 B.

# usage: print the help text on standard error.
# ${0##*/} strips the directory part of $0 so only the script name is shown.
usage () {
	cat <<EOF >&2
Usage: ${0##*/} FILES...

Convert all "badly" encoded text files to UTF-8/LF without BOM.

EOF
}

# Argument handling.
# - No arguments: print the usage text and exit with status 1.
# - First argument is -h: print the usage text and exit successfully.
# - First argument is --: drop it. Because the -h test runs before the shift,
#   a file literally named "-h" can be processed with `-- -h`.
# Explicit `if` blocks are used so the script always exits after printing the
# usage text, even if writing it fails.
if [ $# -eq 0 ]; then
	usage
	exit 1
fi
if [ "$1" = "-h" ]; then
	usage
	exit 0
fi
if [ "$1" = "--" ]; then
	shift
fi

# rewrite_file FILE CMD [ARG...]
# Run CMD with FILE on standard input and, only if CMD succeeds, overwrite
# FILE with CMD's output. The output is staged in a temporary file so a
# failing CMD never truncates or corrupts FILE; writing back with `cat`
# keeps the existing file (and therefore its mode) in place.
# The body is a subshell, so the variables below do not leak to the caller.
# Returns 0 on success, 1 if the temporary file, CMD or the write-back failed.
rewrite_file () (
	target=$1
	shift
	tmp=$(mktemp) || exit 1
	if "$@" <"$target" >"$tmp" && cat "$tmp" >"$target"; then
		rc=0
	else
		rc=1
	fi
	rm -f "$tmp"
	exit "$rc"
)

# has_utf8_bom FILE
# Succeed when FILE starts with the three bytes EF BB BF (the UTF-8 BOM).
# Looking at the bytes avoids depending on the wording of file(1)'s
# description.
has_utf8_bom () {
	[ "$(dd bs=3 count=1 <"$1" 2>/dev/null | od -An -tx1 | tr -d ' \n')" = efbbbf ]
}

# Process every positional parameter: skip anything that is not a regular
# text file, then convert to UTF-8, drop a leading BOM and strip carriage
# returns as needed.
for i ; do
	[ -f "$i" ] || continue

	# e.g. "text/plain; charset=iso-8859-1"
	mimetype=$(file -b --mime -- "$i")
	# e.g. "ASCII text, with CRLF line terminators"
	description=$(file -b -- "$i")

	case $mimetype in
		text/*) ;;
		*) continue ;;
	esac

	case $mimetype in
		*charset=*) charset=${mimetype##*charset=} ;;
		*) charset= ;;
	esac

	case $charset in
		utf-8|us-ascii)
			# ASCII is a subset of UTF-8: nothing to convert.
			;;
		'')
			printf '%s: Unknown charset, skipped\n' "$i" >&2
			continue
			;;
		*)
			printf '%s: Convert to UTF-8\n' "$i"
			if ! rewrite_file "$i" iconv -f "$charset" -t UTF-8; then
				# The byte-level steps below assume UTF-8 (or at least an
				# ASCII-compatible encoding), so do not touch the file further.
				printf '%s: Conversion from %s failed, file left unchanged\n' "$i" "$charset" >&2
				continue
			fi
			;;
	esac

	if has_utf8_bom "$i"; then
		printf '%s: Remove BOM\n' "$i"
		# The BOM is exactly three bytes: keep everything from byte 4 on.
		## Equivalent alternatives:
		# dd iflag=skip_bytes skip=3 if=file.srt of=temp.srt
		# dd bs=1 skip=3 if=file.srt of=temp.srt
		rewrite_file "$i" tail -c +4 ||
			printf '%s: BOM removal failed\n' "$i" >&2
	fi

	case $description in
		*CRLF*)
			printf '%s: Remove CR\n' "$i"
			rewrite_file "$i" tr -d '\r' ||
				printf '%s: CR removal failed\n' "$i" >&2
			;;
	esac
done