#!/bin/sh

# Print the help text on standard error.
# Uses $0 (reduced to its base name) as the program name shown to the user.
usage () {
	cat <<EOF >&2
Usage: ${0##*/} FOLDERS

Convert all 'bad' encoding to UTF-8/LF.

WARNING: It will fail with encodings other than the ones explicitly supported in the script code.

EOF
}

# Command-line handling:
#   -h   print the help text and exit successfully
#   --   optional separator in front of the folder list; it is dropped
# At least one folder must remain afterwards, otherwise the help text is
# printed and the script exits with status 1.
case "${1-}" in
-h)
	usage
	exit 0 ;;
--)
	shift ;;
esac

# Checked after the "--" shift so that a bare "--" is not accepted as a
# (silently empty) folder list. An explicit "if" is used so that the exit
# still happens should usage itself fail.
if [ $# -eq 0 ]; then
	usage
	exit 1
fi

# The conversion relies on three external programs: file(1) to guess the
# encoding, recode to re-encode, and ex to strip the BOM and the carriage
# returns. Stop before touching any file if one of them is missing.
for tool in recode file ex; do
	if ! command -v "$tool" >/dev/null 2>&1; then
		echo >&2 "$tool needed."
		exit 1
	fi
done

# A literal carriage return, used to build the ex substitution that turns CRLF
# line endings into LF. (Command substitution only strips trailing newlines,
# so the CR survives.)
CR=$(printf '\r')

# Convert one file in place to UTF-8 with LF line endings.
#   $1 - path of the file to convert
# The encoding is guessed from the description printed by file(1). A file
# whose description matches none of the patterns below is left untouched.
# Progress messages go to standard output.
convert_file () {
	# -b drops the leading "NAME: " so that a file *name* containing e.g.
	# "ISO-8859" or "CRLF" cannot trigger a conversion by itself.
	CODING=$(file -b "$1")

	case "$CODING" in
	*ISO-8859*)
		echo "ISO-8859: [$1]"
		recode latin1..utf-8 "$1" ;;
	*'Non-ISO extended-ASCII'*)
		echo "cp1252: [$1]"
		recode cp1252..utf-8 "$1" ;;
	# NOTE(review): the second pattern of the next two arms covers the
	# "Unicode text, UTF-16, ..." / "Unicode text, UTF-8 (with BOM) text"
	# wording that newer file(1) releases appear to use -- confirm against
	# the installed version.
	*'UTF-16 Unicode text'* | *'Unicode text, UTF-16'*)
		echo "UTF-16: [$1]"
		recode utf-16..utf-8 "$1" ;;
	*'UTF-8 Unicode (with BOM)'* | *'UTF-8 (with BOM)'*)
		echo "UTF-8 BOM: [$1]"
		# Deletes the first character of the first line.
		# NOTE(review): this presumes ex sees the BOM as one character,
		# which may depend on the locale -- confirm.
		# Standard input is detached so that ex can never read the list
		# of paths that feeds the calling loop.
		ex -sc '1s/^.//|xit' "$1" </dev/null ;;
		## The following commands are funny alternatives, but are completely
		## overkill.
		# dd iflag=skip_bytes skip=3 if=file.srt of=temp.srt
		# dd bs=1 skip=3 if=file.srt of=temp.srt
		# tail -c +4 file.srt > temp.srt
	esac

	# The line-ending fix is independent of the encoding, hence a second test
	# on the same description.
	case "$CODING" in
	*CRLF*)
		echo "CRLF: [$1]"
		# ex falls back to reading commands from standard input when its
		# -c command does not end the session (e.g. the substitution
		# fails), so standard input is detached here as well.
		ex -sc "%s/${CR}//g|xit" "$1" </dev/null ;;
	esac
}

# Convert every regular file smaller than 50M found below a folder.
#   $1 - folder (or single file) to walk
# Paths are read line by line, so file names containing a newline are not
# supported. Feeding the loop through a pipe means no iteration happens at
# all when find prints nothing.
convert_tree () {
	find "$1" -type f -size -50M -print | while IFS= read -r j; do
		convert_file "$j"
	done
}

# Process each folder given on the command line.
for i; do
	convert_tree "$i"
done