#!/bin/sh
# Convert text files found under the given folders to UTF-8 with LF line
# endings, in place.
#
# Usage: SCRIPT FOLDERS
#
# Every regular file smaller than 50M under each FOLDER is classified with
# file(1); depending on the reported description it is then converted:
#   ISO-8859                -> recode latin1..utf-8
#   Non-ISO extended-ASCII  -> recode cp1252..utf-8
#   UTF-16                  -> recode utf-16..utf-8
#   UTF-8 with BOM          -> leading BOM stripped with sed
# Independently of the above, files reported with CRLF line terminators get
# their carriage returns removed.
#
# Requirements: recode, file, and find/sed implementations that accept
# '-size -50M' and '-i' respectively.
#
# Exit status: 0 after printing the help text or after processing the
# folders, 1 if recode is not installed.

if [ $# -eq 0 ] || [ "$1" = "-h" ]; then
	# $0 (not $1) holds the script path; strip its directory part.
	cat <<EOF
Usage: ${0##*/} FOLDERS

Convert all 'bad' encoding to UTF-8/LF.

WARNING: It will fail for encodings other than the ones explicitly supported below.
EOF
	# Stop here so that '-h' is never treated as a folder to process.
	exit
fi

if ! command -v recode >/dev/null 2>&1; then
	echo "recode needed." >&2
	exit 1
fi

for i; do
	# The here-document below is fully expanded before the loop starts, so
	# the list of files is fixed before any of them is modified.
	while IFS= read -r j; do
		# When find prints nothing the here-document still yields one
		# empty line; skip it instead of running file(1) on "".
		[ -n "$j" ] || continue

		# -b drops the "name:" prefix so the file name itself can never
		# match one of the patterns below.
		CODING=$(file -b -- "$j")

		# First matching pattern wins.
		# NOTE(review): the second alternative of the UTF-16 and BOM arms
		# targets the wording that newer file(1) releases appear to use
		# ("Unicode text, UTF-16, ..." / "Unicode text, UTF-8 (with BOM)
		# text") -- confirm against the installed version.
		case $CODING in
		*'ISO-8859'*)
			printf '%s\n' "ISO-8859: [$j]"
			recode latin1..utf-8 "$j"
			;;
		*'Non-ISO extended-ASCII'*)
			printf '%s\n' "cp1252: [$j]"
			recode cp1252..utf-8 "$j"
			;;
		*'UTF-16 Unicode text'* | *'Unicode text, UTF-16'*)
			printf '%s\n' "UTF-16: [$j]"
			recode utf-16..utf-8 "$j"
			;;
		*'UTF-8 Unicode (with BOM)'* | *'UTF-8 (with BOM)'*)
			printf '%s\n' "UTF-8 BOM: [$j]"
			# Match the three BOM bytes explicitly in the C locale: a
			# bare '.' only removes the whole BOM when the current
			# locale happens to be UTF-8.
			LC_ALL=C sed -i '1s/^\xEF\xBB\xBF//' "$j"
			## sed -i is not the fastest depending on the implementations.
			## The following commands work, but may be overkill.
			# dd iflag=skip_bytes skip=3 if=file.srt of=temp.srt
			# dd bs=1 skip=3 if=file.srt of=temp.srt
			# tail -c +4 file.srt > temp.srt
			;;
		esac

		# Line endings are handled separately from the encoding. The
		# decision uses the description captured before any conversion.
		case $CODING in
		*'CRLF'*)
			printf '%s\n' "CRLF: [$j]"
			sed -i 's/\r//g' "$j"
			;;
		esac
	done <<EOF
$(find "$i" -type f -size -50M -print)
EOF
done