#!/bin/sh if [ -z "$(command -v recode)" ]; then echo "recode needed." exit fi ## Convert all 'bad' encoding to UTF-8/LF. WARNING: It will fail for encodings ## other the one explicitly supported below. while read -r i; do CODING=$(file "$i") if [ -n "$(echo $CODING | grep 'ISO-8859')" ]; then echo "ISO-8859: [$i]" recode latin1..utf-8 "$i" elif [ -n "$(echo $CODING | grep 'Non-ISO extended-ASCII')" ]; then echo "cp1252: [$i]" recode cp1252..utf-8 "$i" elif [ -n "$(echo $CODING | grep 'UTF-16 Unicode text')" ]; then echo "UTF-16: [$i]" recode utf-16..utf-8 "$i" elif [ -n "$(echo $CODING | grep 'UTF-8 Unicode (with BOM)')" ]; then echo "UTF-8 BOM: [$i]" sed -i '1s/^.//' "$i" ## sed -i is not the fastest depending on the implementations. The ## following commands work, but may be overkill. # dd iflag=skip_bytes skip=3 if=file.srt of=temp.srt # dd bs=1 skip=3 if=file.srt of=temp.srt # tail -c +32 file.srt > temp.srt fi if [ -n "$(echo $CODING | grep 'CRLF')" ]; then echo "CRLF: [$i]" sed -i 's/\r//g' "$i" fi done <