#!/bin/sh
# Convert "badly" encoded text files to UTF-8 with LF line endings and no BOM.
#
# Usage: <script> [-h] [--] FILES...
#
# Every argument that is a regular file and that file(1) classifies as text/*
# is rewritten in place, in this order:
#   1. re-encoded to UTF-8 when file(1) reports a charset other than
#      utf-8 / us-ascii (plain ASCII is already valid UTF-8),
#   2. stripped of a leading UTF-8 byte order mark (bytes EF BB BF),
#   3. stripped of carriage returns when file(1) reports CRLF terminators.
# Anything else is skipped silently.
#
# Exit status: 1 when called without arguments or when a rewrite failed,
# 0 otherwise.

# Print the help text on stderr.
usage () {
  cat <<EOF >&2
Usage: ${0##*/} FILES...

Convert all "badly" encoded text files to UTF-8/LF without BOM.
EOF
}

# filter_in_place FILE COMMAND [ARGS...]
# Feed FILE to COMMAND on stdin and, only if COMMAND succeeds, overwrite FILE
# with its output. The output goes through the scratch file "$tmp" so the
# input is never truncated while it is still being read. Writing back with
# cat (instead of mv) keeps the target's inode and permissions.
# POSIX sh has no "local", hence the prefixed global variable.
filter_in_place () {
  fip_target=$1
  shift
  "$@" <"$fip_target" >"$tmp" && cat "$tmp" >"$fip_target"
}

[ $# -eq 0 ] && usage && exit 1
[ "$1" = "-h" ] && usage && exit
[ "$1" = "--" ] && shift

# One scratch file serves every rewrite; it is removed on any exit path.
tmp=$(mktemp) || exit 1
trap 'rm -f "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

status=0
for i; do
  [ -f "$i" ] || continue

  # "--" keeps file names that start with a dash from being read as options.
  mimetype=$(file -bi -- "$i")
  description=$(file -b -- "$i")

  # Expected mimetype shape: "<type>/<subtype>; charset=<name>".
  [ "${mimetype%%/*}" = text ] || continue
  case $mimetype in
    *charset=*) charset=${mimetype##*charset=} ;;
    *) charset= ;;
  esac

  case $charset in
    utf-8 | us-ascii)
      # Already valid UTF-8, nothing to re-encode.
      ;;
    '')
      printf '%s: cannot determine charset, skipping\n' "$i" >&2
      status=1
      continue
      ;;
    *)
      printf '%s: Convert to UTF-8\n' "$i"
      if ! filter_in_place "$i" iconv -f "$charset" -t UTF-8; then
        printf '%s: conversion from %s failed\n' "$i" "$charset" >&2
        status=1
        continue
      fi
      ;;
  esac

  # Look at the bytes themselves rather than at file(1)'s wording: this also
  # catches a BOM that the conversion above carried over from the source.
  if [ "$(od -An -tx1 -N3 <"$i" | tr -d '[:space:]')" = efbbbf ]; then
    printf '%s: Remove BOM\n' "$i"
    # tail -c +4 starts output at the 4th byte, i.e. drops the 3-byte BOM
    # regardless of the current locale.
    if ! filter_in_place "$i" tail -c +4; then
      printf '%s: BOM removal failed\n' "$i" >&2
      status=1
      continue
    fi
  fi

  # Only act when file(1) reported CRLF terminators: deleting CRs from a file
  # that uses bare CR as its line terminator would join all of its lines.
  case $description in
    *CRLF*)
      printf '%s: Remove CR\n' "$i"
      if ! filter_in_place "$i" tr -d '\r'; then
        printf '%s: CR removal failed\n' "$i" >&2
        status=1
      fi
      ;;
  esac
done

exit "$status"