ambevar-dotfiles/.scripts/tc-audio-transcode

426 lines
13 KiB
Plaintext
Raw Normal View History

#!/bin/sh
## Bail out early when a required external tool or the companion AWK script is
## missing. Diagnostics go to stderr and the exit status is non-zero so that a
## calling script or loop can tell the run did not happen.
if ! command -v ffmpeg >/dev/null 2>&1; then
    echo "ffmpeg required for transcoding." >&2
    exit 1
fi
if ! command -v realpath >/dev/null 2>&1; then
    echo "realpath required to get input file folder." >&2
    exit 1
fi
## The AWK title-case script is looked up in the folder this script was
## invoked from ($0 with its last path component removed).
TITLECASE_SCRIPT="${0%/*}/titlecase.awk"
if [ ! -f "$TITLECASE_SCRIPT" ]; then
    echo "AWK titlecase script required." >&2
    exit 1
fi
################################################################################
## User options
################################################################################
## For the sake of simplicity we convert everything to OGG.
## You can easily set output folder to current folder with OUTPUT_ROOT=".".
## This is only the default library root: the -r command-line option
## overrides it. The trailing slash is not required, since the OUTPUT_FOLDER
## template below inserts its own "/" after $OUTPUT_ROOT.
OUTPUT_ROOT="/media/data2/Musics/"
## OGG quality ranges from -1 to 10.
## -q-1 45 kbit/s
## -q0 64 kbit/s
## -q1 80 kbit/s
## -q2 96 kbit/s
## -q3 112 kbit/s
## -q4 128 kbit/s
## -q5 160 kbit/s
## -q6 192 kbit/s
## -q7 224 kbit/s
## -q8 256 kbit/s
## -q9 320 kbit/s
## -q10 500 kbit/s
## Artists: we use same value for artist and album_artist.
## Genres: since this is not universal by nature, we do not put a genre in tags,
## except for special cases like Soundtrack.
## Composer: not universal either, we prefer ARTIST over COMPOSER, so COMPOSER
## will be empty.
################################################################################
## End of user options
################################################################################
## This function is supposed to work on a per-album basis. Folders will not be
## accepted as argument. This function will work best if all tracks of a folder
## belong to the same album. There is no way to handle covers reliably, so you
## should leave the only covers you want to keep in the working folder.
## TODO: cover resolution check. Should be > 100x100. Above 1000x1000 should
## give a warning.
## TODO: extract cover from tags if checksum does not match any from current folder.
## Print the usage message on stdout.
## $1: path the script was called with; only its basename is displayed.
## The text mirrors the option string given to getopts, the variables made
## available to tag templates, and the default folder/file templates.
_printhelp ()
{
cat <<EOF
Usage: ${1##*/} [OPTIONS] FILE
Options:
-c : capital case (only first letter in upper case)
-f : overwrite if file exists
-h : print this help
-p : preview (do not change file)
-s : skip encoding
Tags:
-a : artist
-b : bitrate
-d : date
-g : genre
-l : album
-n : track number
-r : library root folder
-t : title
You can use the following variables to refer to existing values:
\$ALBUM
\$ALBUM_ARTIST
\$ARTIST
\$COMPOSER
\$DATE
\$DISC
\$FILENAME
\$GENRE
\$TITLE
\$TRACK
\$TYER
Default output folder:
OUTPUT_FOLDER="\$OUTPUT_ROOT/\$OUTPUT_ARTIST/\${OUTPUT_ALBUM:+\${OUTPUT_DATE:+\$OUTPUT_DATE - }\$OUTPUT_ALBUM/}"
Default output file:
OUTPUT_FILE="\$OUTPUT_FOLDER\$OUTPUT_ARTIST - \${OUTPUT_PADDEDTRACK:+\$OUTPUT_PADDEDTRACK - }\$OUTPUT_TITLE.\$OUTPUT_FORMAT"
Examples:
Set the 'artist' tag and reencode:
${1##*/} -a 'Franz Liszt' file.mp3
Set 'artist' to be 'composer', and 'title' to be preceded by 'artist', do not reencode:
${1##*/} -s -a '\$COMPOSER' -t '\$ARTIST - \$TITLE' file.ogg
IMPORTANT: you *must* use single quotes when using variables.
EOF
}
## OPTIONS
## Switches toggled by -c, -f, -p and -s. OVERWRITE is handed to ffmpeg as is.
CAPITAL=0
OVERWRITE="-n"
PREVIEW=0
SKIP=0
## TAGS
## These are templates: the single quotes keep the variable references
## unexpanded until the metadata has been read, at which point they are
## expanded with eval.
OUTPUT_ALBUM='$ALBUM'
OUTPUT_ARTIST='$ARTIST'
OUTPUT_DATE='$DATE'
## Genre is empty by default so that the genre handling further down takes its
## "no genre given on the command line" branch and only keeps the few
## whitelisted genres. Use -g '$GENRE' to carry the source genre over.
OUTPUT_GENRE=''
OUTPUT_TITLE='$TITLE'
OUTPUT_TRACK='$TRACK'
## PROPERTIES
## 0 means "not provided": the source bitrate is used instead.
OUTPUT_BITRATE=0
## Non-CLI-option data. Modifying these implies modifications in the code below.
OUTPUT_FORMAT="ogg"
## Placeholder only: OGG_PARAM is computed again once the bitrate is known.
OGG_PARAM="-c:a libvorbis -b:a ${OUTPUT_BITRATE}k"
## These ones are not CLI-options either, but this could be easily changed.
OUTPUT_FOLDER='$OUTPUT_ROOT/$OUTPUT_ARTIST/${OUTPUT_ALBUM:+${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM/}'
OUTPUT_FILE='$OUTPUT_FOLDER$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE'
## Parse the command line. The leading ":" in the option string selects
## getopts' silent mode: a missing argument is reported as opt=":" and an
## unknown option as opt="?", both with the offending letter in OPTARG.
while getopts ":a:b:cd:fg:l:n:r:t:hps" opt; do
    case "$opt" in
    a) OUTPUT_ARTIST=$OPTARG ;;
    b) OUTPUT_BITRATE=$OPTARG ;;
    d) OUTPUT_DATE=$OPTARG ;;
    g) OUTPUT_GENRE=$OPTARG ;;
    l) OUTPUT_ALBUM=$OPTARG ;;
    n) OUTPUT_TRACK=$OPTARG ;;
    r) OUTPUT_ROOT=$OPTARG ;;
    t) OUTPUT_TITLE=$OPTARG ;;
    c) CAPITAL=1 ;;
    f) OVERWRITE="-y" ;;
    p) PREVIEW=1 ;;
    s) SKIP=1 ;;
    h)
        _printhelp "$0"
        exit 1
        ;;
    :)
        ## Must be matched as a literal: an unescaped "?" pattern is a glob
        ## that matches any single character, ":" included.
        echo "Missing argument for option -$OPTARG." >&2
        _printhelp "$0"
        exit 1
        ;;
    \?)
        _printhelp "$0"
        exit 1
        ;;
    esac
done
shift $((OPTIND - 1))
## A file to process is mandatory.
if [ $# -eq 0 ]; then
    _printhelp "$0"
    exit
fi
##================================================================================
## Get metadata.
## ffmpeg is run on the input without any output file; its report is captured
## with stderr merged in (2>&1).
STREAM=$(ffmpeg -i "$1" 2>&1)
if ! printf '%s\n' "$STREAM" | grep -q "Stream"; then
    echo "[$1] Non-audio file." >&2
    exit 1
fi
## Keep every line of the report that follows the first "Metadata" line.
METADATA=$(echo "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}')
## Filename without extension nor path.
FILENAME="${1%.*}"
FILENAME="${FILENAME##*/}"
## Folder of the file. Needed for cover.
SOURCEFOLDER="$(realpath "$1")"
SOURCEFOLDER="${SOURCEFOLDER%/*}"
## We get format from extension, because codec is not reliable either.
FORMAT="${1##*.}"
## CODEC is unused for now.
CODEC=$(echo "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}')
## Bitrate in kb/s taken from the first "Duration" line. The value is printed
## only when the substitution succeeds, so BITRATE stays empty (instead of
## holding the whole line) when ffmpeg reports no numeric bitrate.
BITRATE=$(echo "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]]\+\) kb/s|\1|p;q}')
if [ -z "$FORMAT" ]; then
    echo "ERROR: non-audio file." >&2
    exit 1
fi
## Print the value of one tag found in $METADATA.
## $1: tag name. It is matched as a whole word, case-insensitively, anywhere
##     on the line, and only the first matching line is used, so if several
##     lines match this may not be the desired value.
## Only the leading "key : " part is removed: a value that itself contains
## " : " is printed in full.
_metadata_filter()
{
    printf '%s\n' "$METADATA" | grep -im1 "\<$1\>" | sed 's/[^:]* : //'
}
## Raw tag values as found in the source file, one lookup per tag. The lookups
## are independent of each other; they are listed alphabetically.
INPUT_ALBUM=$(_metadata_filter album)
INPUT_ALBUM_ARTIST=$(_metadata_filter album_artist)
INPUT_ARTIST=$(_metadata_filter artist)
INPUT_COMPOSER=$(_metadata_filter composer)
INPUT_DATE=$(_metadata_filter date)
INPUT_DISC=$(_metadata_filter disc)
INPUT_GENRE=$(_metadata_filter genre)
INPUT_TITLE=$(_metadata_filter title)
INPUT_TRACK=$(_metadata_filter track)
INPUT_TYER=$(_metadata_filter TYER)
##==============================================================================
## Variable cleansing.
## We use the AWK script to set title case. The script contains
## exceptions that can be configured. We fix some chars with sed.
# ’ => '
# : => -
# / => -
## Arguments: the words of the string to cleanse; they are joined with spaces.
## Reads CAPITAL (handed to the AWK script as its "capital" variable) and
## TITLECASE_SCRIPT. Prints the first line of the result only.
## NOTE(review): the character mapped to the ASCII apostrophe is taken to be
## the typographic apostrophe U+2019 -- confirm against the upstream file.
_string_cleanser()
{
    printf '%s\n' "$*" | awk -v capital="$CAPITAL" -f "$TITLECASE_SCRIPT" \
        | sed -n -e "s/’/'/g ; s/ *: */ - /g ; s| */ *| - |g; p; q"
}
## Cleansed counterparts of the INPUT_* values. These are the variables that
## the tag templates refer to. Listed alphabetically; each line is independent.
ALBUM=$(_string_cleanser "$INPUT_ALBUM")
ALBUM_ARTIST=$(_string_cleanser "$INPUT_ALBUM_ARTIST")
ARTIST=$(_string_cleanser "$INPUT_ARTIST")
COMPOSER=$(_string_cleanser "$INPUT_COMPOSER")
DATE=$(_string_cleanser "$INPUT_DATE")
DISC=$(_string_cleanser "$INPUT_DISC")
GENRE=$(_string_cleanser "$INPUT_GENRE")
TITLE=$(_string_cleanser "$INPUT_TITLE")
TRACK=$(_string_cleanser "$INPUT_TRACK")
TYER=$(_string_cleanser "$INPUT_TYER")
##================================================================================
## OUTPUT variables.
## Each OUTPUT_* value is a template that may reference the cleansed tag
## variables; "eval echo" expands it. Fallbacks are applied to the expanded
## result: testing the template itself would never trigger them, since the
## default templates ('$TITLE', '$ARTIST') are never empty.
OUTPUT_TITLE=$(eval echo $OUTPUT_TITLE)
OUTPUT_TITLE=${OUTPUT_TITLE:-Unknown Title}
## NOTE(review): the album fallback is deliberately left on the template. An
## empty album is handled by the folder and cover naming (${OUTPUT_ALBUM:+...}),
## so making "Unknown Album" effective would change the library layout.
OUTPUT_ALBUM=$(eval echo ${OUTPUT_ALBUM:-Unknown Album})
## We use album artist if artist is empty. The album artist is assigned as is,
## without going through eval, so tag content is never interpreted by the shell.
OUTPUT_ARTIST=$(eval echo $OUTPUT_ARTIST)
[ -z "$OUTPUT_ARTIST" ] && OUTPUT_ARTIST=$ALBUM_ARTIST
OUTPUT_ARTIST=${OUTPUT_ARTIST:-Unknown Artist}
## Genre. When OUTPUT_GENRE is empty, GENRE is put in lower case with spaces
## turned into underscores to ease matching, and only a few known genres are
## kept (in their Title Case spelling); anything else yields an empty genre.
## When OUTPUT_GENRE is not empty, its template is expanded and cleansed.
if [ -z "$OUTPUT_GENRE" ]; then
    GENRE=$(echo "$GENRE" | tr '[:upper:] ' '[:lower:]_')
    case $GENRE in
    ost | soundtrack | original_soundtrack)
        OUTPUT_GENRE="Soundtrack"
        ;;
    classical | classics | classic)
        OUTPUT_GENRE="Classical"
        ;;
    humour)
        OUTPUT_GENRE="Humour"
        ;;
    *)
        OUTPUT_GENRE=""
        ;;
    esac
else
    OUTPUT_GENRE=$(eval _string_cleanser $OUTPUT_GENRE)
fi
## Track number: the template is expanded and cleansed, leading zeros are
## dropped, then everything from the first non-digit on is cut, which also
## removes a track count if any.
OUTPUT_TRACK=$(eval _string_cleanser $OUTPUT_TRACK | sed 's/^0*//; s|[^[:digit:]].*||')
## Date: only a four-digit group of the expanded, cleansed template is kept;
## the result is empty when there is none.
OUTPUT_DATE=$(eval _string_cleanser $OUTPUT_DATE | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
## TYER takes over when it holds a four-digit group while DATE is not exactly
## four characters long.
TYER_REG=$(echo "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
[ ${#TYER_REG} -eq 4 ] && [ ${#DATE} -ne 4 ] && OUTPUT_DATE="$TYER_REG"
## QUALITY
## If the bitrate argument is not provided (empty or zero), we use the BITRATE
## value read from the source file.
case $OUTPUT_BITRATE in
*[!0]*) ;;
*) OUTPUT_BITRATE=$BITRATE ;;
esac
## Only a plain number can be compared and handed to ffmpeg as "-b:a <n>k".
## Anything else (no bitrate found in the source, garbage on the command line)
## leaves the bitrate out, so the choice is left to the encoder.
case $OUTPUT_BITRATE in
'' | *[!0-9]*)
    echo "Warning: no usable bitrate [$OUTPUT_BITRATE], leaving the choice to the encoder." >&2
    OUTPUT_BITRATE=""
    OGG_PARAM="-c:a libvorbis"
    ;;
*)
    ## If OUTPUT_BITRATE is beyond OGG's limit, we trim it.
    [ "$OUTPUT_BITRATE" -gt 500 ] && OUTPUT_BITRATE=500
    OGG_PARAM="-c:a libvorbis -b:a ${OUTPUT_BITRATE}k"
    ;;
esac
## Only reencode if not in OGG and if SKIP not set. When skipping, the source
## format is kept for the output file as well.
[ "${SKIP:-0}" -ne 0 ] && OGG_PARAM="-c:a copy" && OUTPUT_FORMAT="$FORMAT"
[ "$FORMAT" = "ogg" ] && OGG_PARAM="-c:a copy"
## Make sure track number has two digits for file name only.
OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
if [ -n "$OUTPUT_PADDEDTRACK" ] && [ "$OUTPUT_PADDEDTRACK" -lt 10 ]; then
    OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
fi
## Expand the folder template first: the file template refers to the expanded
## folder.
OUTPUT_FOLDER=$(eval echo $OUTPUT_FOLDER)
OUTPUT_FILE=$(eval echo $OUTPUT_FILE)
## Destination already there: unless overwriting was requested, a timestamp is
## appended to the name to make it unique. Either way a warning is prepared
## for the summary.
if [ -e "${OUTPUT_FILE}.$OUTPUT_FORMAT" ]; then
    case $OVERWRITE in
    -n)
        OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
        OUTPUT_MSG="$(tput setf 1)$(tput bold)(Warning: destination exists, appending timestamp.)$(tput sgr0)"
        ;;
    *)
        OUTPUT_MSG="$(tput setf 4)$(tput bold)(Warning: overwriting destination!)$(tput sgr0)"
        ;;
    esac
fi
##==============================================================================
## PREVIEW
## Summary of the source values, the values about to be written and the
## destination. The source bitrate and format live in BITRATE and FORMAT
## (there is no INPUT_ counterpart for these two).
cat <<EOF
:: BEFORE
ALBUM [$INPUT_ALBUM]
ALBUM_ARTIST [$INPUT_ALBUM_ARTIST]
ARTIST [$INPUT_ARTIST]
BITRATE [$BITRATE]
COMPOSER [$INPUT_COMPOSER]
DATE [$INPUT_DATE]
DISC [$INPUT_DISC]
FORMAT [$FORMAT]
GENRE [$INPUT_GENRE]
TITLE [$INPUT_TITLE]
TRACK [$INPUT_TRACK]
TYER [$INPUT_TYER]

:: AFTER
ALBUM [$OUTPUT_ALBUM]
ARTIST [$OUTPUT_ARTIST]
BITRATE [$OUTPUT_BITRATE]
DATE [$OUTPUT_DATE]
FORMAT [$OUTPUT_FORMAT]
GENRE [$OUTPUT_GENRE]
TITLE [$OUTPUT_TITLE]
TRACK [$OUTPUT_TRACK]

:: OUTPUT $OUTPUT_MSG
[$OUTPUT_FILE.$OUTPUT_FORMAT]

EOF
# echo "[$OGG_PARAM]"
## In preview mode nothing is written: stop right after the summary.
[ "$PREVIEW" -eq 1 ] && exit
##==============================================================================
## RUN PROCESS
## Make sure directory exists. On failure the folder that could not be created
## is reported on stderr and the script stops with a non-zero status.
if ! mkdir -p "$OUTPUT_FOLDER"; then
    echo "ERROR: could not create output folder [$OUTPUT_FOLDER]." >&2
    exit 1
fi
## COVER. Every PNG/JPG file lying directly in the folder of the source file
## is copied to the output folder as "<album> - Cover.<ext>" (just
## "Cover.<ext>" when there is no album), unless an identical file (same SHA-1)
## is already there under that name. When a different file holds the name, a
## number is appended: "... Cover 1.<ext>", "... Cover 2.<ext>", and so on.
while read -r i; do
    if [ -z "$i" ]; then
        break
    fi
    OUTPUT_COVER="$OUTPUT_FOLDER/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
    OUTPUT_COVERFILE="$OUTPUT_COVER.${i##*.}"
    COVER_COUNTER=1
    ## Checksum of the candidate, computed once for all comparisons below.
    _cover_sum=$(sha1sum "$i" | cut -f1 -d' ')
    ## Same cover is already in target folder: nothing to do.
    if [ -e "$OUTPUT_COVERFILE" ] && \
        [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$_cover_sum" ]; then
        continue
    fi
    ## Walk through the numbered names until one is free or holds this very
    ## cover.
    until [ ! -e "$OUTPUT_COVERFILE" ] || \
        [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$_cover_sum" ]; do
        OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${i##*.}"
        COVER_COUNTER=$(($COVER_COUNTER+1))
    done
    echo ":: COVER [$i] from $SOURCEFOLDER"
    ## -n: never overwrite an existing destination.
    cp -nv "$i" "$OUTPUT_COVERFILE"
    echo
done <<EOF
$(find "$SOURCEFOLDER" -maxdepth 1 \( -iname '*.png' -o -iname '*.jpg' \) )
EOF
## Zsh compatibility. We need it otherwise word splitting of parameter like
## OGG_PARAM will not work. STATUS is empty in shells that have no
## shwordsplit option, in which case nothing is changed.
STATUS="$(set -o | grep 'shwordsplit' | awk '{print $2}')"
[ "$STATUS" = "off" ] && set -o shwordsplit
## TAG/RECODE
## With the -map_metadata parameter we clear all metadata.
## OVERWRITE and OGG_PARAM are left unquoted on purpose: they must be split
## into separate arguments.
ffmpeg $OVERWRITE -i "$1" -vn -sn $OGG_PARAM \
    -map_metadata -1 \
    -metadata title="$OUTPUT_TITLE" \
    -metadata artist="$OUTPUT_ARTIST" \
    -metadata track="$OUTPUT_TRACK" \
    -metadata date="$OUTPUT_DATE" \
    -metadata album="$OUTPUT_ALBUM" \
    -metadata album_artist="$OUTPUT_ARTIST" \
    -metadata genre="$OUTPUT_GENRE" \
    "$OUTPUT_FILE.$OUTPUT_FORMAT"
## Remember the outcome before any other command overwrites $?.
TRANSCODE_STATUS=$?
echo
## Restore Zsh previous options. This will not turn off shwordsplit if it
## was on before calling the function. An "if" is used so that a false test
## does not become the exit status of the script.
if [ "$STATUS" = "off" ]; then
    set +o shwordsplit
fi
## Report an ffmpeg failure to the caller.
[ "$TRANSCODE_STATUS" -eq 0 ] || exit "$TRANSCODE_STATUS"