ambevar-dotfiles/.scripts/tc-audio-transcode

417 lines
12 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/bin/sh
## Abort early when a required tool is missing. Diagnostics go to stderr and the
## exit status is non-zero so that callers can detect the failure.
if ! command -v ffmpeg >/dev/null 2>&1; then
	echo "ffmpeg required." >&2
	exit 1
fi
## The AWK title case script is expected in the same folder as this script.
## dirname is used instead of ${0%/*} because the latter returns $0 unchanged
## when the script is invoked without any slash in its path.
TITLECASE_SCRIPT="$(dirname -- "$0")/titlecase.awk"
if [ ! -f "$TITLECASE_SCRIPT" ]; then
	echo "AWK titlecase script required." >&2
	exit 1
fi
################################################################################
## User options
################################################################################
## For the sake of simplicity we convert everything to OGG.
## OUTPUT_ROOT is the root folder of the music library; it can also be set from
## the command line with the -r option. You can easily set the output folder to
## the current folder with OUTPUT_ROOT=".".
OUTPUT_ROOT="/media/data1/Musics/"
## OGG quality ranges from -1 to 10. The table below gives the approximate
## bitrate of each quality level. Note that the encoder is given a bitrate
## (-b:a) further down, not a -q level, and that bitrates above 500 are capped.
## -q-1 45 kbit/s
## -q0 64 kbit/s
## -q1 80 kbit/s
## -q2 96 kbit/s
## -q3 112 kbit/s
## -q4 128 kbit/s
## -q5 160 kbit/s
## -q6 192 kbit/s
## -q7 224 kbit/s
## -q8 256 kbit/s
## -q9 320 kbit/s
## -q10 500 kbit/s
## Artists: we use the same value for artist and album_artist.
## Genres: since genres are not universal by nature, we do not put a genre in
## tags, except for special cases like Soundtrack.
## Composer: not universal either; we prefer ARTIST over COMPOSER, so COMPOSER
## will be empty.
################################################################################
## End of user options
################################################################################
## This script is supposed to work on a per-album basis. Folders will not be
## accepted as arguments. It works best if all tracks of a folder belong to the
## same album. There is no way to handle covers reliably, so you should leave
## only the covers you want to keep in the working folder.
## TODO: cover resolution check. Should be > 100x100. Above 1000x1000 should
## give a warning.
## TODO: extract cover from tags if its checksum does not match any cover in
## the current folder.
_printhelp ()
{
cat <<EOF
Usage: ${1##*/} [OPTIONS] FILE
Options:
-c : capital case (only first letter in upper case)
-f : overwrite if file exists
-p : preview (do not change file)
-s : skip encoding
Tags:
-a : artist
-b : bitrate
-d : date
-g : genre
-l : album
-n : track number
-r : library root folder
-t : title
You can use the following variables to refer to existing values:
\$ALBUM
\$ALBUM_ARTIST
\$ARTIST
\$COMPOSER
\$DATE
\$FILENAME
\$GENRE
\$TRACK
\$TYER
Default output folder:
OUTPUT_FOLDER="\$OUTPUT_ROOT/\$OUTPUT_ARTIST/\${OUTPUT_ALBUM:+\${OUTPUT_DATE:+\$OUTPUT_DATE - }\$OUTPUT_ALBUM/}"
Default output file:
OUTPUT_FILE="\$OUTPUT\$OUTPUT_ARTIST - \${OUTPUT_TRACK:+\$OUTPUT_TRACK - }\$OUTPUT_TITLE.\$OUTPUT_FORMAT"
Examples:
Set the 'artist' tag and reencode:
${1##*/} -a 'Franz Liszt' file.mp3
Set 'artist' to be 'composer', and 'title' to be preceded by 'artist', do not reencode:
${1##*/} -s -a '\$COMPOSER' -t '\$ARTIST - \$TITLE' file.ogg
IMPORTANT: you *must* use single quotes when using variables.
EOF
}
## Switches toggled from the command line (-s, -p, -f, -c).
SKIP=0
PREVIEW=0
OVERWRITE=-n
CAPITAL=0
## Tag templates. The single quotes keep every '$NAME' literal at this point;
## the names refer to the tags read from the input file further down.
OUTPUT_TITLE='$TITLE'
OUTPUT_TRACK='$TRACK'
OUTPUT_ARTIST='$ARTIST'
OUTPUT_ALBUM='$ALBUM'
OUTPUT_DATE='$DATE'
OUTPUT_GENRE='$GENRE'
## Target bitrate; 0 is the "not provided" marker tested in the QUALITY section.
OUTPUT_BITRATE=0
## Not reachable from the command line. Changing these implies changing the
## code below.
OUTPUT_FORMAT=ogg
OGG_PARAM="-c:a libvorbis -b:a ${OUTPUT_BITRATE}k"
## Path templates. They are not command-line options either, but could easily
## become some. They are expanded once all the OUTPUT_* values are known.
OUTPUT_FOLDER='$OUTPUT_ROOT/$OUTPUT_ARTIST/${OUTPUT_ALBUM:+${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM/}'
OUTPUT_FILE='$OUTPUT_FOLDER$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE'
## Parse the command line. The leading ':' of the option string selects silent
## error reporting: getopts sets opt to ':' when an option lacks its argument
## and to '?' when the option is unknown; OPTARG then holds the option letter.
while getopts ":a:b:cd:fg:l:n:r:t:hps" opt; do
	case $opt in
	a) OUTPUT_ARTIST=$OPTARG ;;
	b) OUTPUT_BITRATE=$OPTARG ;;
	d) OUTPUT_DATE=$OPTARG ;;
	g) OUTPUT_GENRE=$OPTARG ;;
	l) OUTPUT_ALBUM=$OPTARG ;;
	n) OUTPUT_TRACK=$OPTARG ;;
	r) OUTPUT_ROOT=$OPTARG ;;
	t) OUTPUT_TITLE=$OPTARG ;;
	c) CAPITAL=1 ;;
	f) OVERWRITE="-y" ;;
	p) PREVIEW=1 ;;
	s) SKIP=1 ;;
	h)
		## Help was asked for explicitly: this is not an error.
		_printhelp "$0"
		exit 0
		;;
	:)
		## This arm must come before the '?' one and the latter must be
		## escaped: an unquoted ? is a pattern that matches any single
		## character, ':' included.
		echo "Missing argument." >&2
		_printhelp "$0" >&2
		exit 1
		;;
	\?)
		echo "Unknown option -$OPTARG." >&2
		_printhelp "$0" >&2
		exit 1
		;;
	esac
done
shift $((OPTIND - 1))
## A file to process is mandatory.
if [ $# -eq 0 ]; then
	_printhelp "$0" >&2
	exit 1
fi
##================================================================================
## Get metadata.
## 'ffmpeg -i FILE' without output describes its input; stderr is captured
## together with stdout.
STREAM=$(ffmpeg -i "$1" 2>&1)
## Same 'Stream ... Audio:' line as the one CODEC is read from below. A file
## with other kinds of streams only (a cover image for instance) is rejected.
if ! printf '%s\n' "$STREAM" | grep -q 'Stream.*Audio:'; then
	echo "[$1] Non-audio file." >&2
	exit 1
fi
## Everything that follows the first line containing "Metadata".
METADATA=$(printf '%s\n' "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}')
## Base name of the file. The folder is stripped first so that a dot in a
## folder name is never mistaken for the start of the extension.
FILENAME="${1##*/}"
## We get format from extension, because codec is not reliable either.
FORMAT="${FILENAME##*.}"
## Filename without extension nor path.
FILENAME="${FILENAME%.*}"
## Folder of the file. Needed for cover.
SOURCEFOLDER="$(realpath "$1")"
SOURCEFOLDER="${SOURCEFOLDER%/*}"
## CODEC is unused for now.
CODEC=$(printf '%s\n' "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}')
## Overall bitrate from the first "Duration" line. The p flag prints only when
## the substitution succeeded, so BITRATE is left empty instead of holding the
## whole line when no "<number> kb/s" is present.
BITRATE=$(printf '%s\n' "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]]\+\) kb/s.*|\1|p;q}')
if [ -z "$FORMAT" ]; then
	echo "ERROR: non-audio file." >&2
	exit 1
fi
## Print the value of a tag found in METADATA.
## $1: tag name, matched case-insensitively. It is inserted in a regular
##     expression, so it should be a plain word.
## Only the first matching line is used, so if a tag is present several times
## this may not be the desired value. Nothing is printed when the tag is absent.
## The name must be the key of the line, that is the first word before the
## colon: a tag whose value merely contains the name is not a match. Only the
## "key : " prefix is removed, so a value may itself contain " : ".
_metadata_filter()
{
	printf '%s\n' "$METADATA" \
		| grep -im1 "^[[:space:]]*$1[[:space:]]*:" \
		| sed 's/^[^:]*:[[:space:]]*//'
}
## Tags of the input file, one _metadata_filter lookup per tag (alphabetical
## order). A tag that is not found yields an empty value.
ALBUM=$(_metadata_filter album)
ALBUM_ARTIST=$(_metadata_filter album_artist)
ARTIST=$(_metadata_filter artist)
COMPOSER=$(_metadata_filter composer)
DATE=$(_metadata_filter date)
DISC=$(_metadata_filter disc)
GENRE=$(_metadata_filter genre)
TITLE=$(_metadata_filter title)
TRACK=$(_metadata_filter track)
TYER=$(_metadata_filter TYER)
##================================================================================
## OUTPUT variables.
## We use the AWK script to set title case. The script contains
## exceptions that can be configured. We fix some chars with sed.
## ’ => '
## : => -
## / => -
## NOTE(review): the typographic apostrophe (U+2019) is assumed here; the first
## sed expression must never have an empty pattern, since sed then aborts with
## "no previous regular expression" and nothing is printed.
## $@: words of the string to clean; they are joined with single spaces.
## Reads CAPITAL and TITLECASE_SCRIPT. Only the first line of the result is
## printed.
_string_cleanser()
{
	## printf instead of echo: the string is arbitrary tag data and may start
	## with a dash or contain backslashes.
	printf '%s\n' "$*" | awk -v capital="$CAPITAL" -f "$TITLECASE_SCRIPT" \
		| sed -n -e "s/’/'/g ; s/ *: */ - /g ; s| */ *| - |g; p; q"
}
## Expand the variables referenced by a tag template such as '$ARTIST - $TITLE'
## and print the result.
## $1: template.
## Backslashes, double quotes and backticks of the template are escaped first,
## so that it can be evaluated inside double quotes: characters like & ; ( ) or
## quotes are then harmless, and the values substituted in are never parsed
## again. A $(...) typed by the user on the command line is still executed.
_expand_template()
{
	_template=$(printf '%s\n' "$1" | sed 's/[\\"`]/\\&/g')
	eval "printf '%s\n' \"$_template\""
}
## Expand a tag template, then clean the result with _string_cleanser.
## $1: template.
_cleanse_template()
{
	(
		## Word splitting is wanted here: it trims the value and collapses
		## runs of blanks. Globbing is not: a '*' or a '?' in a tag must
		## never be replaced by the names of files in the current folder.
		set -f
		_string_cleanser $(_expand_template "$1")
	)
}
## Title, artist and album: a tag read from the file is cleaned; a value given
## on the command line only has its variables expanded.
if [ "$OUTPUT_TITLE" = '$TITLE' ]; then
	OUTPUT_TITLE=$(_cleanse_template "$OUTPUT_TITLE")
else
	OUTPUT_TITLE=$(_expand_template "$OUTPUT_TITLE")
fi
OUTPUT_TITLE="${OUTPUT_TITLE:-Unknown Title}"
if [ "$OUTPUT_ARTIST" = '$ARTIST' ]; then
	OUTPUT_ARTIST=$(_cleanse_template "$OUTPUT_ARTIST")
	## We use album artist if artist is empty.
	[ -z "$OUTPUT_ARTIST" ] && [ -n "$ALBUM_ARTIST" ] && OUTPUT_ARTIST="$ALBUM_ARTIST"
else
	OUTPUT_ARTIST=$(_expand_template "$OUTPUT_ARTIST")
fi
OUTPUT_ARTIST="${OUTPUT_ARTIST:-Unknown Artist}"
if [ "$OUTPUT_ALBUM" = '$ALBUM' ]; then
	OUTPUT_ALBUM=$(_cleanse_template "$OUTPUT_ALBUM")
else
	OUTPUT_ALBUM=$(_expand_template "$OUTPUT_ALBUM")
fi
OUTPUT_ALBUM="${OUTPUT_ALBUM:-Unknown Album}"
## If OUTPUT_GENRE is set from command-line parameters, we cleanse the
## string. Otherwise we put GENRE in lower case and underscore to ease matching.
## If it matches, we use the Title Case match. If it does not, we set it to
## empty. The normalized form goes to GENRE_KEY so that GENRE keeps the value
## read from the file.
if [ "$OUTPUT_GENRE" = '$GENRE' ]; then
	GENRE_KEY=$(_string_cleanser "$GENRE" | tr '[:upper:] ' '[:lower:]_')
	case $GENRE_KEY in
	ost | soundtrack | original_soundtrack) OUTPUT_GENRE="Soundtrack" ;;
	classical | classics | classic) OUTPUT_GENRE="Classical" ;;
	humour) OUTPUT_GENRE="Humour" ;;
	*) OUTPUT_GENRE="" ;;
	esac
else
	OUTPUT_GENRE=$(_cleanse_template "$OUTPUT_GENRE")
fi
## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
OUTPUT_TRACK=$(_cleanse_template "$OUTPUT_TRACK" | sed -e 's/^0*//' -e 's|[^[:digit:]].*||')
## Remember whether the date comes from the tags: the TYER fallback below must
## not replace a date given with -d.
DATE_FROM_TAGS=0
[ "$OUTPUT_DATE" = '$DATE' ] && DATE_FROM_TAGS=1
## We extract the four-digits number from the date.
OUTPUT_DATE=$(_cleanse_template "$OUTPUT_DATE")
OUTPUT_DATE=$(printf '%s\n' "$OUTPUT_DATE" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
## If DATE is not a year, we use TYER if it is a year.
TYER_REG=$(printf '%s\n' "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
if [ "$DATE_FROM_TAGS" -eq 1 ] && [ ${#DATE} -ne 4 ] && [ ${#TYER_REG} -eq 4 ]; then
	OUTPUT_DATE="$TYER_REG"
fi
## QUALITY
## The value given with -b must be a plain number; 0 stands for "not provided".
case $OUTPUT_BITRATE in
'' | *[!0-9]*)
	echo "ERROR: invalid bitrate [$OUTPUT_BITRATE]." >&2
	exit 1
	;;
esac
## If bitrate argument is not provided, we use BITRATE value.
[ "$OUTPUT_BITRATE" -eq 0 ] && OUTPUT_BITRATE=$BITRATE
case $OUTPUT_BITRATE in
'' | *[!0-9]*)
	## The bitrate of the source could not be read as a number.
	OUTPUT_BITRATE=""
	;;
*)
	## If OUTPUT_BITRATE is beyond OGG's limit, we trim it.
	[ "$OUTPUT_BITRATE" -gt 500 ] && OUTPUT_BITRATE=500
	;;
esac
## Only reencode if not in OGG and if SKIP not set.
if [ "$SKIP" -ne 0 ]; then
	## The audio stream is copied, so the source format is kept as well.
	OGG_PARAM="-c:a copy"
	OUTPUT_FORMAT="$FORMAT"
elif [ "$FORMAT" = "ogg" ]; then
	OGG_PARAM="-c:a copy"
else
	## A bitrate is only needed when the audio is actually encoded.
	if [ -z "$OUTPUT_BITRATE" ]; then
		echo "ERROR: could not read the bitrate of the source, use -b to set one." >&2
		exit 1
	fi
	OGG_PARAM="-c:a libvorbis -b:a ${OUTPUT_BITRATE}k"
fi
## Make sure track number has two digits for file name only.
OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
if [ -n "$OUTPUT_PADDEDTRACK" ] && [ "$OUTPUT_PADDEDTRACK" -lt 10 ]; then
	OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
fi
## Expand the path templates. They are evaluated inside double quotes so that
## the values substituted in are neither split into words nor expanded as file
## name patterns (a title such as "Who Are You?" must stay as it is).
## OUTPUT_FOLDER comes first because the OUTPUT_FILE template refers to it.
eval "OUTPUT_FOLDER=\"$OUTPUT_FOLDER\""
eval "OUTPUT_FILE=\"$OUTPUT_FILE\""
if [ -e "${OUTPUT_FILE}.$OUTPUT_FORMAT" ]; then
	if [ "$OVERWRITE" = "-n" ]; then
		## If file exist, we append a unique timestamp to the name.
		OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
		OUTPUT_MSG="$(tput setf 1)$(tput bold)(Warning: destination exists, appending timestamp.)$(tput sgr0)"
	else
		OUTPUT_MSG="$(tput setf 4)$(tput bold)(Warning: overwriting destination!)$(tput sgr0)"
	fi
fi
##==============================================================================
## PREVIEW
## Show the tags read from the file, the tags that will be written and the
## destination. A here-document is used rather than echo: the values are
## arbitrary tag data, and how echo treats backslashes depends on the shell
## that runs /bin/sh.
cat <<EOF
:: BEFORE
ALBUM [$ALBUM]
ALBUM_ARTIST [$ALBUM_ARTIST]
ARTIST [$ARTIST]
BITRATE [$BITRATE]
COMPOSER [$COMPOSER]
DATE [$DATE]
DISC [$DISC]
FORMAT [$FORMAT]
GENRE [$GENRE]
TITLE [$TITLE]
TRACK [$TRACK]
TYER [$TYER]

:: AFTER
ALBUM [$OUTPUT_ALBUM]
ARTIST [$OUTPUT_ARTIST]
BITRATE [$OUTPUT_BITRATE]
DATE [$OUTPUT_DATE]
FORMAT [$OUTPUT_FORMAT]
GENRE [$OUTPUT_GENRE]
TITLE [$OUTPUT_TITLE]
TRACK [$OUTPUT_TRACK]

:: OUTPUT $OUTPUT_MSG
[$OUTPUT_FILE.$OUTPUT_FORMAT]

EOF
# echo "[$OGG_PARAM]"
## In preview mode nothing is written: we stop here.
[ "$PREVIEW" -eq 1 ] && exit 0
##==============================================================================
## RUN PROCESS
## Make sure directory exists.
if ! mkdir -p "$OUTPUT_FOLDER"; then
	## The message names OUTPUT_FOLDER, the folder that was actually requested.
	echo "ERROR: could not create output folder [$OUTPUT_FOLDER]." >&2
	exit 1
fi
## COVER. We copy cover only if it does not already exist. Only files found in
## the folder where the music is located will be taken into account.
## Covers go to OUTPUT_FOLDER, next to the track. The base name is the same for
## every cover, so it is computed once.
OUTPUT_COVER="${OUTPUT_FOLDER%/}/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
while IFS= read -r i; do
	## When find prints nothing the here-document still holds one empty line.
	[ -n "$i" ] || continue
	OUTPUT_COVERFILE="$OUTPUT_COVER.${i##*.}"
	COVER_COUNTER=1
	COVER_SUM=$(sha1sum "$i" | cut -f1 -d' ')
	COVER_KNOWN=0
	## Walk through "Cover", "Cover 1", "Cover 2"... until a free name is found
	## or the very same cover turns out to be in the target folder already.
	while [ -e "$OUTPUT_COVERFILE" ]; do
		if [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$COVER_SUM" ]; then
			COVER_KNOWN=1
			break
		fi
		## Different cover with same name is in target folder. We append a number.
		OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${i##*.}"
		COVER_COUNTER=$((COVER_COUNTER + 1))
	done
	[ "$COVER_KNOWN" -eq 1 ] && continue
	echo ":: COVER from $SOURCEFOLDER"
	cp -nv -- "$i" "$OUTPUT_COVERFILE"
	echo
done <<EOF
$(find "$SOURCEFOLDER" -maxdepth 1 \( -iname '*.png' -o -iname '*.jpg' \) )
EOF
## Zsh compatibility. We need it otherwise word splitting of parameter like
## OGG_PARAM will not work.
STATUS="$(set -o | grep 'shwordsplit' | awk '{print $2}')"
[ "$STATUS" = "off" ] && set -o shwordsplit
## TAG/RECODE
## With the -map_metadata parameter we clear all metadata.
## OVERWRITE and OGG_PARAM are left unquoted on purpose: OGG_PARAM holds
## several arguments that must be split into words.
ffmpeg $OVERWRITE -i "$1" -vn -sn $OGG_PARAM \
	-map_metadata -1 \
	-metadata title="$OUTPUT_TITLE" \
	-metadata artist="$OUTPUT_ARTIST" \
	-metadata track="$OUTPUT_TRACK" \
	-metadata date="$OUTPUT_DATE" \
	-metadata album="$OUTPUT_ALBUM" \
	-metadata album_artist="$OUTPUT_ARTIST" \
	-metadata genre="$OUTPUT_GENRE" \
	"$OUTPUT_FILE.$OUTPUT_FORMAT"
## Keep the status of ffmpeg: it is the result of the whole script.
FFMPEG_STATUS=$?
echo
## Restore Zsh previous options. This will not turn off shwordsplit if it
## was on before calling the script.
[ "$STATUS" = "off" ] && set +o shwordsplit
## Without an explicit exit the script would return the status of the test
## just above, which is non-zero whenever shwordsplit did not have to be
## restored.
exit "$FFMPEG_STATUS"