ambevar-dotfiles/.scripts/tc-audio-transcode

534 lines
16 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/bin/sh
################################################################################
## User options
## OUTPUT_ROOT is the root of the music library. A non-empty value coming from
## the environment wins (e.g. OUTPUT_ROOT="." for the current folder),
## otherwise "$HOME/musics" is used.
: "${OUTPUT_ROOT:=$HOME/musics}"
## End of user options
################################################################################
## For the sake of simplicity we convert everything to OGG.
## OGG quality ranges from -1 to 10.
## -q-1 45 kbit/s
## -q0 64 kbit/s
## -q1 80 kbit/s
## -q2 96 kbit/s
## -q3 112 kbit/s
## -q4 128 kbit/s
## -q5 160 kbit/s
## -q6 192 kbit/s
## -q7 224 kbit/s
## -q8 256 kbit/s
## -q9 320 kbit/s
## -q10 500 kbit/s
## Display the usage text through the 'less' pager.
## $1: path the script was called with; only its basename is displayed.
## The here-document delimiter is unquoted so that ${1##*/} gets expanded. As a
## consequence every '$' that must be shown verbatim has to be backslash-escaped
## (single quotes do not protect anything inside a here-document).
## NOTE(review): the last example uses the shell-style '${FILENAME%% }' form;
## confirm it is still supported by the variable substitution code.
_printhelp ()
{
  cat <<EOF | less
Usage: ${1##*/} [OPTIONS] FILE
Encode FILE in OGG with proper tags thanks to a very efficient titlecase
checker. Output is written to user library with subfolders created according to
tags. It is smart enough to handle empty tags. Covers are extracted from tags
and found in input folder. Identical covers are only processed once.
Tags are proccessed according to the following rules (among others):
* Artist: we use same value for artist and album_artist.
* Genre: since this is not universal by nature, we do not put a genre in tags,
except for special cases like Soundtrack.
* Composer: not universal neither, we prefer ARTIST over COMPOSER, so COMPOSER
will be empty.
Encoding quality is set to be the same as the source. OGG cannot go beyond 500,
so lossless formats like FLAC and WavPack will suffer from a quality loss
(should you mind).
Options:
-c : capital case (only first letter in upper case)
-f : overwrite if file exists
-p : preview (do not change file)
-q : hide FFmpeg runtime output.
-s : skip encoding
Tags:
-a : artist
-b : bitrate
-d : date
-g : genre
-l : album
-n : track number
-r : library root folder
-t : title
Everything tag you set from command-line will not get titlecased.
You can use the following variables to refer to the titlecased values:
\$ALBUM
\$ALBUMARTIST
\$ARTIST
\$COMPOSER
\$DATE
\$FILENAME
\$GENRE
\$TRACK
\$TYER
If bitrate argument is not provided, we use the bitrate of the source. If
bitrate argument is 0, we leave FFmpeg chose the value.
Default output folder:
OUTPUT_FOLDER="\$OUTPUT_ROOT/\$OUTPUT_ARTIST/\${OUTPUT_ALBUM:+\${OUTPUT_DATE:+\$OUTPUT_DATE - }\$OUTPUT_ALBUM/}"
Default output file:
OUTPUT_FILE="\$OUTPUT\$OUTPUT_ARTIST - \${OUTPUT_TRACK:+\$OUTPUT_TRACK - }\$OUTPUT_TITLE.\$OUTPUT_EXT"
Examples:
Set the 'artist' tag and reencode:
${1##*/} -a 'Franz Liszt' file.mp3
Set 'artist' to be 'composer', and 'title' to be preceded by 'artist', do not reencode:
${1##*/} -s -a '\$COMPOSER' -t '\$ARTIST - \$TITLE' file.ogg
Set track number to first field in file name:
${1##*/} -n '\${FILENAME%% }'
IMPORTANT: you *must* use single quotes when using variables.
EOF
}
## OPTIONS
## Defaults of the command-line switches.
SKIP=false     # -s turns it to "true".
PREVIEW=false  # -p turns it to "true".
CAPITAL=0      # -c turns it to 1.
OVERWRITE='-n' # -f turns it to "-y".
LOGLEVEL=      # -q turns it to "-v fatal".
## TAGS
## Literal placeholders, single-quoted on purpose so that the shell does not
## expand them here. Options -a, -d, -g, -l, -n and -t replace them.
OUTPUT_TITLE='$TITLE'
OUTPUT_TRACK='$TRACK'
OUTPUT_ARTIST='$ARTIST'
OUTPUT_ALBUM='$ALBUM'
OUTPUT_DATE='$DATE'
OUTPUT_GENRE='$GENRE'
## PROPERTIES
## Option -b replaces this value.
OUTPUT_BITRATE='-1'
## Non-CLI-option data. Modifying these imply modifications in code below.
OUTPUT_EXT=ogg
OGG_PARAM='-c:a libvorbis -b:a ${OUTPUT_BITRATE}k'
## These ones are not CLI-options either, but this could be easily changed.
## Both are stored as unexpanded templates.
OUTPUT_FILE='$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE'
OUTPUT_FOLDER='$OUTPUT_ROOT/$OUTPUT_ARTIST${OUTPUT_ALBUM:+/${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM}'
## Command-line parsing. The leading ':' of the option string selects the
## silent error mode of getopts: 'opt' is set to ':' when an option lacks its
## argument and to '?' when the option is unknown.
while getopts ":a:b:cd:fg:l:n:r:t:hpsq" opt; do
  case $opt in
  a) OUTPUT_ARTIST=$OPTARG ;;
  b) OUTPUT_BITRATE=$OPTARG ;;
  d) OUTPUT_DATE=$OPTARG ;;
  g) OUTPUT_GENRE=$OPTARG ;;
  l) OUTPUT_ALBUM=$OPTARG ;;
  n) OUTPUT_TRACK=$OPTARG ;;
  r) OUTPUT_ROOT=$OPTARG ;;
  t) OUTPUT_TITLE=$OPTARG ;;
  h)
    _printhelp "$0"
    exit 1
    ;;
  c)
    CAPITAL=1 ;;
  f)
    OVERWRITE="-y" ;;
  p)
    PREVIEW=true ;;
  s)
    SKIP=true ;;
  q)
    LOGLEVEL="-v fatal" ;;
  ## This arm must come before the '?' one: an unescaped '?' is a glob that
  ## matches any single character, ':' included.
  :)
    echo "Missing argument."
    _printhelp "$0"
    exit 1
    ;;
  \?)
    _printhelp "$0"
    exit 1
    ;;
  esac
done
## Drop the parsed options so that $1 is the input file.
shift $((OPTIND - 1))
if [ $# -eq 0 ]; then
  _printhelp "$0"
  exit
fi
## Prerequisites. Every failure is reported on stderr with a non-zero status so
## that callers (loops, 'find -exec', '&&' chains) can detect it.
if ! command -v ffmpeg >/dev/null; then
  echo "ffmpeg required for transcoding." >&2
  exit 1
fi
if ! command -v realpath >/dev/null; then
  echo "realpath required to get input file folder." >&2
  exit 1
fi
OUTPUT_ROOT="$(realpath "$OUTPUT_ROOT")"
if [ ! -d "$OUTPUT_ROOT" ]; then
  echo "Output folder '$OUTPUT_ROOT' does not exist." >&2
  exit 1
fi
## The titlecase AWK script is expected next to this script. When $0 holds no
## slash, ${0%/*} would return $0 itself, so we look in the current folder.
case $0 in
*/*) TITLECASE_SCRIPT="${0%/*}/titlecase.awk" ;;
*) TITLECASE_SCRIPT="./titlecase.awk" ;;
esac
if [ ! -f "$TITLECASE_SCRIPT" ]; then
  echo "AWK titlecase script required." >&2
  exit 1
fi
##================================================================================
## Get metadata.
## Everything ffmpeg prints about the input (stdout and stderr) is captured.
STREAM=$(ffmpeg -nostdin -i "$1" 2>&1)
if ! printf '%s\n' "$STREAM" | grep -q "Stream"; then
  echo "ERROR: Non-audio file [$1]." >&2
  exit 1
fi
## Keep every line following the first 'Metadata' line.
METADATA=$(printf '%s\n' "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}')
## Filename without extension nor path. The path is removed first so that a dot
## in a folder name is never mistaken for the extension separator.
INPUT_FILE="${1##*/}"
case $INPUT_FILE in
*.*)
  INPUT_EXT="${INPUT_FILE##*.}"
  INPUT_FILE="${INPUT_FILE%.*}"
  ;;
*)
  ## No dot: there is no extension at all.
  INPUT_EXT=""
  ;;
esac
## Folder of the file. Needed for cover.
INPUT_FOLDER="$(realpath "$1")"
INPUT_FOLDER="${INPUT_FOLDER%/*}"
## Number preceding 'kb/s' on the first 'Duration' line. Nothing is printed
## when that line holds no such number, which leaves INPUT_BITRATE empty.
INPUT_BITRATE=$(printf '%s\n' "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]][[:digit:]]*\) kb/s|\1|p;q}')
## CODEC is unused for now.
# CODEC=$(echo "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}')
## Extension needs to be set in case we skip encoding so that ffmpeg will not be
## disturbed by unappropriate extension.
if $SKIP && [ -z "$INPUT_EXT" ]; then
  echo "ERROR: Extension missing [$1]." >&2
  exit 1
fi
## Print the value of one tag found in $METADATA.
## $1: tag name, matched case-insensitively at the beginning of a line.
## WARNING: This function greps for one match only, so if several metadata are
## present, this may not be the desired values.
## Only the leading "name :" part is removed: a value holding " : " itself
## (e.g. "Foo : Bar") is printed in full.
_metadata_filter()
{
  printf '%s\n' "$METADATA" | grep -im1 "^ *$1 *:" | sed 's/^[^:]*: \{0,1\}//'
}
## Raw tag values as found in the metadata, one lookup per tag.
INPUT_ALBUM="$(_metadata_filter "album")"
INPUT_ALBUMARTIST="$(_metadata_filter "album_artist")"
INPUT_ARTIST="$(_metadata_filter "artist")"
INPUT_COMPOSER="$(_metadata_filter "composer")"
INPUT_DATE="$(_metadata_filter "date")"
INPUT_DISC="$(_metadata_filter "disc")"
INPUT_GENRE="$(_metadata_filter "genre")"
INPUT_TITLE="$(_metadata_filter "title")"
INPUT_TRACK="$(_metadata_filter "track")"
INPUT_TYER="$(_metadata_filter "TYER")"
##==============================================================================
## Variable cleansing.
## We use the AWK script to set title case. The script contains
## exceptions that can be configured. We fix some chars with sed.
# typographic apostrophe (U+2019) => '
# : => -
# / => -
# \ => -
# & => \\& (doubled backslash in front of the ampersand)
##
## $@: the words to clean, joined with spaces. Result on stdout.
## The typographic apostrophe is generated from its UTF-8 bytes so that this
## file stays pure ASCII: an empty sed pattern ("s//'/g") makes sed abort.
## printf is used instead of echo so that a leading '-n' or backslashes in a
## tag are passed through untouched.
_string_cleanser()
{
  printf '%s\n' "$*" \
    | awk -v capital="$CAPITAL" -f "$TITLECASE_SCRIPT" \
    | sed -e "s/$(printf '\342\200\231')/'/g" \
      -e 's| *[/\:] *| - |g' \
      -e 's/ \+/ /g' \
      -e 's|&|\\\\&|g'
}
## These are the "titlecased" variables: every raw tag, plus the input file
## name, goes through the cleanser.
ALBUM="$(_string_cleanser "$INPUT_ALBUM")"
ALBUMARTIST="$(_string_cleanser "$INPUT_ALBUMARTIST")"
ARTIST="$(_string_cleanser "$INPUT_ARTIST")"
COMPOSER="$(_string_cleanser "$INPUT_COMPOSER")"
DATE="$(_string_cleanser "$INPUT_DATE")"
DISC="$(_string_cleanser "$INPUT_DISC")"
FILENAME="$(_string_cleanser "$INPUT_FILE")"
GENRE="$(_string_cleanser "$INPUT_GENRE")"
TITLE="$(_string_cleanser "$INPUT_TITLE")"
TRACK="$(_string_cleanser "$INPUT_TRACK")"
TYER="$(_string_cleanser "$INPUT_TYER")"
## The genre is lower-cased and its spaces are turned into underscores, then
## only a few well-known values are kept; anything else is dropped.
GENRE="$(echo "$GENRE" | tr '[:upper:] ' '[:lower:]_')"
case $GENRE in
ost | soundtrack | original_soundtrack)
  GENRE="Soundtrack"
  ;;
classical | classics | classic)
  GENRE="Classical"
  ;;
humour)
  GENRE="Humour"
  ;;
*)
  GENRE=""
  ;;
esac
##================================================================================
## OUTPUT variables.
## The following function replaces all variables with their value. This is much
## safer than using shell expansion through 'eval.'
##
## $1: template possibly holding $TITLE, $ARTIST, $ALBUM, $ALBUMARTIST,
##     $COMPOSER, $DISC, $GENRE, $TRACK, $DATE, $TYER or $FILENAME.
## The expanded template is printed on stdout.
## $ALBUMARTIST must be handled before $ALBUM: the latter is a prefix of the
## former and would otherwise turn it into "<album>ARTIST".
_revar()
{
  printf '%s\n' "$1" | awk \
    -v title="$TITLE" \
    -v artist="$ARTIST" \
    -v album="$ALBUM" \
    -v albumartist="$ALBUMARTIST" \
    -v composer="$COMPOSER" \
    -v disc="$DISC" \
    -v genre="$GENRE" \
    -v track="$TRACK" \
    -v date="$DATE" \
    -v tyer="$TYER" \
    -v filename="$FILENAME" \
    '{
      gsub(/\$TITLE/, title)
      gsub(/\$ARTIST/, artist)
      gsub(/\$ALBUMARTIST/, albumartist)
      gsub(/\$ALBUM/, album)
      gsub(/\$COMPOSER/, composer)
      gsub(/\$DISC/, disc)
      gsub(/\$GENRE/, genre)
      gsub(/\$TRACK/, track)
      gsub(/\$DATE/, date)
      gsub(/\$TYER/, tyer)
      gsub(/\$FILENAME/, filename)
      print
    }'
}
## The templates are expanded first and the fallback values are applied to the
## result: testing the template itself (e.g. '$TITLE') would never detect an
## empty tag.
OUTPUT_TITLE=$(_revar "$OUTPUT_TITLE")
[ -z "$OUTPUT_TITLE" ] && OUTPUT_TITLE="Unknown Title"
## NOTE(review): the album default is deliberately left as it was, i.e. it only
## applies to an empty template (-l ''). An empty album tag yields an empty
## OUTPUT_ALBUM -- confirm this is the intended behaviour.
OUTPUT_ALBUM=$(_revar "${OUTPUT_ALBUM:-Unknown Album}")
## We use album artist if artist is empty.
OUTPUT_ARTIST=$(_revar "$OUTPUT_ARTIST")
[ -z "$OUTPUT_ARTIST" ] && OUTPUT_ARTIST="$ALBUMARTIST"
[ -z "$OUTPUT_ARTIST" ] && OUTPUT_ARTIST="Unknown Artist"
OUTPUT_GENRE=$(_revar "$OUTPUT_GENRE")
## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
OUTPUT_TRACK=$(_revar "$OUTPUT_TRACK" | sed -e 's/^0*//' -e 's|[^[:digit:]].*||')
## We extract the four-digits number from the date.
OUTPUT_DATE=$(_revar "$OUTPUT_DATE" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
## If DATE is not a year, we use TYER if it is a year. TYER is a plain value,
## not a template, so it does not need to go through _revar.
TYER_REG=$(printf '%s\n' "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
[ ${#DATE} -ne 4 ] && [ ${#TYER_REG} -eq 4 ] && OUTPUT_DATE="$TYER_REG"
## QUALITY
## Print the ffmpeg audio codec arguments on stdout.
## $1: "true" when encoding must be skipped (the audio stream is copied).
## $2: lower-cased extension of the input file.
## $3: requested bitrate in kbit/s; negative means "same as source", 0 means
##     "let FFmpeg choose".
## $4: bitrate of the source in kbit/s; may be empty or not a number.
## Rules, first match wins:
## * skip requested: stream copy;
## * OGG input without explicit bitrate: stream copy, no reencoding;
## * unknown or null bitrate: libvorbis with FFmpeg's default;
## * otherwise libvorbis at the given bitrate, trimmed to OGG's limit of 500.
_codec_args()
{
  if [ "$1" = true ]; then
    printf '%s\n' "-c:a copy"
    return
  fi
  _ca_rate=$3
  if [ "$_ca_rate" -lt 0 ] 2>/dev/null; then
    if [ "$2" = "ogg" ]; then
      printf '%s\n' "-c:a copy"
      return
    fi
    _ca_rate=$4
  fi
  case $_ca_rate in
  '' | 0 | *[!0-9]*)
    printf '%s\n' "-c:a libvorbis"
    return
    ;;
  esac
  [ "$_ca_rate" -gt 500 ] && _ca_rate=500
  printf '%s\n' "-c:a libvorbis -b:a ${_ca_rate}k"
}
## Only reencode if not in OGG and if SKIP not set, or if explicitly specified.
INPUT_EXT_LOW="$(printf '%s\n' "$INPUT_EXT" | tr '[:upper:]' '[:lower:]')"
OGG_PARAM=$(_codec_args "$SKIP" "$INPUT_EXT_LOW" "$OUTPUT_BITRATE" "$INPUT_BITRATE")
## When the stream is copied on request, the container must stay the same.
if [ "$SKIP" = true ]; then
  OUTPUT_EXT="$INPUT_EXT_LOW"
fi
## Make sure track number has two digits for file name only.
OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
if [ -n "$OUTPUT_PADDEDTRACK" ] && [ "$OUTPUT_PADDEDTRACK" -lt 10 ]; then
  OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
fi
## The destination is built here with regular parameter expansion. _revar only
## knows the tag placeholders, so it cannot expand the OUTPUT_* variables nor
## the ${VAR:+...} constructs: running the OUTPUT_FOLDER and OUTPUT_FILE
## templates through it left them untouched.
##
## Print the destination folder: root/artist[/[date - ]album].
_output_folder()
{
  printf '%s\n' "$OUTPUT_ROOT/$OUTPUT_ARTIST${OUTPUT_ALBUM:+/${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM}"
}
## Print the destination file name without extension: artist - [track - ]title.
_output_file()
{
  printf '%s\n' "$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE"
}
OUTPUT_FOLDER=$(_output_folder)
OUTPUT_FILE=$(_output_file)
unset OUTPUT_FILE_ORIGINAL
if [ -e "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
  if [ "$OVERWRITE" = "-n" ]; then
    ## If file exist, we append a unique timestamp to the name.
    OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
    OUTPUT_MSG="$(tput setf 1)$(tput bold)(Warning: destination exists, appending timestamp.)$(tput sgr0)"
  else
    ## WARNING: here it is important that no folder are suffixed by slashes.
    ## Input and output are the same file: we write to a timestamped name and
    ## remember the real name so that the result can be moved over it later.
    if [ "$INPUT_FOLDER/$INPUT_FILE.$INPUT_EXT" = "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
      OUTPUT_FILE_ORIGINAL="$OUTPUT_FILE"
      OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
    fi
    OUTPUT_MSG="$(tput setf 4)$(tput bold)(Warning: overwriting destination!)$(tput sgr0)"
  fi
fi
##==============================================================================
## PREVIEW
## Note: most (all?) shell printf have an alignment issue when strings contain
## wide characters. We need to use AWK for proper alignment. Hence the 'aprint'
## function.
ATTR_WIDTH="%-13.13s" # Length of longest attribute +2
## INPUT_WIDTH = (COLUMNS - ATTR_WIDTH - 2 (for |)) / 2
## We fall back to 80 columns when the terminal width is unknown (tput failing
## or printing nothing) and never go below 1, otherwise the printf format
## built below would be invalid.
TERM_COLS=$(tput cols 2>/dev/null)
case $TERM_COLS in
'' | *[!0-9]*) TERM_COLS=80 ;;
esac
INPUT_WIDTH=$(((TERM_COLS - 15) / 2))
[ "$INPUT_WIDTH" -lt 1 ] && INPUT_WIDTH=1
INPUT_WIDTH="%$INPUT_WIDTH.${INPUT_WIDTH}s"
## We output everything in one pass to speed up the process since this is quite
## demanding and called frequently. This function is reliable as long as no
## tabs are found in tags. But since we have no control over the input, we never
## know.
##
## Read lines of up to three tab-separated fields on stdin and print them as
## "input | attribute| output". INPUT_WIDTH and ATTR_WIDTH are read when the
## function is called.
aprint()
{
  awk -F'\t+' -v FMT="$INPUT_WIDTH | $ATTR_WIDTH| %s\n" '{printf FMT, $1, $2, $3 }'
}
## Preview table. 'aprint' splits its input on tabs, so the rows are generated
## with explicit '\t' separators instead of relying on invisible literal tabs.
## printf reuses its format for every group of arguments, one row per group.
{
  ## Consecutive tabs count as one separator: the single space keeps the
  ## middle column non-empty so that the output title lands in the third one.
  printf '%s\t%s\t%s\n' ':: INTPUT ::' ' ' ':: OUTPUT ::'
  printf '[%s]\t%s\t[%s]\n' \
    "$INPUT_ARTIST" 'Artist' "$OUTPUT_ARTIST" \
    "$INPUT_ALBUM" 'Album' "$OUTPUT_ALBUM" \
    "$INPUT_TRACK" 'Track' "$OUTPUT_TRACK" \
    "$INPUT_TITLE" 'Title' "$OUTPUT_TITLE" \
    "$INPUT_DATE" 'Date' "$OUTPUT_DATE" \
    "$INPUT_GENRE" 'Genre' "$OUTPUT_GENRE" \
    "$INPUT_EXT" 'Ext' "$OUTPUT_EXT" \
    "$INPUT_BITRATE" 'Bitrate' "$OUTPUT_BITRATE"
  ## Input-only tags: no output column.
  printf '[%s]\t%s\n' \
    "$INPUT_ALBUMARTIST" 'Albumartist' \
    "$INPUT_COMPOSER" 'Composer' \
    "$INPUT_DISC" 'Disc' \
    "$INPUT_TYER" 'Tyer'
} | aprint
cat <<EOF
:: DESTINATION $OUTPUT_MSG
[$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT]
EOF
## In preview mode nothing gets written: we stop here.
$PREVIEW && exit
##==============================================================================
## RUN PROCESS
echo ":: Processing..."
## Make sure directory exists. The failure is reported with the folder we
## actually tried to create, on stderr and with a non-zero status.
if ! mkdir -p "$OUTPUT_FOLDER"; then
  echo "ERROR: could not create output folder [$OUTPUT_FOLDER]." >&2
  exit 1
fi
## COVER. We copy the covers only if they do not already exist. All covers
## embedded in tags will be extracted. Only files found in the folder where the
## music is located will be taken into account, subfolders will be discarded.
## Covers whose width or height is below COVER_LIMIT (in pixels) get a warning.
COVER_LIMIT=100
## Copy one cover to "$OUTPUT_FOLDER/[$OUTPUT_ALBUM - ]Cover.<ext>".
## $1: path to the picture; nothing happens if it is not a regular file.
## Progress is printed on stdout as "source -> destination".
_cover()
{
  [ ! -f "$1" ] && return
  printf '%s -> ' "$1"
  OUTPUT_COVER="$OUTPUT_FOLDER/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
  OUTPUT_COVERFILE="$OUTPUT_COVER.${1##*.}"
  COVER_COUNTER=1
  ## The checksum of the source does not change: compute it only once.
  COVER_SUM="$(sha1sum "$1" | cut -f1 -d' ')"
  ## If a different cover with the same name already exist, we append a number
  ## and check again. If it is the same cover, we skip it.
  while [ -e "$OUTPUT_COVERFILE" ]; do
    if [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$COVER_SUM" ]; then
      echo "Skipping"
      return
    fi
    OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${1##*.}"
    COVER_COUNTER=$((COVER_COUNTER + 1))
  done
  printf '%s' "$OUTPUT_COVERFILE"
  ## Output warning if cover is too small.
  if command -v mediainfo >/dev/null; then
    BUF="$(mediainfo "$1")"
    ## mediainfo may group digits with spaces ("1 000 pixels"): we keep every
    ## digit found after the colon of the first matching line, not only the
    ## third word.
    COVER_WIDTH=$(printf '%s\n' "$BUF" | awk -F: '/^Width/ {gsub(/[^0-9]/, "", $2); print $2; exit}')
    COVER_HEIGHT=$(printf '%s\n' "$BUF" | awk -F: '/^Height/ {gsub(/[^0-9]/, "", $2); print $2; exit}')
    if [ -z "$COVER_WIDTH" ] || [ "$COVER_WIDTH" -lt "$COVER_LIMIT" ] \
      || [ -z "$COVER_HEIGHT" ] || [ "$COVER_HEIGHT" -lt "$COVER_LIMIT" ]; then
      printf ' %s' "$(tput setf 1)$(tput bold)(Warning: bad quality cover.)$(tput sgr0)"
    fi
  fi
  cp -n "$1" "$OUTPUT_COVERFILE"
  echo
  echo
}
## Embedded covers. Every line of the ffmpeg description matching
## 'Stream.*Video' is extracted to a temporary file and handed to _cover.
VIDEO_COUNT=$(printf '%s\n' "$STREAM" | grep -c '^ *Stream.*Video')
for i in $(seq 0 $((VIDEO_COUNT - 1))); do
  ## Codec of the i-th video stream only (fourth word of its line, comma
  ## removed): with several streams, taking all the matches at once would
  ## yield a multi-line extension.
  COVER_EXT="$(printf '%s\n' "$STREAM" \
    | awk -v n="$i" '/^ *Stream.*Video/ { if (seen++ == n) { gsub(/,/, "", $4); print $4; exit } }')"
  [ -z "$COVER_EXT" ] && continue
  [ "$COVER_EXT" = "mjpeg" ] && COVER_EXT="jpg"
  TEMP_COVER="$(mktemp "/tmp/cover-XXXXXX.$COVER_EXT")" || continue
  ffmpeg -nostdin -v quiet -y -i "$1" -an -sn -c:v copy -map 0:v:$i "$TEMP_COVER"
  _cover "$TEMP_COVER"
  ## We do not want to bloat the temp folder with covers, so we remove it.
  rm -f "$TEMP_COVER"
done
## Covers lying next to the input file (PNG and JPG, no subfolders). The list
## is fed through a here-document, one path per line.
while IFS= read -r i; do
  _cover "$i"
done <<EOF
$(find "$INPUT_FOLDER" -maxdepth 1 \( -iname '*.png' -o -iname '*.jpg' \) )
EOF
## Zsh compatibility. We need it otherwise word splitting of parameter like
## OGG_PARAM will not work.
STATUS="$(set -o | grep 'shwordsplit' | awk '{print $2}')"
[ "$STATUS" = "off" ] && set -o shwordsplit
## TAG/RECODE
## With the -map_metadata parameter we clear all metadata.
## WARNING: ffmpeg continues to read stdin once it has started, so it should not
## be called from within a while<<EOF loop without disabling stdin.
## LOGLEVEL, OVERWRITE and OGG_PARAM are left unquoted on purpose: each of them
## may hold several words (or none).
# shellcheck disable=SC2086
ffmpeg -nostdin $LOGLEVEL $OVERWRITE -i "$1" -vn -sn $OGG_PARAM \
  -map_metadata -1 \
  -metadata title="$OUTPUT_TITLE" \
  -metadata artist="$OUTPUT_ARTIST" \
  -metadata track="$OUTPUT_TRACK" \
  -metadata date="$OUTPUT_DATE" \
  -metadata album="$OUTPUT_ALBUM" \
  -metadata album_artist="$OUTPUT_ARTIST" \
  -metadata genre="$OUTPUT_GENRE" \
  "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
FFMPEG_STATUS=$?
## Restore Zsh previous options. This will not turn off shwordsplit if it
## was on before calling the function.
[ "$STATUS" = "off" ] && set +o shwordsplit
## A failed run must never replace the original file nor be reported as a
## success.
if [ "$FFMPEG_STATUS" -ne 0 ]; then
  echo "ERROR: ffmpeg failed with status $FFMPEG_STATUS [$1]." >&2
  ## In the in-place case the output is our own timestamped temporary file:
  ## we remove whatever partial result was left behind.
  if [ -n "$OUTPUT_FILE_ORIGINAL" ]; then
    rm -f "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
  fi
  exit "$FFMPEG_STATUS"
fi
## If we are overwriting inplace.
if [ -n "$OUTPUT_FILE_ORIGINAL" ]; then
  mv -f "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" "$OUTPUT_FOLDER/$OUTPUT_FILE_ORIGINAL.$OUTPUT_EXT"
fi
echo
echo ":: Process finished!"