ambevar-dotfiles/.scripts/tc-audio-transcode

#!/bin/sh

################################################################################
## User options

## Library root. A non-empty OUTPUT_ROOT inherited from the environment is kept;
## otherwise we default to "$HOME/musics". Use OUTPUT_ROOT="." to write below
## the current folder.
: "${OUTPUT_ROOT:=$HOME/musics}"

## End of user options
################################################################################

## For the sake of simplicity we convert everything to OGG.
## OGG quality ranges from -1 to 10.
## -q-1 45 kbit/s
## -q0  64 kbit/s
## -q1  80 kbit/s
## -q2  96 kbit/s
## -q3  112 kbit/s
## -q4  128 kbit/s
## -q5  160 kbit/s
## -q6  192 kbit/s
## -q7  224 kbit/s
## -q8  256 kbit/s
## -q9  320 kbit/s
## -q10 500 kbit/s

## Print the usage text. $1 is the path of the program; only its base name is
## shown. The text is paged through 'less' when stdout is a terminal and 'less'
## is installed, otherwise it is written straight to stdout.
##
## The here-document delimiter is unquoted on purpose so that ${1##*/} expands
## to the program name. Consequently every other '$' that must appear in the
## output has to be written '\$' (single quotes do not protect it in a
## here-document).
_printhelp ()
{
    if [ -t 1 ] && command -v less >/dev/null 2>&1; then
        _printhelp_pager="less"
    else
        _printhelp_pager="cat"
    fi

    cat <<EOF | $_printhelp_pager
Usage: ${1##*/} [OPTIONS] FILE

Encode FILE in OGG with proper tags thanks to a very efficient titlecase
checker.  Output is written to user library with subfolders created according to
tags. It is smart enough to handle empty tags. Covers are extracted from tags
and found in input folder. Identical covers are only processed once.

Tags are processed according to the following rules (among others):

* Artist: we use same value for artist and album_artist.

* Genre: since this is not universal by nature, we do not put a genre in tags,
  except for special cases like Soundtrack.

* Composer: not universal either, we prefer ARTIST over COMPOSER, so COMPOSER
  will be empty.

Encoding quality is set to be the same as the source. OGG cannot go beyond 500,
so lossless formats like FLAC and WavPack will suffer from a quality loss
(should you mind).

Options:
  -c : capital case (only first letter in upper case)
  -f : overwrite if file exists
  -h : show this help
  -p : preview (do not change file)
  -q : hide FFmpeg runtime output.
  -s : skip encoding

Tags:
  -a : artist
  -b : bitrate
  -d : date
  -g : genre
  -l : album
  -n : track number
  -r : library root folder
  -t : title

Every tag you set from command-line will not get titlecased.
You can use the following variables to refer to the titlecased values:
  \$ALBUM
  \$ALBUMARTIST
  \$ARTIST
  \$COMPOSER
  \$DATE
  \$DISC
  \$FILENAME
  \$GENRE
  \$TITLE
  \$TRACK
  \$TYER

If bitrate argument is not provided, we use the bitrate of the source. If
bitrate argument is 0, we let FFmpeg choose the value.

Default output folder:
    OUTPUT_FOLDER="\$OUTPUT_ROOT/\$OUTPUT_ARTIST\${OUTPUT_ALBUM:+/\${OUTPUT_DATE:+\$OUTPUT_DATE - }\$OUTPUT_ALBUM}"

Default output file (inside the output folder):
    OUTPUT_FILE="\$OUTPUT_ARTIST - \${OUTPUT_PADDEDTRACK:+\$OUTPUT_PADDEDTRACK - }\$OUTPUT_TITLE.\$OUTPUT_EXT"

Examples:

  Set the 'artist' tag and reencode:
    ${1##*/} -a 'Franz Liszt' file.mp3

  Set 'artist' to be 'composer', and 'title' to be preceded by 'artist', do not reencode:
    ${1##*/} -s -a '\$COMPOSER' -t '\$ARTIST - \$TITLE' file.ogg

  Set track number to the leading number of the file name:
    ${1##*/} -n '\$FILENAME' file.mp3

IMPORTANT: you *must* use single quotes when using variables.

EOF
}

## OPTIONS: defaults for the command-line switches.
CAPITAL=0         # handed to the titlecase AWK script as 'capital'
OVERWRITE='-n'    # passed verbatim to ffmpeg; -f switches it to -y
PREVIEW='false'   # executed as a command later on: must be 'true' or 'false'
SKIP='false'      # same convention as PREVIEW
LOGLEVEL=''       # extra ffmpeg arguments; -q sets it to "-v fatal"

## TAGS: each default is a placeholder that _revar later replaces with the
## titlecased value read from the input file.
OUTPUT_ALBUM="\$ALBUM"
OUTPUT_ARTIST="\$ARTIST"
OUTPUT_DATE="\$DATE"
OUTPUT_GENRE="\$GENRE"
OUTPUT_TITLE="\$TITLE"
OUTPUT_TRACK="\$TRACK"

## PROPERTIES: a negative bitrate means the user did not request one.
OUTPUT_BITRATE=-1

## Data without a command-line switch. Changing these implies changing the code
## below as well.
## NOTE: OGG_PARAM is single-quoted, so '${OUTPUT_BITRATE}' is stored literally
## here and is not expanded by this assignment.
OUTPUT_EXT='ogg'
OGG_PARAM='-c:a libvorbis -b:a ${OUTPUT_BITRATE}k'

## Destination layout, also without a switch (but that could easily change).
## Both values are stored as unexpanded templates.
OUTPUT_FOLDER='$OUTPUT_ROOT/$OUTPUT_ARTIST${OUTPUT_ALBUM:+/${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM}'
OUTPUT_FILE='$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE'

## Parse the command-line switches. The leading ':' in the option string puts
## getopts in silent mode: a missing option argument is reported as ':' and an
## unknown switch as '?'.
while getopts ":a:b:cd:fg:l:n:r:t:hpsq" opt; do
    case "$opt" in

        ## Tags and properties: the argument is stored as is.
        a) OUTPUT_ARTIST=$OPTARG ;;
        b) OUTPUT_BITRATE=$OPTARG ;;
        d) OUTPUT_DATE=$OPTARG ;;
        g) OUTPUT_GENRE=$OPTARG ;;
        l) OUTPUT_ALBUM=$OPTARG ;;
        n) OUTPUT_TRACK=$OPTARG ;;
        r) OUTPUT_ROOT=$OPTARG ;;
        t) OUTPUT_TITLE=$OPTARG ;;

        h)
            _printhelp "$0"
            exit 1
            ;;
        c)
            CAPITAL=1 ;;
        f)
            OVERWRITE="-y" ;;
        p)
            PREVIEW=true ;;
        s)
            SKIP=true ;;

        q)
            LOGLEVEL="-v fatal" ;;

        ## This arm must come before the unknown-switch arm. An unquoted '?'
        ## is a glob matching any single character, ':' included, so it would
        ## otherwise swallow the missing-argument case.
        :)
            echo "Missing argument."
            _printhelp "$0"
            exit 1
            ;;
        \?)
            _printhelp "$0"
            exit 1
            ;;
    esac
done

## Drop the parsed switches; what remains must start with the input file.
shift $((OPTIND - 1))
if [ $# -eq 0 ]; then
    _printhelp "$0"
    exit 1
fi

## Every failed check below reports on stderr and exits with a non-zero status
## so that callers can detect the failure.
if ! command -v ffmpeg >/dev/null; then
    echo "ffmpeg required for transcoding." >&2
    exit 1
fi

if ! command -v realpath >/dev/null; then
    echo "realpath required to get input file folder." >&2
    exit 1
fi

## Check existence first so that the message shows the path the user gave.
if [ ! -d "$OUTPUT_ROOT" ]; then
    echo "Output folder '$OUTPUT_ROOT' does not exist." >&2
    exit 1
fi
OUTPUT_ROOT="$(realpath "$OUTPUT_ROOT")"

## The titlecase script is expected next to this script. When $0 contains no
## slash, ${0%/*} would yield the script name itself, so we fall back to the
## current folder.
case $0 in
    */*) SCRIPT_FOLDER="${0%/*}" ;;
    *) SCRIPT_FOLDER="." ;;
esac
TITLECASE_SCRIPT="$SCRIPT_FOLDER/titlecase.awk"
if [ ! -f "$TITLECASE_SCRIPT" ]; then
    echo "AWK titlecase script required." >&2
    exit 1
fi

##================================================================================
## Get metadata.
## ffmpeg is run without an output file; everything it reports about the input
## (stderr merged into stdout) is captured in STREAM.
STREAM=$(ffmpeg -nostdin -i "$1" 2>&1)

if ! printf '%s\n' "$STREAM" | grep -q "Stream"; then
    echo "ERROR: Non-audio file [$1]." >&2
    exit 1
fi

## Keep the lines that follow the first "Metadata" line of the report.
METADATA=$(printf '%s\n' "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}')

## File name without path; then split it into name and extension. The path is
## removed first so that a dot in a folder name is never taken for the
## extension separator. A name without any dot has an empty extension.
INPUT_FILE="${1##*/}"
case $INPUT_FILE in
    *.*)
        INPUT_EXT="${INPUT_FILE##*.}"
        INPUT_FILE="${INPUT_FILE%.*}"
        ;;
    *)
        INPUT_EXT=""
        ;;
esac
## Folder of the file. Needed for cover.
INPUT_FOLDER="$(realpath "$1")"
INPUT_FOLDER="${INPUT_FOLDER%/*}"
## Number in front of "kb/s" on the first "Duration" line. Left empty when that
## line carries no such figure (the line itself is not printed in that case).
INPUT_BITRATE=$(printf '%s\n' "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]][[:digit:]]*\) kb/s.*|\1|p;q;}')
## CODEC is unused for now.
# CODEC=$(echo "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}')

## Extension needs to be set in case we skip encoding so that ffmpeg will not be
## disturbed by an inappropriate extension.
if $SKIP && [ -z "$INPUT_EXT" ]; then
    echo "ERROR: Extension missing [$1]." >&2
    exit 1
fi

## Print the value of the metadata entry whose key is $1 (case-insensitive), as
## found in the global METADATA ("key   : value" lines). Prints an empty line
## when the key is absent.
##
## Only the key and the first ": " are stripped, so a value that itself
## contains " : " is returned whole.
##
## WARNING: This function greps for one match only, so if several metadata are
## present, this may not be the desired values.
_metadata_filter()
{
    printf '%s\n' "$METADATA" | grep -im1 "^ *$1 *:" | sed 's/^[^:]*: \{0,1\}//'
}

## Raw tag values, exactly as reported for the input file.
INPUT_TITLE=$(_metadata_filter "title")
INPUT_ARTIST=$(_metadata_filter "artist")
INPUT_ALBUM=$(_metadata_filter "album")
INPUT_ALBUMARTIST=$(_metadata_filter "album_artist")
INPUT_COMPOSER=$(_metadata_filter "composer")
INPUT_DISC=$(_metadata_filter "disc")
INPUT_GENRE=$(_metadata_filter "genre")
INPUT_TRACK=$(_metadata_filter "track")
INPUT_DATE=$(_metadata_filter "date")
INPUT_TYER=$(_metadata_filter "TYER")

##==============================================================================
## Variable cleansing.

## Print the arguments (joined by spaces) after titlecasing and cleansing.
## We use the AWK script to set title case. The script contains
## exceptions that can be configured.  We fix some chars with sed.
# ’ => '
# : => -
# / => -
# \ => -
# & => \\&
## Runs of two or more spaces are squeezed into one. The '&' is escaped because
## the result is later handed to awk with -v and used as a gsub replacement,
## where a bare '&' stands for the matched text.
##
## printf is used instead of echo so that values such as "-n" or values
## containing backslashes go through untouched.
_string_cleanser()
{
    printf '%s\n' "$*" | awk -v capital="$CAPITAL" -f "$TITLECASE_SCRIPT" \
        | sed -e "s/’/'/g ;  s| *[/\\:] *| - |g" -e 's/   */ /g' -e 's|&|\\\\&|g;'
}

## Cleansed ("titlecased") copy of every input tag. These are the values that
## _revar substitutes for the $NAME placeholders.
ALBUM=$(_string_cleanser "$INPUT_ALBUM")
ALBUMARTIST=$(_string_cleanser "$INPUT_ALBUMARTIST")
ARTIST=$(_string_cleanser "$INPUT_ARTIST")
COMPOSER=$(_string_cleanser "$INPUT_COMPOSER")
DATE=$(_string_cleanser "$INPUT_DATE")
DISC=$(_string_cleanser "$INPUT_DISC")
GENRE=$(_string_cleanser "$INPUT_GENRE")
TITLE=$(_string_cleanser "$INPUT_TITLE")
TRACK=$(_string_cleanser "$INPUT_TRACK")
TYER=$(_string_cleanser "$INPUT_TYER")

## Same treatment for the input file name (no path, no extension).
FILENAME=$(_string_cleanser "$INPUT_FILE")

## Genre whitelist. The genre is compared in lower case with spaces turned into
## underscores; known spellings are mapped to one canonical name and everything
## else is dropped.
case $(echo "$GENRE" | tr '[:upper:] ' '[:lower:]_') in
    ost | soundtrack | original_soundtrack)
        GENRE="Soundtrack"
        ;;
    classical | classics | classic)
        GENRE="Classical"
        ;;
    humour)
        GENRE="Humour"
        ;;
    *)
        GENRE=""
        ;;
esac

##================================================================================
## OUTPUT variables.

## Print $1 with every $NAME placeholder replaced by the value of the matching
## titlecased global (TITLE, ARTIST, ALBUM, ALBUMARTIST, COMPOSER, DISC, GENRE,
## TRACK, DATE, TYER, FILENAME). Text that is not a placeholder is printed
## unchanged. This is much safer than using shell expansion through 'eval'.
##
## Placeholders sharing a prefix must be replaced longest first: $ALBUMARTIST
## is handled before $ALBUM, otherwise it would become "<album>ARTIST".
##
## The values go through 'awk -v' (which processes backslash escapes) and are
## then used as gsub replacements (where a bare '&' means "the matched text").
_revar()
{
    printf '%s\n' "$1" | awk \
        -v title="$TITLE" \
        -v artist="$ARTIST" \
        -v album="$ALBUM" \
        -v albumartist="$ALBUMARTIST" \
        -v composer="$COMPOSER" \
        -v disc="$DISC" \
        -v genre="$GENRE" \
        -v track="$TRACK" \
        -v date="$DATE" \
        -v tyer="$TYER" \
        -v filename="$FILENAME" \
        '{
    gsub(/\$ALBUMARTIST/, albumartist)
    gsub(/\$ALBUM/, album)
    gsub(/\$ARTIST/, artist)
    gsub(/\$COMPOSER/, composer)
    gsub(/\$DATE/, date)
    gsub(/\$DISC/, disc)
    gsub(/\$FILENAME/, filename)
    gsub(/\$GENRE/, genre)
    gsub(/\$TITLE/, title)
    gsub(/\$TRACK/, track)
    gsub(/\$TYER/, tyer)
    print
}'
}

## Expand the placeholders first, then apply the fallback: the check has to be
## made on the expanded value, since the unexpanded default ('$TITLE', ...) is
## never empty.
OUTPUT_TITLE=$(_revar "$OUTPUT_TITLE")
OUTPUT_TITLE=${OUTPUT_TITLE:-Unknown Title}

## For the album the fallback only applies to an empty command-line value. An
## album tag that expands to nothing is left empty on purpose: the destination
## folder layout and the cover name use ${OUTPUT_ALBUM:+...} to cope with it.
OUTPUT_ALBUM=$(_revar "${OUTPUT_ALBUM:-Unknown Album}")

## We use album artist if artist is empty. ALBUMARTIST carries the '\\&'
## escaping added by _string_cleanser, which we undo since the value is used
## directly here.
OUTPUT_ARTIST=$(_revar "$OUTPUT_ARTIST")
if [ -z "$OUTPUT_ARTIST" ]; then
    OUTPUT_ARTIST=$(printf '%s\n' "$ALBUMARTIST" | sed 's/\\\\&/\&/g')
fi
OUTPUT_ARTIST=${OUTPUT_ARTIST:-Unknown Artist}

OUTPUT_GENRE=$(_revar "$OUTPUT_GENRE")

## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
OUTPUT_TRACK=$(_revar "$OUTPUT_TRACK" | sed -e 's/^0*//' -e 's|[^[:digit:]].*||')

## Remember whether the date still is the default placeholder, i.e. whether it
## was left untouched on the command line.
OUTPUT_DATE_TEMPLATE=$OUTPUT_DATE

## We extract the four-digits number from the date.
OUTPUT_DATE=$(_revar "$OUTPUT_DATE")
OUTPUT_DATE=$(echo "$OUTPUT_DATE" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')

## If DATE is not a year, we use TYER if it is a year. This only applies to the
## default date: a date given with -d is never replaced by TYER.
TYER_REG=$(printf '%s\n' "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
if [ "$OUTPUT_DATE_TEMPLATE" = '$DATE' ] && [ ${#DATE} -ne 4 ] && [ ${#TYER_REG} -eq 4 ]; then
    OUTPUT_DATE="$TYER_REG"
fi

## QUALITY
## Only reencode if not in OGG and if SKIP not set, or if explicitly specified.

## Print the ffmpeg audio codec arguments.
##   $1: "true" when encoding must be skipped
##   $2: input extension in lower case
##   $3: requested bitrate in kbit/s; negative means "same as source", 0 means
##       "let the encoder choose"
##   $4: bitrate of the source in kbit/s; may be empty or not a number
## Rules, in order:
##   - skip requested: stream copy;
##   - no bitrate requested and input already OGG: stream copy;
##   - no bitrate requested otherwise: encode at the source bitrate;
##   - bitrate 0, empty or not a number: encode without a bitrate argument;
##   - anything above 500 is trimmed to 500, which is OGG's limit.
_ogg_param()
{
    if [ "$1" = "true" ]; then
        echo "-c:a copy"
        return
    fi

    _ogg_rate="$3"
    ## stderr is silenced for the case where $3 is not an integer.
    if [ "$3" -lt 0 ] 2>/dev/null; then
        if [ "$2" = "ogg" ]; then
            echo "-c:a copy"
            return
        fi
        _ogg_rate="$4"
    fi

    case $_ogg_rate in
        "" | 0 | *[!0-9]*)
            echo "-c:a libvorbis"
            return
            ;;
    esac

    [ "$_ogg_rate" -gt 500 ] && _ogg_rate=500
    echo "-c:a libvorbis -b:a ${_ogg_rate}k"
}

INPUT_EXT_LOW="$(echo "$INPUT_EXT" | tr '[:upper:]' '[:lower:]')"
## When we do not encode, the container (hence the extension) stays the same.
if $SKIP; then
    OUTPUT_EXT="$INPUT_EXT_LOW"
fi
OGG_PARAM=$(_ogg_param "$SKIP" "$INPUT_EXT_LOW" "$OUTPUT_BITRATE" "$INPUT_BITRATE")

## Set OUTPUT_PADDEDTRACK, OUTPUT_FOLDER and OUTPUT_FILE from OUTPUT_ROOT and
## the final OUTPUT_* tags.
##
## The destination is built with plain shell expansion. Its layout relies on
## $OUTPUT_* variables and on ${VAR:+...}, neither of which _revar can expand
## (it only knows the tag placeholders). Handing it the layout as a template
## would leave the '$' expressions verbatim in the path.
##   folder: ROOT/ARTIST[/[DATE - ]ALBUM]
##   file:   ARTIST - [TRACK - ]TITLE        (extension is appended by callers)
_build_output_paths()
{
    ## Make sure track number has two digits for file name only.
    OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
    if [ -n "$OUTPUT_PADDEDTRACK" ] && [ "$OUTPUT_PADDEDTRACK" -lt 10 ]; then
        OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
    fi

    ## No folder may end with a slash: later path comparisons depend on it.
    OUTPUT_FOLDER="$OUTPUT_ROOT/$OUTPUT_ARTIST${OUTPUT_ALBUM:+/${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM}"
    OUTPUT_FILE="$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE"
}

_build_output_paths
unset OUTPUT_FILE_ORIGINAL

## The destination already exists: pick the strategy from the overwrite flag
## and prepare the warning shown next to the destination in the preview.
if [ -e "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
    _msg_bold=$(tput bold)
    _msg_reset=$(tput sgr0)
    case $OVERWRITE in
        -n)
            ## Never overwrite: make the name unique with a timestamp.
            OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
            OUTPUT_MSG="$(tput setf 1)$_msg_bold(Warning: destination exists, appending timestamp.)$_msg_reset"
            ;;
        *)
            ## Overwrite. When the destination is the input file itself we
            ## write to a timestamped name and remember the real one.
            ## WARNING: this comparison requires that no folder is suffixed by
            ## a slash.
            if [ "$INPUT_FOLDER/$INPUT_FILE.$INPUT_EXT" = "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
                OUTPUT_FILE_ORIGINAL="$OUTPUT_FILE"
                OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
            fi
            OUTPUT_MSG="$(tput setf 4)$_msg_bold(Warning: overwriting destination!)$_msg_reset"
            ;;
    esac
fi

##==============================================================================
## PREVIEW

## Note: most (all?) shell printf have an alignment issue when strings contain
## wide characters. We need to use AWK for proper alignment. Hence the 'aprint'
## function.

ATTR_WIDTH="%-13.13s" # Length of longest attribute +2
## INPUT_WIDTH = (COLUMNS - 15) / 2, where 15 stands for the attribute column
## (13) plus separators. When the terminal width cannot be queried we assume 80
## columns, and the width is never allowed to drop below 1: a negative value
## would produce an invalid printf format.
TERM_COLS=$(tput cols 2>/dev/null)
case $TERM_COLS in
    "" | *[!0-9]*) TERM_COLS=80 ;;
esac
INPUT_WIDTH=$(( (TERM_COLS - 15) / 2 ))
if [ "$INPUT_WIDTH" -lt 1 ]; then
    INPUT_WIDTH=1
fi
INPUT_WIDTH="%$INPUT_WIDTH.${INPUT_WIDTH}s"

## Read rows from stdin, fields separated by one or more tabs, and print the
## first three fields of each row as aligned columns: the first one formatted
## with INPUT_WIDTH, the second one with ATTR_WIDTH, the third one as is.
## All rows go through a single awk process, which matters since this is
## called frequently. Alignment is only reliable as long as the tags contain no
## tab, which we cannot guarantee since we have no control over the input.
aprint()
{
    awk -v FMT="$INPUT_WIDTH | $ATTR_WIDTH| %s\n" '
        BEGIN { FS = "\t+" }
        { printf(FMT, $1, $2, $3) }
    '
}

## Preview table: one row per attribute, input value on the left, output value
## on the right. Fields are separated by a tab, as expected by aprint; the tabs
## are produced by printf so that the table does not depend on literal tab
## characters in this file. The last four attributes have no output column.
{
    printf '%s\t%s\t%s\n' ":: INPUT ::" " " " :: OUTPUT ::"
    printf '%s\t%s\t%s\n' "[$INPUT_ARTIST]" " Artist" " [$OUTPUT_ARTIST]"
    printf '%s\t%s\t%s\n' "[$INPUT_ALBUM]" " Album" " [$OUTPUT_ALBUM]"
    printf '%s\t%s\t%s\n' "[$INPUT_TRACK]" " Track" " [$OUTPUT_TRACK]"
    printf '%s\t%s\t%s\n' "[$INPUT_TITLE]" " Title" " [$OUTPUT_TITLE]"
    printf '%s\t%s\t%s\n' "[$INPUT_DATE]" " Date" " [$OUTPUT_DATE]"
    printf '%s\t%s\t%s\n' "[$INPUT_GENRE]" " Genre" " [$OUTPUT_GENRE]"
    printf '%s\t%s\t%s\n' "[$INPUT_EXT]" " Ext" " [$OUTPUT_EXT]"
    printf '%s\t%s\t%s\n' "[$INPUT_BITRATE]" " Bitrate" " [$OUTPUT_BITRATE]"
    printf '%s\t%s\n' "[$INPUT_ALBUMARTIST]" " Albumartist"
    printf '%s\t%s\n' "[$INPUT_COMPOSER]" " Composer"
    printf '%s\t%s\n' "[$INPUT_DISC]" " Disc"
    printf '%s\t%s\n' "[$INPUT_TYER]" " Tyer"
} | aprint

## Destination path, preceded by the warning prepared earlier, if any.
cat <<EOF
:: DESTINATION $OUTPUT_MSG
[$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT]

EOF

## In preview mode nothing is written: we stop here.
$PREVIEW && exit

##==============================================================================
## RUN PROCESS
echo ":: Processing..."

## Make sure directory exists. On failure we report the folder we tried to
## create and exit with a non-zero status.
if ! mkdir -p -- "$OUTPUT_FOLDER"; then
    echo "ERROR: could not create output folder [$OUTPUT_FOLDER]." >&2
    exit 1
fi

## COVER. We copy the covers only if they do not already exist. All covers
## embedded in tags will be extracted. Only files found in the folder where the
## music is located will be taken into account, subfolders will be discarded.

## Minimum width and height, in pixels, below which a warning is printed.
COVER_LIMIT=100

## Copy the image file $1 to "$OUTPUT_FOLDER/[ALBUM - ]Cover.EXT", where EXT is
## the extension of $1. Does nothing when $1 is not a regular file.
## Prints "SOURCE -> DESTINATION" (or "SOURCE -> Skipping") on stdout.
## Globals read: OUTPUT_FOLDER, OUTPUT_ALBUM, COVER_LIMIT.
_cover()
{
    [ ! -f "$1" ] && return
    printf '%s -> ' "$1"

    OUTPUT_COVER="$OUTPUT_FOLDER/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
    OUTPUT_COVERFILE="$OUTPUT_COVER.${1##*.}"
    COVER_COUNTER=1

    ## While the target name is taken: if the existing file has the same
    ## content as $1 we skip the copy, otherwise we append a number to the
    ## name and check again.
    while [ -e "$OUTPUT_COVERFILE" ]; do
        if cmp -s "$OUTPUT_COVERFILE" "$1"; then
            echo "Skipping"
            return
        fi
        OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${1##*.}"
        COVER_COUNTER=$((COVER_COUNTER + 1))
    done

    printf '%s' "$OUTPUT_COVERFILE"
    ## Output warning if cover is too small.
    if command -v mediainfo >/dev/null; then
        BUF="$(mediainfo "$1")"
        ## Keep only the digits found after the colon of the first matching
        ## line, so that a figure printed with separators (e.g. "1 400
        ## pixels") is read as 1400 and not as 1.
        COVER_WIDTH=$(printf '%s\n' "$BUF" | awk -F: '/^Width/ {gsub(/[^0-9]/, "", $2); print $2; exit}')
        COVER_HEIGHT=$(printf '%s\n' "$BUF" | awk -F: '/^Height/ {gsub(/[^0-9]/, "", $2); print $2; exit}')

        if [ -z "$COVER_WIDTH" ] || [ "$COVER_WIDTH" -lt "$COVER_LIMIT" ] || \
            [ -z "$COVER_HEIGHT" ] || [ "$COVER_HEIGHT" -lt "$COVER_LIMIT" ]; then
            printf ' %s' "$(tput setf 1)$(tput bold)(Warning: bad quality cover.)$(tput sgr0)"
        fi
    fi

    cp -n "$1" "$OUTPUT_COVERFILE"
    echo
    echo
}

## Embedded covers. Each "Stream ... Video" line of the ffmpeg report is
## treated as one embedded cover. For the i-th one (counting from 0) we read
## the codec name on that very line and use it as extension, extract the
## picture to a temporary file and hand it over to _cover.
COVER_STREAMS=$(printf '%s\n' "$STREAM" | grep -c '^ *Stream.*Video')
i=0
while [ "$i" -lt "$COVER_STREAMS" ]; do
    COVER_EXT="$(printf '%s\n' "$STREAM" \
        | awk -v n="$i" '/^ *Stream.*Video/ { if (seen++ == n) { gsub(/,/, "", $4); print $4; exit } }')"
    [ "$COVER_EXT" = "mjpeg" ] && COVER_EXT="jpg"

    ## Skip this stream when no codec name was found or when the temporary
    ## file cannot be created.
    if [ -n "$COVER_EXT" ] && TEMP_COVER="$(mktemp "/tmp/cover-XXXXXX.$COVER_EXT")"; then
        ffmpeg -nostdin -v quiet -y -i "$1" -an -sn -c:v copy -map 0:v:$i "$TEMP_COVER"
        _cover "$TEMP_COVER"

        ## We do not want to bloat the temp folder with covers, so we remove it.
        rm -f -- "$TEMP_COVER"
    fi
    i=$((i + 1))
done

## Covers found next to the input file (PNG and JPG, subfolders excluded).
## The list is fed through a here-document so that the loop runs in the current
## shell. A folder without pictures yields a single empty line, which _cover
## ignores.
while IFS= read -r i; do
    _cover "$i"
done <<EOF
$(find "$INPUT_FOLDER" -maxdepth 1  \( -iname '*.png' -o -iname '*.jpg' \) )
EOF


## Zsh compatibility. We need it otherwise word splitting of parameter like
## OGG_PARAM will not work.
STATUS="$(set -o | grep 'shwordsplit' | awk '{print $2}')"
[ "$STATUS" = "off" ] && set -o shwordsplit

## TAG/RECODE
## With the -map_metadata parameter we clear all metadata.
## WARNING: ffmpeg continues to read stdin once it has started, so it should not
## be called from within a while<<EOF loop without disabling stdin.
## LOGLEVEL, OVERWRITE and OGG_PARAM are left unquoted on purpose: each one may
## hold several arguments (or none).
ffmpeg -nostdin $LOGLEVEL $OVERWRITE -i "$1" -vn -sn $OGG_PARAM \
    -map_metadata -1 \
    -metadata title="$OUTPUT_TITLE" \
    -metadata artist="$OUTPUT_ARTIST" \
    -metadata track="$OUTPUT_TRACK" \
    -metadata date="$OUTPUT_DATE" \
    -metadata album="$OUTPUT_ALBUM" \
    -metadata album_artist="$OUTPUT_ARTIST" \
    -metadata genre="$OUTPUT_GENRE" \
    "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
FFMPEG_STATUS=$?

## Restore Zsh previous options. This will not turn off shwordsplit if it
## was on before calling the function.
[ "$STATUS" = "off" ] && set +o shwordsplit

## On failure we stop here. In particular the original file must never be
## replaced by a partial result when overwriting in place; the temporary
## output is removed instead.
if [ "$FFMPEG_STATUS" -ne 0 ]; then
    if [ -n "$OUTPUT_FILE_ORIGINAL" ]; then
        rm -f -- "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
    fi
    echo "ERROR: ffmpeg failed with status $FFMPEG_STATUS [$1]." >&2
    exit 1
fi

## If we are overwriting in place, move the result over the original.
if [ -n "$OUTPUT_FILE_ORIGINAL" ]; then
    mv -f "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" "$OUTPUT_FOLDER/$OUTPUT_FILE_ORIGINAL.$OUTPUT_EXT"
fi
echo

echo ":: Process finished!"