diff --git a/.scripts/tc-audio-batch b/.scripts/tc-audio-batch deleted file mode 100755 index 6f42b05b..00000000 --- a/.scripts/tc-audio-batch +++ /dev/null @@ -1,125 +0,0 @@ -#!/bin/sh - -_printhelp () -{ - cat</dev/null; then - ## lscpu is a Linux util also available on *BSD. - CPUNO=$(lscpu | awk '/^CPU\(s\)/ {print $2;exit}') -else - CPUNO=1 -fi - -while getopts ":p:h" opt; do - case $opt in - h) - _printhelp "$0" - exit 1 ;; - p) - CPUNO=$OPTARG ;; - ?) - _printhelp "$0" - exit 1 ;; - :) - _printhelp "$0" - exit 1 ;; - esac -done - -shift $(($OPTIND - 1)) -if [ $# -eq 0 ]; then - _printhelp "$0" - exit 1 -fi - -ROOT="$1" -shift - -if ! command -v ffmpeg >/dev/null; then - echo "ffmpeg required." - exit -fi - -if [ ! -f "${0%/*}/titlecase.awk" ]; then - echo "AWK titlecase script required." - exit -fi - -if [ ! -f "${0%/*}/tc-audio-transcode" ]; then - echo "tc-audio-transcode script required." - exit -fi - -INPUT="$(find "." \( \ - -iname '*.aac' -o \ - -iname '*.ape' -o \ - -iname '*.flac' -o \ - -iname '*.ogg' -o \ - -iname '*.mp3' -o \ - -iname '*.mp4' -o \ - -iname '*.mpc' -o \ - -iname '*.wav' -o \ - -iname '*.wv' \) | sort -n)" - -_worker() -{ - CORE=$1 - shift - - ## WARNING: ffmpeg continues to read stdin once it has started, so it should - ## not be called from within a while<$(tput sgr0) $file" - - [ $CPUNO -eq 1 ] && "${0%/*}"/tc-audio-transcode "$@" "$file" || \ - tc-audio-transcode -q "$@" "$file" >/dev/null - fi - done</dev/null; then - echo "ffmpeg required for transcoding." - exit -fi - -if ! command -v realpath >/dev/null; then - echo "realpath required to get input file folder." - exit -fi - -OUTPUT_ROOT="$(realpath "$OUTPUT_ROOT")" -if [ ! -d "$OUTPUT_ROOT" ]; then - echo "Output folder '$OUTPUT_ROOT' does not exist." - exit -fi - -TITLECASE_SCRIPT="${0%/*}/titlecase.awk" -if [ ! -f "$TITLECASE_SCRIPT" ]; then - echo "AWK titlecase script required." 
- exit -fi - -##================================================================================ -## Get metadata. -STREAM=$(ffmpeg -nostdin -i "$1" 2>&1) - -if [ -z "$(echo $STREAM | grep "Stream")" ]; then - echo "ERROR: Non-audio file [$1]." - exit -fi - -METADATA=$(echo "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}') - -## Filename without extension nor path. -INPUT_FILE="${1%.*}" -INPUT_FILE="${INPUT_FILE##*/}" -## Folder of the file. Needed for cover. -INPUT_FOLDER="$(realpath "$1")" -INPUT_FOLDER="${INPUT_FOLDER%/*}" -INPUT_EXT="${1##*.}" -INPUT_BITRATE=$(echo "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]]\+\) kb/s|\1|;p;q}') -## CODEC is unused for now. -# CODEC=$(echo "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}') - -## Extension needs to be set in case we skip encoding so that ffmpeg will not be -## disturbed by unappropriate extension. -if $SKIP && [ -z "$INPUT_EXT" ]; then - echo "ERROR: Extension missing [$1]." - exit -fi - -## WARNING: This function greps for one match only, so if several metadata are -## present, this may not be the desired values. -_metadata_filter() -{ - echo "$METADATA" | grep -im1 "^ *$1 *:" | sed 's/[^:]* : //g' -} - -INPUT_TITLE=$(_metadata_filter "title") -INPUT_ARTIST=$(_metadata_filter "artist") -INPUT_ALBUM=$(_metadata_filter "album") -INPUT_ALBUMARTIST=$(_metadata_filter "album_artist") -INPUT_COMPOSER=$(_metadata_filter "composer") -INPUT_DISC=$(_metadata_filter "disc") -INPUT_GENRE=$(_metadata_filter "genre") -INPUT_TRACK=$(_metadata_filter "track") -INPUT_DATE=$(_metadata_filter "date") -INPUT_TYER=$(_metadata_filter "TYER") - -##============================================================================== -## Variable cleansing. - -## We use the AWK script to set title case. The script contains -## exceptions that can be configured. We fix some chars with sed. 
# ’ => '
# : => -
# / => -
# \ => -
# & => \&

## Normalise one tag value: title-case it, then replace awkward characters.
##
## Arguments: the raw text; several arguments are joined with single spaces.
## Globals:   CAPITAL (read, forwarded to the AWK script as 'capital'),
##            TITLECASE_SCRIPT (read, path of the AWK program to run).
## Outputs:   the cleansed text on stdout.
##
## The text is fed with printf rather than echo, so that a value such as "-n"
## or "-e", or one containing backslashes, reaches awk unchanged whatever
## shell runs this script.
##
## The last sed expression emits two backslashes in front of every '&'.
## NOTE(review): presumably this lets the value survive both awk's -v escape
## processing and gsub()'s special treatment of '&' -- confirm.
_string_cleanser()
{
    printf '%s\n' "$*" \
        | awk -v capital="$CAPITAL" -f "$TITLECASE_SCRIPT" \
        | sed -e "s/’/'/g ; s| *[/\\:] *| - |g" -e 's/ \+/ /g' -e 's|&|\\\\&|g;'
}

## These are the "titlecased" variables.
TITLE=$(_string_cleanser "$INPUT_TITLE")
ARTIST=$(_string_cleanser "$INPUT_ARTIST")
ALBUM=$(_string_cleanser "$INPUT_ALBUM")
ALBUMARTIST=$(_string_cleanser "$INPUT_ALBUMARTIST")
COMPOSER=$(_string_cleanser "$INPUT_COMPOSER")
DISC=$(_string_cleanser "$INPUT_DISC")
GENRE=$(_string_cleanser "$INPUT_GENRE")
TRACK=$(_string_cleanser "$INPUT_TRACK")
DATE=$(_string_cleanser "$INPUT_DATE")
TYER=$(_string_cleanser "$INPUT_TYER")

FILENAME=$(_string_cleanser "$INPUT_FILE")

## Genre matching is done on a lowercase form where spaces have been converted
## to underscores. Any genre that is not listed below is dropped.
GENRE=$(printf '%s\n' "$GENRE" | tr '[:upper:] ' '[:lower:]_')
case "$GENRE" in
    ost | soundtrack | original_soundtrack)
        GENRE="Soundtrack" ;;
    classical | classics | classic)
        GENRE="Classical" ;;
    humour)
        GENRE="Humour" ;;
    *)
        GENRE="" ;;
esac

##================================================================================
## OUTPUT variables.

## The following function replaces all variables with their value. This is much
## safer than using shell expansion through 'eval.'
## Expand the tag placeholders found in a template string.
##
## Arguments: $1 - template that may contain $TITLE, $ARTIST, $ALBUM,
##                 $ALBUMARTIST, $COMPOSER, $DISC, $GENRE, $TRACK, $DATE,
##                 $TYER and $FILENAME as literal text.
## Globals:   TITLE ARTIST ALBUM ALBUMARTIST COMPOSER DISC GENRE TRACK DATE
##            TYER FILENAME (all read).
## Outputs:   the expanded template on stdout.
##
## $ALBUMARTIST has to be substituted before $ALBUM: "$ALBUM" is a prefix of
## "$ALBUMARTIST", so the other order turns "$ALBUMARTIST" into the album name
## followed by the literal text "ARTIST".
##
## The values are handed over with 'awk -v', which applies escape-sequence
## processing to them, and they are then used as gsub() replacement text where
## '&' is special.
_revar()
{
    printf '%s\n' "$1" | awk \
        -v title="$TITLE" \
        -v artist="$ARTIST" \
        -v album="$ALBUM" \
        -v albumartist="$ALBUMARTIST" \
        -v composer="$COMPOSER" \
        -v disc="$DISC" \
        -v genre="$GENRE" \
        -v track="$TRACK" \
        -v date="$DATE" \
        -v tyer="$TYER" \
        -v filename="$FILENAME" \
        '{
            gsub(/\$TITLE/, title)
            gsub(/\$ARTIST/, artist)
            gsub(/\$ALBUMARTIST/, albumartist)
            gsub(/\$ALBUM/, album)
            gsub(/\$COMPOSER/, composer)
            gsub(/\$DISC/, disc)
            gsub(/\$GENRE/, genre)
            gsub(/\$TRACK/, track)
            gsub(/\$DATE/, date)
            gsub(/\$TYER/, tyer)
            gsub(/\$FILENAME/, filename)
            print
        }'
}

OUTPUT_TITLE=$(_revar "${OUTPUT_TITLE:-Unknown Title}")
OUTPUT_ALBUM=$(_revar "${OUTPUT_ALBUM:-Unknown Album}")

## We use album artist if artist is empty.
if [ -z "$OUTPUT_ARTIST" ]; then
    OUTPUT_ARTIST="$ALBUMARTIST"
fi
OUTPUT_ARTIST=$(_revar "${OUTPUT_ARTIST:-Unknown Artist}")

OUTPUT_GENRE=$(_revar "$OUTPUT_GENRE")

## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
OUTPUT_TRACK=$(_revar "$OUTPUT_TRACK" | sed -e 's/^0*//' -e 's|[^[:digit:]].*||')

## We extract the four-digits number from the date.
OUTPUT_DATE=$(_revar "$OUTPUT_DATE")
OUTPUT_DATE=$(printf '%s\n' "$OUTPUT_DATE" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')

## If DATE is not a year, we use TYER if it is a year.
TYER_REG=$(_revar "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
if [ ${#DATE} -ne 4 ] && [ ${#TYER_REG} -eq 4 ]; then
    OUTPUT_DATE="$TYER_REG"
fi

## QUALITY
## Only reencode if not in OGG and if SKIP not set, or if explicitly specified.
## The character classes are quoted so that the shell cannot expand them as
## globs against single-character file names in the current folder.
INPUT_EXT_LOW="$(printf '%s\n' "$INPUT_EXT" | tr '[:upper:]' '[:lower:]')"

## SKIP holds a command name (it is executed as the 'if' condition).
if $SKIP; then
    OGG_PARAM="-c:a copy"
    OUTPUT_EXT="$INPUT_EXT_LOW"
fi

## A negative OUTPUT_BITRATE is handled as "no bitrate requested". In that
## case the stream is copied when SKIP is set or when the input already is
## OGG; otherwise it is reencoded using the input bitrate. These tests used to
## be independent one-liners, so the "copy" choices were always overwritten by
## the reencoding one that followed them.
## NOTE(review): the libvorbis values below are kept exactly as they were; how
## OGG_PARAM is consumed is not visible from here, so check whether a bitrate
## flag/unit is expected next to the number.
if [ "$OUTPUT_BITRATE" -lt 0 ]; then
    if $SKIP || [ "$INPUT_EXT_LOW" = "ogg" ]; then
        OGG_PARAM="-c:a copy"
    else
        OGG_PARAM="-c:a libvorbis ${INPUT_BITRATE}"
    fi
elif [ "$OUTPUT_BITRATE" -eq 0 ]; then
    OGG_PARAM="-c:a libvorbis"
elif [ "$OUTPUT_BITRATE" -gt 500 ]; then
    ## If OUTPUT_BITRATE is beyond OGG's limit, we trim it.
    OGG_PARAM="-c:a libvorbis 500"
fi

## Make sure track number has two digits for file name only.
OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
if [ -n "$OUTPUT_PADDEDTRACK" ] && [ "$OUTPUT_PADDEDTRACK" -lt 10 ]; then
    OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
fi

OUTPUT_FOLDER=$(_revar "$OUTPUT_FOLDER")
OUTPUT_FILE=$(_revar "$OUTPUT_FILE")
unset OUTPUT_FILE_ORIGINAL

if [ -e "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
    ## OVERWRITE is quoted: an empty value must not break the test.
    if [ "$OVERWRITE" = "-n" ]; then
        ## If file exist, we append a unique timestamp to the name.
        OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
        OUTPUT_MSG="$(tput setf 1)$(tput bold)(Warning: destination exists, appending timestamp.)$(tput sgr0)"
    else
        ## WARNING: here it is important that no folder are suffixed by slashes.
        ## When source and destination are the same file, we write to a
        ## timestamped name and remember the original one.
        if [ "$INPUT_FOLDER/$INPUT_FILE.$INPUT_EXT" = "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
            OUTPUT_FILE_ORIGINAL="$OUTPUT_FILE"
            OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
        fi
        OUTPUT_MSG="$(tput setf 4)$(tput bold)(Warning: overwriting destination!)$(tput sgr0)"
    fi
fi

##==============================================================================
## PREVIEW

## Note: most (all?) shell printf have an alignment issue when strings contain
## wide characters. We need to use AWK for proper alignment. Hence the 'aprint'
## function.
- -ATTR_WIDTH="%-13.13s" # Length of longest attribute +2 -## INPUT_WIDTH = COLUNMS - ATTR_WIDTH -2 (for |)) -INPUT_WIDTH=$((($(tput cols)-15)/2)) -INPUT_WIDTH="%$INPUT_WIDTH.${INPUT_WIDTH}s" - -## We output everything in one pass to speed up the process since this is quite -## demanding and called frequently. This function is reliable as long as no -## tabs are found in tags. But since we have no control over the input, we never -## no. -aprint() -{ - awk -F'\t+' -v FMT="$INPUT_WIDTH | $ATTR_WIDTH| %s\n" '{printf FMT, $1, $2, $3 }' -} - -aprint < " - - OUTPUT_COVER="$OUTPUT_FOLDER/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover" - OUTPUT_COVERFILE="$OUTPUT_COVER.${1##*.}" - COVER_COUNTER=1 - - ## Different cover with same name is in target folder. We append a number. - ## If a different cover with the same name already exist, we append a number - ## and check again. If it is the same cover, we skip it. - while [ -e "$OUTPUT_COVERFILE" ]; do - if [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$(sha1sum "$1" | cut -f1 -d' ')" ]; then - echo "Skipping" - return - else - OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${1##*.}" - COVER_COUNTER=$(($COVER_COUNTER+1)) - fi - done - - echo -n "$OUTPUT_COVERFILE" - ## Output warning if cover is too small. - if command -v mediainfo >/dev/null; then - BUF="$(mediainfo "$1")" - COVER_WIDTH=$(echo "$BUF" | awk '/^Width/ {print $3}') - COVER_HEIGHT=$(echo "$BUF" | awk '/^Height/ {print $3}') - - if [ -z "$COVER_WIDTH" ] || [ $COVER_WIDTH -lt $COVER_LIMIT ] || \ - [ -z "$COVER_HEIGHT" ] || [ $COVER_HEIGHT -lt $COVER_LIMIT ]; then - echo -n " $(tput setf 1)$(tput bold)(Warning: bad quality cover.)$(tput sgr0)" - fi - fi - - cp -n "$1" "$OUTPUT_COVERFILE" - echo - echo -} - -## Embedded covers. 
-for i in $(seq 0 $(($(echo "$STREAM" | grep -c '^ *Stream.*Video')-1))); do - COVER_EXT="$(echo "$STREAM" | awk '/^ *Stream.*Video/ {gsub(/,/, "", $4);print $4}')" - [ -z "$COVER_EXT" ] && continue - [ "$COVER_EXT" = "mjpeg" ] && COVER_EXT="jpg" - - TEMP_COVER="$(mktemp "/tmp/cover-XXXXXX.$COVER_EXT")" - ffmpeg -nostdin -v quiet -y -i "$1" -an -sn -c:v copy -map 0:v:$i "$TEMP_COVER" - _cover "$TEMP_COVER" - - ## We do not want to bloat the temp folder with covers, so we remove it. - rm "$TEMP_COVER" -done - -while IFS= read -r i; do - _cover "$i" -done < "Change to Title Case" -## -## Features: -## -## titlecase() will compress whitespace if a second parameter is passed. It -## is sufficient to use a positive number: titlecase(string,1) -## -## This function tries to implement the "Title Case" constructs specified in -## the APA Style Manual and the Chicago Manual of Style. Instead of merely -## capitalizing the first letter of each word and setting everything else in -## lowercase, this function implements the following conditions: -## -## - Conjunctions, articles, and prepositions are set lowercase, UNLESS they -## are the first word of the string or the first word after a colon, a -## question mark, or an exclamation point. -## - Compass points (NE, SW, etc.) are set in solid caps. -## - Roman numerals (II, IV, VII, IX, etc.) are set in solid caps. -## - Certain abbreviations are always capitalized (AIDS, ASCII, NT, USA, etc.) -## - Names beginning with D' or O' are set as D'Arcy, O'Reilly, etc. -## - Hyphenated strings receive internal caps (Smith-Williams, Twenty-Two) -## - Contractions such as I'll, You've, Don't, etc. are handled properly -## - Degrees such as Ph.D., M.Div., etc. are properly capitalized -## -## Sample Usage with GNU awk (gawk): -## -## gawk -f titlecase.awk infile - -## TODO: maybe it would be a good idea to implement a preprocessor that would -## search and replace special strings like AC-DC. 

## Tests:

## all lowercase words
## ALL UPPERCASE WORDS
## aLl cRaZY cASE WordS
## And with constants in an INTO cd Contre. Feat and Feat. the machine.
## Bad ,punctuation. here , should ! not be ?a problem.
## Roman numerals XIV LIV xiv liv. liv. xiv.
## Dashed--machine--ac-dc.
## About mcdonald and o'reilly, but i'll won't say.
## The "final quote" 'on the waterfront'.

## Build 'constarray', the list of words that are always written exactly as
## spelled here (lowercase link words, fixed-case names and abbreviations).
## Every chunk ends with a space so that the chunks can be concatenated.
BEGIN {
    ## English
    constants = constants "a an the and but for nor or so am is are against at between by from in into of on to upon "

    ## French
    constants = constants "un une de du le la les et mais pour ni ou à a où contre entre chez dans sur que qui "

    ## German
    constants = constants "der die das den dem des ein eine einen eines einer von wo an am in für gegen bei aus mit nach seit zu durch ohne um "

    ## Music
    constants = constants "feat CD DJ "
    constants = constants "KlassX Machine d'Acide BYOB MGMT AC DC JBX RZA DMX "

    ## Others
    constants = constants "AIDS ASCII DHTML DNA DVD FBI GNU GPL IBM IRS ISBN ISSN PHP ROM SSN TV FM "

    ## Build array of constant words.
    split(constants, constarray, " ")
}

## titlecase(string): return 'string' converted to title case.
##
## The string is consumed word by word: 'b' holds what is left to process and
## 'a' accumulates the converted text. Each word goes through rules 1 to 4;
## the first rule that hits decides its spelling, otherwise the word is
## capitalized (or lowercased when 'capital' is 1).
##
## Reads the globals 'constarray' (built in BEGIN), 'capital' and 'debug'.
## The last two are never assigned in this script, so they are expected to be
## supplied by the caller (e.g. with -v). When 'debug' is set, trace lines
## starting with "::" are printed to standard output.
##
## All working variables (a, b, hit, pos, word, head, tail, var, result, beg,
## cap, end) are globals, since none of them is declared as a parameter.
##
## gensub() and the "\>" end-of-word operator are GNU awk extensions.
function titlecase(string) {
    ## Initialize variables.
    a = ""; # a is/will be the string ALREADY converted
    b = string; # b is the rest of the string, so that (string = a b)

    ## English punctuation. It is quite hard to guess the language, so French
    ## will follow English punctuation rules.
    ## Spaces found before a run of punctuation marks are dropped and the run
    ## is followed by exactly one space.
    b = gensub(/ +([,!:;?.]+) */, "\\1 ", "g", b)

    ## Compress spaces or tabs. Trim prefix and suffix space. Convert
    ## underscores to spaces.
    gsub(/[_ \t]+/, " ", b)
    gsub(/^ /, "", b)
    gsub(/ $/, "", b)

    ## Capitalize everything for ease of matching.
    b = toupper(b)

    do {
        ## Initialize for later use.
        hit = 0;

        ## 'pos' is the position of the NEXT punctuation mark (except
        ## apostrophe) after the current word. If this is the last word in b,
        ## pos will be 0. match() automatically sets RLENGTH. WARNING: we
        ## consider digits as part of a word.
        pos = match(b, /[^[:alnum:]']+/)

        ## 'word' is the current word together with the whole run of
        ## separators that follows it.
        if (pos > 0) word = substr(b, 1, pos + RLENGTH - 1)
        else word = b

        ## 1st char of current word.
        head = substr(b, 1, 1)
        ## Tail of current word.
        if (pos > 0) tail = substr(b, 2, pos + RLENGTH - 2)
        else tail = substr(b, 2)

        ## Shorten the rest of the string.
        b = substr(b, pos + RLENGTH )

        ## RULE 1 -- Constant strings.

        ## WARNING: since we match a substring of 'word', we need to prepend and
        ## append the potentially discarded values, like dashes.
        ## The uppercased constant must match at the start of 'word' and end
        ## on a word boundary. The traversal order of 'for (var in ...)' is
        ## unspecified in awk; the first constant that matches wins.
        for (var in constarray) {
            if (debug)
                print ":: Comparing " word " with " constarray[var]
            hit = match(word, "^" toupper(constarray[var]) "\\>")
            if ( hit > 0 ) {
                word = substr(word, 1, RSTART-1) constarray[var] substr(word, RSTART+RLENGTH)
                if (debug)
                    print ":: Match constant on [" constarray[var] "] in string [" word "]";
                break;
            }
        }

        ## RULE 2 -- Roman numerals

        ## Note: this match cannot distinguish between LIV (54 in Roman
        ## numerals) and a personal name like "Liv Ullman". The Roman numerals
        ## C (100), D (500), and M (1000) are omitted to avoid false matches on
        ## words like civil, did, dim, lid, mid-, mild, Vic, etc. Most uses of
        ## Roman numerals in titles stays in the lower ranges, such as "Vol. II"
        ## or "Pt. XXIV".
        ## 'word' is already uppercase, so a hit keeps it unchanged.
        if ( hit == 0 && match(word, /^[IVXL]+\>/) ) {
            hit = 1
            ## But we can undo I'd, I'll, I'm, I've and Ill.
            if (match(word,/^I'|ILL\>/))
                hit = 0
            if (debug && hit == 1)
                print ":: Match on Roman numerals in [" word "]"
        }

        ## RULE 3 -- Names like D'Arcy or O'Reilly
        ## The first three characters stay uppercase, the rest is lowercased.
        ## This rule is skipped when 'capital' is 1.
        if ( hit == 0 && capital != 1 && match(word, /^[DO]'[[:alpha:]]/) ) {
            word = substr(word,1,3) tolower(substr(word,4))
            hit = 1
            if (debug)
                print ":: Match on mixed case: " word
        }

        ## RULE 4 -- Names like MacNeil or McDonald
        ## RLENGTH is the length of the "MC"/"MAC" prefix plus the consonant
        ## that follows it; that consonant stays uppercase.
        if ( hit == 0 && match(word,/^MA?C[B-DF-HJ-NP-TV-Z]/) ) {
            if (debug)
                print ":: Match on MacX: " substr(word,1,1) "-" \
                    tolower(substr(word,2,RLENGTH-2)) "-" substr(word,RLENGTH,1) "-" \
                    tolower(substr(word,RLENGTH+1))
            word = substr(word,1,1) tolower(substr(word,2,RLENGTH-2)) \
                substr(word,RLENGTH,1) tolower(substr(word,RLENGTH+1))
            hit = 1
        }

        ## If one of the above rules is hit, we append the result to 'a',
        ## otherwise we capitalize it.
        if (hit > 0 ) a = a word
        else if (capital == 1) a = a tolower(head) tolower(tail)
        else a = a toupper(head) tolower(tail)

    } while (pos > 0);

    ## Everything should be converted now.

    ## Double exception 1: Set 1st word of string in capital case. Need to
    ## handle potential internal single/double quotes like "A Day in the Life"
    ## or 'On the Waterfront'. WARNING: here we consider digits as part of a
    ## word (as in 1st, 2nd, etc.).
    match(a, /[[:alnum:]]/)
    a = toupper(substr(a, 1, RSTART)) substr(a, RSTART+1)

    ## Double exception 2: Set 1st word after some punctuation marks in title
    ## case. This kludge handles multiple colons, question marks, etc. on the
    ## line. \a is the BEL or CTRL-G character.
    ## gensub() inserts a BEL in front of every letter to capitalize; the loop
    ## then removes each BEL and uppercases the letter that follows it.
    result = gensub(/([:{}\[\]?!"()-][^[:alnum:]]*)([a-zA-Z])/, "\\1\a\\2", "g", a)
    while (match(result, /\a/)) {
        beg = substr(result, 1, RSTART-1)
        cap = toupper(substr(result, RSTART+1, 1))
        end = substr(result, RSTART+2)
        result = beg cap end
    }

    return result
}

## Main rule: print the title-cased form of every input line.
{print titlecase($0)}

## End of script