tc-audio-transcode: covers are fetched from current folder only.

tc-audio-transcode: genre can now be forced. titlecase: unicode support. titlecase: fixed bug for mixed-case words where the following punctuation was swallowed.
2013-04-07 12:33:43 +02:00 · 2013-04-07 12:33:43 +02:00 · 661d48f55e
parent 83d29ee78d
commit 661d48f55e
2 changed files with 66 additions and 26 deletions
--- a/.scripts/tc-audio-transcode
+++ b/.scripts/tc-audio-transcode
@@ -180,6 +180,9 @@ fi
 ## Filename without extension nor path.
 FILENAME="${1%.*}"
 FILENAME="${FILENAME##*/}"
+## Folder of the file. Needed for cover.
+SOURCEFOLDER="$(realpath "$1")"
+SOURCEFOLDER="${SOURCEFOLDER%/*}"
 ## We get format from extension, because codec is not reliable either.
 FORMAT="${1##*.}"
 ## CODEC is unused for now.
@@ -235,18 +238,26 @@ OUTPUT_ARTIST="${OUTPUT_ARTIST:-Unknown Artist}"
 OUTPUT_ALBUM=$(eval _string_cleanser $OUTPUT_ALBUM)
 [ -z "$OUTPUT_ALBUM" ] && echo "${#OUTPUT_ALBUM}${OUTPUT_ALBUM}"

-## We put genre in lower case and underscore to ease matching.  If it
-## matches, we use the Title Case match. If it does not, we set it to empty.
-OUTPUT_GENRE=$(eval _string_cleanser $OUTPUT_GENRE | tr '[:upper:] ' '[:lower:]_')
-case $OUTPUT_GENRE in
-    ost) OUTPUT_GENRE="Soundtrack" ;;
-    soundtrack) OUTPUT_GENRE="Soundtrack";;
-    original_soundtrack) OUTPUT_GENRE="Soundtrack";;
-    classical) OUTPUT_GENRE="Classical";;
-    classics) OUTPUT_GENRE="Classical";;
-    classic) OUTPUT_GENRE="Classical";;
-    *) OUTPUT_GENRE="";;
-esac
+## If OUTPUT_GENRE is set from command-line parameters, we cleanse the
+## string. Otherwise we put GENRE in lower case and underscore to ease matching.
+## If it matches, we use the Title Case match. If it does not, we set it to
+## empty.
+if [ "$OUTPUT_GENRE" = '$GENRE' ]; then
+    GENRE=$(eval _string_cleanser $GENRE | tr '[:upper:] ' '[:lower:]_')
+    case $GENRE in
+        ost) OUTPUT_GENRE="Soundtrack" ;;
+        soundtrack) OUTPUT_GENRE="Soundtrack";;
+        original_soundtrack) OUTPUT_GENRE="Soundtrack";;
+        classical) OUTPUT_GENRE="Classical";;
+        classics) OUTPUT_GENRE="Classical";;
+        classic) OUTPUT_GENRE="Classical";;
+        humour) OUTPUT_GENRE="Humour";;
+        *) OUTPUT_GENRE="";;
+    esac
+else
+    OUTPUT_GENRE=$(eval _string_cleanser $OUTPUT_GENRE)
+fi
+

 ## We remove the track count if any, we suppress leading zeros, we suppress all
 ## non-digit characters.
@@ -339,8 +350,8 @@ if [ $? -ne 0 ]; then
    exit
 fi

-## COVER
-## We copy cover only if it does not already exist.
+## COVER. We copy cover only if it does not already exist. Only files found in
+## the folder where the music is located will be taken into account.
 while read -r i; do
    OUTPUT_COVER="$OUTPUT/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
    OUTPUT_COVERFILE="$OUTPUT_COVER.${i##*.}"
@@ -359,11 +370,11 @@ while read -r i; do
        COVER_COUNTER=$(($COVER_COUNTER+1))
    done

-    echo "==> COVER"
+    echo "==> COVER from $SOURCEFOLDER"
    cp -nv "$i" "$OUTPUT_COVERFILE"
    echo
 done <<EOF
-$(find "."  \( -iname '*.png' -o -iname '*.jpg' \) )
+$(find "$SOURCEFOLDER" -maxdepth 1  \( -iname '*.png' -o -iname '*.jpg' \) )
 EOF

 ## Zsh compatibility. We need it otherwise word splitting of parameter like
--- a/.scripts/titlecase.awk
+++ b/.scripts/titlecase.awk
@@ -1,5 +1,4 @@
-# filename: titlecase.awk
-
+#!/bin/gawk -f
 ## Original file can be found at
 ##   http://www.pement.org/awk/titlecase.awk.txt

@@ -31,10 +30,15 @@
 #
 #   awk -f titlecase.awk infile

+## TODO: merge constants (MC, UC, LC) into one array. Use only one loop for matching.
+## TODO: get constants from external file. Support: languages, themes (music), etc.
+## TODO: rethink algorithm so that it does not need to turn everything to uppercase.
+## TODO: rethink algorithm so that it does not include punctuation in 'word'.
+
 BEGIN {

    #-----ABBREVIATIONS TO BE SET IN MIXEDCASE-----
-    mixed = "KlassX Machine "
+    mixed = "KlassX Machine d'Acide "
    split(mixed, keep_mixed, " ")

    #-----ABBREVIATIONS TO BE SET IN LOWERCASE-----
@@ -47,6 +51,9 @@ BEGIN {
    # Omitted: over (=finished), under, through, before, after
    preps = "against at between by from in into of on to upon "

+    ## French
+    preps = preps "du "
+
    # Build array of words to be set lowercased
    split(articles conjunctions preps verbs abbrevs, keep_lower, " ")

@@ -57,7 +64,6 @@ BEGIN {

    # build array of words to keep uppercase
    split(other, keep_upper, " ")
-
 }

 function titlecase(string,x)  {
@@ -80,11 +86,18 @@ function titlecase(string,x)  {
    do {
        hit = 0;         # Initialize for later use

+        if(debug)
+        {
+            print "1a=" a
+            print "1b=" b
+            print "1word=" word
+        }
+
        # pos is the position of the NEXT punctuation mark (except apostrophe)
        # after the current word. If this is the last word in b, pos will be 0.
        # match() automatically sets RLENGTH
        ## WARNING: we consider digits as part of a word.
-        pos = match(b, /[^A-Z0-9']+/)
+        pos = match(b, /[^[:alnum:]']+/)
        # pos = match(b, /[^A-Z']+/)

        if (pos > 0)    word = substr(b, 1, pos + RLENGTH - 1)
@@ -99,18 +112,34 @@ function titlecase(string,x)  {
        # shorten the rest of the string
        b = substr(b, pos + RLENGTH  )

-        #----Words to keep mixedcase----
+        if(debug)
+        {
+            print "2a=" a
+            print "2b=" b
+            print "2word=" word
+        }
+
+        #----Words to keep mixedcase---- WARNING: since we match a substring of
+        ## 'word', we need to prepend and append the potentially discarded
+        ## values.
        for (var in keep_mixed) {
            mix = match(word, "^" toupper(keep_mixed[var]) "\\>")
            if ( mix > 0 ) {
                hit = 1
-                word = keep_mixed[var]
+                word = substr(word, 1, RSTART-1) keep_mixed[var] substr(word, RSTART+RLENGTH)
                if (debug)
                    print "DIAG: Match MC on [" keep_mixed[var] "] in string [" word "]";
                break;
            }
        }

+        if(debug)
+        {
+            print "3a=" a
+            print "3b=" b
+            print "3word=" word
+        }
+
        #----Words to keep uppercase----
        # Case 1: abbreviations from the keep_upper array.
        if ( proect == 0) {
@@ -140,7 +169,7 @@ function titlecase(string,x)  {

        #----Words to be set in MiXed case----
        # Case 3: Names like D'Arcy or O'Reilly
-        if ( hit == 0 && match(word, /^[DO]'[A-Z]/) ) {
+        if ( hit == 0 && match(word, /^[DO]'[[:alpha:]]/) ) {
            if (debug) print "DIAG: Match on mixed case: " word
            word = substr(word,1,3) tolower(substr(word,4))
            hit = 1
@@ -183,8 +212,8 @@ function titlecase(string,x)  {
    ## Double exception 1: Set 1st word of string in capital case. Need to
    ## handle potential internal single/double quotes like "A Day in the Life"
    ## or 'On the Waterfront'. WARNING: here we consider digits as part of a
-    ## work (as in 1st, 2nd, etc.)
-    match(a, /[A-Za-z0-9]/)
+    ## word (as in 1st, 2nd, etc.)
+    match(a, /[[:alnum:]]/)
    a = toupper(substr(a,1,RSTART)) substr(a,RSTART+1)

    ## Double exception 2: Set 1st word after a colon, question mark or