tc-audio-transcode: covers are fetched from current folder only.

tc-audio-transcode: genre can now be forced.
titlecase: unicode support.
titlecase: fixed bug for mixed-case words where following punctuation was swallowed.
master
Pierre Neidhardt 2013-04-07 12:33:43 +02:00
parent 83d29ee78d
commit 661d48f55e
2 changed files with 66 additions and 26 deletions

View File

@ -180,6 +180,9 @@ fi
## Filename without extension nor path.
FILENAME="${1%.*}"
FILENAME="${FILENAME##*/}"
## Folder of the file. Needed for cover.
SOURCEFOLDER="$(realpath "$1")"
SOURCEFOLDER="${SOURCEFOLDER%/*}"
## We get format from extension, because codec is not reliable either.
FORMAT="${1##*.}"
## CODEC is unused for now.
@ -235,18 +238,26 @@ OUTPUT_ARTIST="${OUTPUT_ARTIST:-Unknown Artist}"
OUTPUT_ALBUM=$(eval _string_cleanser $OUTPUT_ALBUM)
[ -z "$OUTPUT_ALBUM" ] && echo "${#OUTPUT_ALBUM}${OUTPUT_ALBUM}"
## We put genre in lower case and underscore to ease matching. If it
## matches, we use the Title Case match. If it does not, we set it to empty.
OUTPUT_GENRE=$(eval _string_cleanser $OUTPUT_GENRE | tr '[:upper:] ' '[:lower:]_')
case $OUTPUT_GENRE in
ost) OUTPUT_GENRE="Soundtrack" ;;
soundtrack) OUTPUT_GENRE="Soundtrack";;
original_soundtrack) OUTPUT_GENRE="Soundtrack";;
classical) OUTPUT_GENRE="Classical";;
classics) OUTPUT_GENRE="Classical";;
classic) OUTPUT_GENRE="Classical";;
*) OUTPUT_GENRE="";;
esac
## If OUTPUT_GENRE is set from command-line parameters, we cleanse the
## string. Otherwise we put GENRE in lower case and underscore to ease matching.
## If it matches, we use the Title Case match. If it does not, we set it to
## empty.
## The right-hand side is single-quoted, so OUTPUT_GENRE is compared with the
## literal text $GENRE, not with the value of GENRE.
## NOTE(review): presumably that literal is the default left in OUTPUT_GENRE
## when no genre was forced on the command line -- confirm where it is set.
if [ "$OUTPUT_GENRE" = '$GENRE' ]; then
## Cleanse the tag, then map upper case to lower case and spaces to
## underscores so that it can be matched against the patterns below.
GENRE=$(eval _string_cleanser $GENRE | tr '[:upper:] ' '[:lower:]_')
## Known spellings are mapped to one Title Case genre name.
case $GENRE in
ost) OUTPUT_GENRE="Soundtrack" ;;
soundtrack) OUTPUT_GENRE="Soundtrack";;
original_soundtrack) OUTPUT_GENRE="Soundtrack";;
classical) OUTPUT_GENRE="Classical";;
classics) OUTPUT_GENRE="Classical";;
classic) OUTPUT_GENRE="Classical";;
humour) OUTPUT_GENRE="Humour";;
## Anything unrecognised results in an empty genre.
*) OUTPUT_GENRE="";;
esac
else
## Any other value of OUTPUT_GENRE is kept, only passed through
## _string_cleanser.
OUTPUT_GENRE=$(eval _string_cleanser $OUTPUT_GENRE)
fi
## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
@ -339,8 +350,8 @@ if [ $? -ne 0 ]; then
exit
fi
## COVER
## We copy cover only if it does not already exist.
## COVER. We copy cover only if it does not already exist. Only files found in
## the folder where the music is located will be taken into account.
while read -r i; do
OUTPUT_COVER="$OUTPUT/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
OUTPUT_COVERFILE="$OUTPUT_COVER.${i##*.}"
@ -359,11 +370,11 @@ while read -r i; do
COVER_COUNTER=$(($COVER_COUNTER+1))
done
echo "==> COVER"
echo "==> COVER from $SOURCEFOLDER"
cp -nv "$i" "$OUTPUT_COVERFILE"
echo
done <<EOF
$(find "." \( -iname '*.png' -o -iname '*.jpg' \) )
$(find "$SOURCEFOLDER" -maxdepth 1 \( -iname '*.png' -o -iname '*.jpg' \) )
EOF
## Zsh compatibility. We need it otherwise word splitting of parameter like

49
.scripts/titlecase.awk Normal file → Executable file
View File

@ -1,5 +1,4 @@
# filename: titlecase.awk
#!/bin/gawk -f
## Original file can be found at
## http://www.pement.org/awk/titlecase.awk.txt
@ -31,10 +30,15 @@
#
# awk -f titlecase.awk infile
## TODO: merge constants (MC, UC, LC) into one array. Use only one loop for matching.
## TODO: get constants from external file. Support: languages, themes (music), etc.
## TODO: rethink algorithm so that it does not need to turn everything to uppercase.
## TODO: rethink algorithm so that it does not include punctuation in 'word'.
BEGIN {
#-----ABBREVIATIONS TO BE SET IN MIXEDCASE-----
mixed = "KlassX Machine "
mixed = "KlassX Machine d'Acide "
split(mixed, keep_mixed, " ")
#-----ABBREVIATIONS TO BE SET IN LOWERCASE-----
@ -47,6 +51,9 @@ BEGIN {
# Omitted: over (=finished), under, through, before, after
preps = "against at between by from in into of on to upon "
## French
preps = preps "du "
# Build array of words to be set lowercased
split(articles conjunctions preps verbs abbrevs, keep_lower, " ")
@ -57,7 +64,6 @@ BEGIN {
# build array of words to keep uppercase
split(other, keep_upper, " ")
}
function titlecase(string,x) {
@ -80,11 +86,18 @@ function titlecase(string,x) {
do {
hit = 0; # Initialize for later use
if(debug)
{
print "1a=" a
print "1b=" b
print "1word=" word
}
# pos is the position of the NEXT punctuation mark (except apostrophe)
# after the current word. If this is the last word in b, pos will be 0.
# match() automatically sets RLENGTH
## WARNING: we consider digits as part of a word.
pos = match(b, /[^A-Z0-9']+/)
pos = match(b, /[^[:alnum:]']+/)
# pos = match(b, /[^A-Z']+/)
if (pos > 0) word = substr(b, 1, pos + RLENGTH - 1)
@ -99,18 +112,34 @@ function titlecase(string,x) {
# shorten the rest of the string
b = substr(b, pos + RLENGTH )
#----Words to keep mixedcase----
if(debug)
{
print "2a=" a
print "2b=" b
print "2word=" word
}
#----Words to keep mixedcase---- WARNING: since we match a substring of
## 'word', we need to prepend and append the potentially discarded
## values.
for (var in keep_mixed) {
mix = match(word, "^" toupper(keep_mixed[var]) "\\>")
if ( mix > 0 ) {
hit = 1
word = keep_mixed[var]
word = substr(word, 1, RSTART-1) keep_mixed[var] substr(word, RSTART+RLENGTH)
if (debug)
print "DIAG: Match MC on [" keep_mixed[var] "] in string [" word "]";
break;
}
}
if(debug)
{
print "3a=" a
print "3b=" b
print "3word=" word
}
#----Words to keep uppercase----
# Case 1: abbreviations from the keep_upper array.
if ( proect == 0) {
@ -140,7 +169,7 @@ function titlecase(string,x) {
#----Words to be set in MiXed case----
# Case 3: Names like D'Arcy or O'Reilly
if ( hit == 0 && match(word, /^[DO]'[A-Z]/) ) {
if ( hit == 0 && match(word, /^[DO]'[[:alpha:]]/) ) {
if (debug) print "DIAG: Match on mixed case: " word
word = substr(word,1,3) tolower(substr(word,4))
hit = 1
@ -183,8 +212,8 @@ function titlecase(string,x) {
## Double exception 1: Set 1st word of string in capital case. Need to
## handle potential internal single/double quotes like "A Day in the Life"
## or 'On the Waterfront'. WARNING: here we consider digits as part of a
## work (as in 1st, 2nd, etc.)
match(a, /[A-Za-z0-9]/)
## word (as in 1st, 2nd, etc.)
match(a, /[[:alnum:]]/)
a = toupper(substr(a,1,RSTART)) substr(a,RSTART+1)
## Double exception 2: Set 1st word after a colon, question mark or