scripts: removed broken tc-audio-transcode and co.

Working on a much improved standalone program.
master
Pierre Neidhardt 2013-11-14 23:59:06 +01:00
parent 08fec6982e
commit d88b9505cf
3 changed files with 0 additions and 851 deletions

View File

@ -1,125 +0,0 @@
#!/bin/sh
## Print the usage text of the batch script on standard output.
## $1: path the script was invoked with; only its base name is shown.
_printhelp() {
  ## Reduce the argument to its base name once, so that the text below can
  ## simply refer to $1.
  set -- "${1##*/}"
  cat <<EOF
Usage: $1 [-p PROC] ROOT [OPTIONS]
This will batch-process all audio files found in ROOT folder and subfolders with
the tc-audio-transcode script.
All OPTIONS are passed to tc-audio-transcode. For more details see
tc-audio-transcode -h
This script has the ability to run multiple processes. The number of processes
defaults to the number of online cores if 'lscpu' is found, or to 1
otherwise. The number of parallel processes is bounded by the number of input
files. Outputting from several processes at the same time to the standard output
is not convenient, therefore when more than 1 process are started, the output is
disabled (except for errors).
If you still want the output, you need to restrict the number of processes to
1. For instance to preview changes in current folder you would write:
$1 -p1 . -p
Options:
-p PROC: run PROC processes in parallel.
EOF
}
## Default number of parallel processes: the CPU count reported by lscpu when
## that tool is available, 1 otherwise.
if command -v lscpu >/dev/null; then
  ## lscpu is a Linux util also available on *BSD.
  CPUNO=$(lscpu | awk '/^CPU\(s\)/ {print $2;exit}')
else
  CPUNO=1
fi
## Fall back to one process if lscpu did not yield a positive integer.
case $CPUNO in
  '' | *[!0-9]* | 0) CPUNO=1 ;;
esac

## Command-line parsing. The leading ':' in the option string selects silent
## error reporting: getopts then sets opt to ':' for a missing argument and to
## '?' for an unknown option.
while getopts ":p:h" opt; do
  case $opt in
    h)
      _printhelp "$0"
      exit 1 ;;
    p)
      ## The value is later used in arithmetic tests, seq and sed addresses,
      ## so it must be a positive integer.
      case $OPTARG in
        '' | *[!0-9]* | 0)
          echo "Invalid number of processes: $OPTARG" >&2
          exit 1 ;;
      esac
      CPUNO=$OPTARG ;;
    :)
      _printhelp "$0"
      exit 1 ;;
    \?)
      ## The question mark is escaped: a bare '?' is a glob that matches any
      ## single character, which would swallow every other branch below it.
      _printhelp "$0"
      exit 1 ;;
  esac
done
shift $(($OPTIND - 1))

if [ $# -eq 0 ]; then
  _printhelp "$0"
  exit 1
fi

## First remaining argument is the folder to scan, everything after it is
## forwarded to tc-audio-transcode.
ROOT="$1"
shift
## Keep find from reading a folder name starting with a dash as an option.
case $ROOT in
  -*) ROOT="./$ROOT" ;;
esac
if [ ! -d "$ROOT" ]; then
  echo "'$ROOT' is not a folder." >&2
  exit 1
fi

## Requirements. Failures are reported on stderr with a non-zero status so
## that callers can detect them.
if ! command -v ffmpeg >/dev/null; then
  echo "ffmpeg required." >&2
  exit 1
fi
if [ ! -f "${0%/*}/titlecase.awk" ]; then
  echo "AWK titlecase script required." >&2
  exit 1
fi
if [ ! -f "${0%/*}/tc-audio-transcode" ]; then
  echo "tc-audio-transcode script required." >&2
  exit 1
fi

## Newline-separated, sorted list of the audio files found below ROOT.
INPUT="$(find "$ROOT" \( \
  -iname '*.aac' -o \
  -iname '*.ape' -o \
  -iname '*.flac' -o \
  -iname '*.ogg' -o \
  -iname '*.mp3' -o \
  -iname '*.mp4' -o \
  -iname '*.mpc' -o \
  -iname '*.wav' -o \
  -iname '*.wv' \) | sort -n)"
## Transcode the share of $INPUT that belongs to one worker.
## $1: 1-based worker index. The worker handles lines $1, $1+CPUNO,
##     $1+2*CPUNO, ... of $INPUT (GNU sed 'first~step' address).
## Remaining arguments: options forwarded to tc-audio-transcode.
## Globals read: INPUT (newline-separated file list), CPUNO (worker count).
_worker()
{
  CORE=$1
  shift
  ## WARNING: ffmpeg continues to read stdin once it has started, so it should
  ## not be called from within a while<<EOF loop without redirecting stdin.
  while IFS= read -r file; do
    if [ -n "$file" ]; then
      echo "$(tput setf 2)$(tput bold)==>$(tput sgr0) $file"
      ## An explicit if/else is required here: with 'test && A || B' a failing
      ## A would trigger B as well, i.e. transcode the same file twice. Both
      ## branches call the script that sits next to this one.
      if [ $CPUNO -eq 1 ]; then
        "${0%/*}"/tc-audio-transcode "$@" "$file"
      else
        "${0%/*}"/tc-audio-transcode -q "$@" "$file" >/dev/null
      fi
    fi
  done<<EOF
$(echo "$INPUT" | sed -n "$CORE~${CPUNO}p")
EOF
  # Instead of sed, we could also use: awk "NR % $CPUNO == $CORE"
  echo "$(tput setf 5)$(tput bold)::$(tput sgr0) Thread $CORE/$CPUNO terminated."
}
## Never start more workers than there are input lines.
FILENO=$(echo "$INPUT" | wc -l)
if [ "$CPUNO" -gt "$FILENO" ]; then
  CPUNO=$FILENO
fi
## Workers 2..CPUNO run in the background.
for i in $(seq 2 "$CPUNO"); do
  _worker "$i" "$@" &
done
## We run one worker in the current process, then wait for the background
## workers: worker 1 is not necessarily the last one to finish, and without the
## wait the script would return while files are still being processed.
_worker 1 "$@"
wait

View File

@ -1,533 +0,0 @@
#!/bin/sh
################################################################################
## User options
## OUTPUT_ROOT is the library root. A value already present in the environment
## wins; OUTPUT_ROOT="." selects the current folder.
: "${OUTPUT_ROOT:=$HOME/musics}"
## End of user options
################################################################################
## For the sake of simplicity we convert everything to OGG.
## OGG quality ranges from -1 to 10.
## -q-1 45 kbit/s
## -q0 64 kbit/s
## -q1 80 kbit/s
## -q2 96 kbit/s
## -q3 112 kbit/s
## -q4 128 kbit/s
## -q5 160 kbit/s
## -q6 192 kbit/s
## -q7 224 kbit/s
## -q8 256 kbit/s
## -q9 320 kbit/s
## -q10 500 kbit/s
## Print the manual of tc-audio-transcode through the 'less' pager.
## $1: path the script was invoked with; only its base name is shown.
##
## The text is an unquoted here-document, so every '$' that must be displayed
## literally is escaped with a backslash. This includes the ${FILENAME%% }
## example near the end, which would otherwise be expanded by the shell.
## NOTE(review): _revar only substitutes plain $NAME references, so the
## ${FILENAME%% } form shown in that example does not look supported -- confirm.
_printhelp ()
{
  cat <<EOF | less
Usage: ${1##*/} [OPTIONS] FILE
Encode FILE in OGG with proper tags thanks to a very efficient titlecase
checker. Output is written to user library with subfolders created according to
tags. It is smart enough to handle empty tags. Covers are extracted from tags
and found in input folder. Identical covers are only processed once.
Tags are proccessed according to the following rules (among others):
* Artist: we use same value for artist and album_artist.
* Genre: since this is not universal by nature, we do not put a genre in tags,
except for special cases like Soundtrack.
* Composer: not universal neither, we prefer ARTIST over COMPOSER, so COMPOSER
will be empty.
Encoding quality is set to be the same as the source. OGG cannot go beyond 500,
so lossless formats like FLAC and WavPack will suffer from a quality loss
(should you mind).
Options:
-c : capital case (only first letter in upper case)
-f : overwrite if file exists
-p : preview (do not change file)
-q : hide FFmpeg runtime output.
-s : skip encoding
Tags:
-a : artist
-b : bitrate
-d : date
-g : genre
-l : album
-n : track number
-r : library root folder
-t : title
Everything tag you set from command-line will not get titlecased.
You can use the following variables to refer to the titlecased values:
\$ALBUM
\$ALBUMARTIST
\$ARTIST
\$COMPOSER
\$DATE
\$FILENAME
\$GENRE
\$TRACK
\$TYER
If bitrate argument is not provided, we use the bitrate of the source. If
bitrate argument is 0, we leave FFmpeg chose the value.
Default output folder:
OUTPUT_FOLDER="\$OUTPUT_ROOT/\$OUTPUT_ARTIST/\${OUTPUT_ALBUM:+\${OUTPUT_DATE:+\$OUTPUT_DATE - }\$OUTPUT_ALBUM/}"
Default output file:
OUTPUT_FILE="\$OUTPUT\$OUTPUT_ARTIST - \${OUTPUT_TRACK:+\$OUTPUT_TRACK - }\$OUTPUT_TITLE.\$OUTPUT_EXT"
Examples:
Set the 'artist' tag and reencode:
${1##*/} -a 'Franz Liszt' file.mp3
Set 'artist' to be 'composer', and 'title' to be preceded by 'artist', do not reencode:
${1##*/} -s -a '\$COMPOSER' -t '\$ARTIST - \$TITLE' file.ogg
Set track number to first field in file name:
${1##*/} -n '\${FILENAME%% }'
IMPORTANT: you *must* use single quotes when using variables.
EOF
}
## Behaviour switches, each one overridable from the command line.
CAPITAL=0
LOGLEVEL=
OVERWRITE=-n
PREVIEW=false
SKIP=false

## Tag templates. The default of each tag is a literal reference to the
## titlecased value of the same name; the dollar sign is escaped so that it is
## stored as is.
OUTPUT_ALBUM=\$ALBUM
OUTPUT_ARTIST=\$ARTIST
OUTPUT_DATE=\$DATE
OUTPUT_GENRE=\$GENRE
OUTPUT_TITLE=\$TITLE
OUTPUT_TRACK=\$TRACK

## Requested bitrate.
OUTPUT_BITRATE=-1

## Non-CLI-option data. Modifying these imply modifications in code below.
OUTPUT_EXT=ogg
OGG_PARAM='-c:a libvorbis -b:a ${OUTPUT_BITRATE}k'

## Destination templates. Not CLI options either, but this could be easily
## changed. Stored unexpanded (single quotes).
OUTPUT_FOLDER='$OUTPUT_ROOT/$OUTPUT_ARTIST${OUTPUT_ALBUM:+/${OUTPUT_DATE:+$OUTPUT_DATE - }$OUTPUT_ALBUM}'
OUTPUT_FILE='$OUTPUT_ARTIST - ${OUTPUT_PADDEDTRACK:+$OUTPUT_PADDEDTRACK - }$OUTPUT_TITLE'
## Command-line parsing. The leading ':' in the option string selects silent
## error reporting: getopts sets opt to ':' when an option lacks its argument
## and to '?' for an unknown option.
while getopts ":a:b:cd:fg:l:n:r:t:hpsq" opt; do
  case $opt in
    a) OUTPUT_ARTIST=$OPTARG ;;
    b) OUTPUT_BITRATE=$OPTARG ;;
    d) OUTPUT_DATE=$OPTARG ;;
    g) OUTPUT_GENRE=$OPTARG ;;
    l) OUTPUT_ALBUM=$OPTARG ;;
    n) OUTPUT_TRACK=$OPTARG ;;
    r) OUTPUT_ROOT=$OPTARG ;;
    t) OUTPUT_TITLE=$OPTARG ;;
    h)
      _printhelp "$0"
      exit 1
      ;;
    c)
      CAPITAL=1 ;;
    f)
      OVERWRITE="-y" ;;
    p)
      PREVIEW=true ;;
    s)
      SKIP=true ;;
    q)
      LOGLEVEL="-v fatal" ;;
    :)
      echo "Missing argument." >&2
      _printhelp "$0"
      exit 1
      ;;
    \?)
      ## Escaped on purpose: a bare '?' is a glob matching any single
      ## character, ':' included, which made the branch above unreachable.
      _printhelp "$0"
      exit 1
      ;;
  esac
done
shift $(($OPTIND - 1))

## A file to process is mandatory.
if [ $# -eq 0 ]; then
  _printhelp "$0"
  exit 1
fi

## Preflight checks. Failures go to stderr with a non-zero status so that a
## calling script can detect them.
if ! command -v ffmpeg >/dev/null; then
  echo "ffmpeg required for transcoding." >&2
  exit 1
fi
if ! command -v realpath >/dev/null; then
  echo "realpath required to get input file folder." >&2
  exit 1
fi
## Test the folder before canonicalizing it, so that the message shows the
## path as the user gave it.
if [ ! -d "$OUTPUT_ROOT" ]; then
  echo "Output folder '$OUTPUT_ROOT' does not exist." >&2
  exit 1
fi
OUTPUT_ROOT="$(realpath "$OUTPUT_ROOT")"
TITLECASE_SCRIPT="${0%/*}/titlecase.awk"
if [ ! -f "$TITLECASE_SCRIPT" ]; then
  echo "AWK titlecase script required." >&2
  exit 1
fi
##================================================================================
## Get metadata.
## Called without an output file, ffmpeg only describes the input; that
## description is captured from stderr.
STREAM=$(ffmpeg -nostdin -i "$1" 2>&1)
## STREAM is quoted: unquoted, any glob character found in the tags would be
## expanded against the current folder.
if ! echo "$STREAM" | grep -q "Stream"; then
  echo "ERROR: Non-audio file [$1]." >&2
  exit 1
fi
## Keep the lines that follow the first 'Metadata' line.
METADATA=$(echo "$STREAM" | sed -n '/Metadata/ ! d; /Metada/{b cont}; :cont ; {n;p;b cont}')

## File name without path. The extension is split off the base name, not off
## the whole path, so that a dot in a folder name is not mistaken for it.
INPUT_BASENAME="${1##*/}"
## Filename without extension nor path.
INPUT_FILE="${INPUT_BASENAME%.*}"
## Folder of the file. Needed for cover.
INPUT_FOLDER="$(realpath "$1")"
INPUT_FOLDER="${INPUT_FOLDER%/*}"
## Empty when the name holds no dot at all.
case $INPUT_BASENAME in
  *.*) INPUT_EXT="${INPUT_BASENAME##*.}" ;;
  *) INPUT_EXT="" ;;
esac
## Number in front of 'kb/s' on the Duration line.
INPUT_BITRATE=$(echo "$STREAM" | sed -n '/Duration/ {s|.* \([[:digit:]]\+\) kb/s|\1|;p;q}')
## CODEC is unused for now.
# CODEC=$(echo "$STREAM" | sed -n '/Stream.*Audio:/ {s/.*Audio: \([^,]*\),.*/\1/;p}')

## Extension needs to be set in case we skip encoding so that ffmpeg will not be
## disturbed by unappropriate extension.
if $SKIP && [ -z "$INPUT_EXT" ]; then
  echo "ERROR: Extension missing [$1]." >&2
  exit 1
fi
## Print the value of one tag found in $METADATA.
## $1: tag name, matched case-insensitively at the start of a line.
## Globals read: METADATA, made of 'name : value' lines.
## WARNING: This function greps for one match only, so if several metadata are
## present, this may not be the desired values.
_metadata_filter()
{
  ## Only the leading 'name :' part is removed. The expression is anchored and
  ## applied once, so a value that itself contains ' : ' is kept whole.
  printf '%s\n' "$METADATA" | grep -im1 "^ *$1 *:" | sed 's/^[^:]*: \{0,1\}//'
}
## Raw tag values as reported for the input file, one variable per tag.
INPUT_ALBUM="$(_metadata_filter album)"
INPUT_ALBUMARTIST="$(_metadata_filter album_artist)"
INPUT_ARTIST="$(_metadata_filter artist)"
INPUT_COMPOSER="$(_metadata_filter composer)"
INPUT_DATE="$(_metadata_filter date)"
INPUT_DISC="$(_metadata_filter disc)"
INPUT_GENRE="$(_metadata_filter genre)"
INPUT_TITLE="$(_metadata_filter title)"
INPUT_TRACK="$(_metadata_filter track)"
INPUT_TYER="$(_metadata_filter TYER)"
##==============================================================================
## Variable cleansing.
## We use the AWK script to set title case. The script contains
## exceptions that can be configured. We fix some chars with sed:
#   (lost character) => '
#   : => -
#   / => -
#   \ => -
#   & => prefixed with backslashes
## The separators ':', '/' and '\' are replaced together with the spaces
## around them by ' - ', then runs of spaces are squeezed into one.
## The backslashes put in front of '&' are presumably there so that the value
## can be handed to awk with -v and used as gsub() replacement text (as _revar
## does) without '&' being read as "the matched text" -- confirm.
##
## Arguments: the text to clean; it is titlecased first.
## Globals read: CAPITAL (passed to the AWK script as 'capital'),
## TITLECASE_SCRIPT (path of the AWK script).
## Output: the cleaned text on standard output.
_string_cleanser()
{
## NOTE(review): the first sed expression below has an empty pattern
## ("s//'/g"). A character to be turned into an apostrophe has evidently been
## lost from this file; with an empty pattern sed reuses the last regular
## expression, and there is none at that point. Restore the character.
echo "$@" | awk -v capital=$CAPITAL -f "$TITLECASE_SCRIPT" \
| sed -e "s//'/g ; s| *[/\\:] *| - |g" -e 's/ \+/ /g' -e 's|&|\\\\&|g;'
}
## Titlecased counterparts of the INPUT_* values. These are the values the
## $NAME references in tag templates stand for.
ALBUM=$(_string_cleanser "$INPUT_ALBUM")
ALBUMARTIST=$(_string_cleanser "$INPUT_ALBUMARTIST")
ARTIST=$(_string_cleanser "$INPUT_ARTIST")
COMPOSER=$(_string_cleanser "$INPUT_COMPOSER")
DATE=$(_string_cleanser "$INPUT_DATE")
DISC=$(_string_cleanser "$INPUT_DISC")
FILENAME=$(_string_cleanser "$INPUT_FILE")
GENRE=$(_string_cleanser "$INPUT_GENRE")
TITLE=$(_string_cleanser "$INPUT_TITLE")
TRACK=$(_string_cleanser "$INPUT_TRACK")
TYER=$(_string_cleanser "$INPUT_TYER")

## Genre is normalized (lower case, spaces turned into underscores) and then
## mapped onto the few genres that are kept; anything else is dropped.
GENRE=$(echo "$GENRE" | tr '[:upper:] ' '[:lower:]_')
case $GENRE in
  ost | soundtrack | original_soundtrack)
    GENRE="Soundtrack" ;;
  classical | classics | classic)
    GENRE="Classical" ;;
  humour)
    GENRE="Humour" ;;
  *)
    GENRE="" ;;
esac
##================================================================================
## OUTPUT variables.
## The following function replaces all variables with their value. This is much
## safer than using shell expansion through 'eval.'
## $1: template in which \$TITLE, \$ARTIST, \$ALBUM, \$ALBUMARTIST, \$COMPOSER,
##     \$DISC, \$GENRE, \$TRACK, \$DATE, \$TYER and \$FILENAME may appear.
## Globals read: the titlecased variables of the same names.
## Output: the template with every reference replaced, on standard output.
_revar()
{
  ## \$ALBUMARTIST must be handled before \$ALBUM: \$ALBUM is a prefix of it,
  ## so the other order would turn '\$ALBUMARTIST' into '<album>ARTIST'.
  ## No other name in the list is a prefix of another one.
  printf '%s\n' "$1" | awk \
    -v title="$TITLE" \
    -v artist="$ARTIST" \
    -v album="$ALBUM" \
    -v albumartist="$ALBUMARTIST" \
    -v composer="$COMPOSER" \
    -v disc="$DISC" \
    -v genre="$GENRE" \
    -v track="$TRACK" \
    -v date="$DATE" \
    -v tyer="$TYER" \
    -v filename="$FILENAME" \
    '{
      gsub(/\$TITLE/, title)
      gsub(/\$ARTIST/, artist)
      gsub(/\$ALBUMARTIST/, albumartist)
      gsub(/\$ALBUM/, album)
      gsub(/\$COMPOSER/, composer)
      gsub(/\$DISC/, disc)
      gsub(/\$GENRE/, genre)
      gsub(/\$TRACK/, track)
      gsub(/\$DATE/, date)
      gsub(/\$TYER/, tyer)
      gsub(/\$FILENAME/, filename)
      print
    }'
}
## Resolve the tag templates. The 'Unknown ...' defaults are applied twice:
## before substitution (template emptied from the command line) and after it
## (template pointing at a tag the file does not have). The second check is
## the one that matters for the default templates, which are never empty.
OUTPUT_TITLE=$(_revar "${OUTPUT_TITLE:-Unknown Title}")
[ -z "$OUTPUT_TITLE" ] && OUTPUT_TITLE="Unknown Title"
## OUTPUT_ALBUM may stay empty: the OUTPUT_FOLDER template leaves the album
## level out in that case.
OUTPUT_ALBUM=$(_revar "${OUTPUT_ALBUM:-Unknown Album}")
## We use album artist if artist is empty. This is done on the titlecased
## value, so that every \$ARTIST reference benefits from it.
[ -z "$ARTIST" ] && ARTIST="$ALBUMARTIST"
[ -z "$OUTPUT_ARTIST" ] && OUTPUT_ARTIST="$ALBUMARTIST"
OUTPUT_ARTIST=$(_revar "${OUTPUT_ARTIST:-Unknown Artist}")
[ -z "$OUTPUT_ARTIST" ] && OUTPUT_ARTIST="Unknown Artist"
OUTPUT_GENRE=$(_revar "$OUTPUT_GENRE")
## We remove the track count if any, we suppress leading zeros, we suppress all
## non-digit characters.
OUTPUT_TRACK=$(_revar "$OUTPUT_TRACK" | sed -e 's/^0*//' -e 's|[^[:digit:]].*||')
## We extract the four-digits number from the date.
OUTPUT_DATE=$(_revar "$OUTPUT_DATE")
OUTPUT_DATE=$(echo "$OUTPUT_DATE" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
## If DATE is not a year, we use TYER if it is a year.
TYER_REG=$(_revar "$TYER" | sed -n 's/.*\([[:digit:]]\{4\}\).*/\1/p')
[ ${#DATE} -ne 4 ] && [ ${#TYER_REG} -eq 4 ] && OUTPUT_DATE="$TYER_REG"
## QUALITY
## Print the ffmpeg audio codec arguments.
## $1: "true" when encoding must be skipped
## $2: lower-cased extension of the input file
## $3: requested bitrate in kbit/s; negative means "same as source", 0 means
##     "let the encoder choose"
## $4: bitrate of the source in kbit/s, may be empty or not a number
## Rules:
## * skip requested                      -> stream copy
## * OGG input, no bitrate requested     -> stream copy
## * no usable bitrate, or bitrate 0     -> libvorbis with its own default
## * otherwise                           -> libvorbis at that bitrate, capped
##                                          at 500 which is OGG's limit
_ogg_param()
{
  if [ "$1" = "true" ]; then
    printf '%s\n' "-c:a copy"
    return
  fi
  _ogg_bitrate=$3
  ## The test fails (silently) on a non-numeric value, which is then caught by
  ## the case statement below.
  if [ "$_ogg_bitrate" -lt 0 ] 2>/dev/null; then
    if [ "$2" = "ogg" ]; then
      printf '%s\n' "-c:a copy"
      return
    fi
    _ogg_bitrate=$4
  fi
  case $_ogg_bitrate in
    '' | *[!0-9]*)
      printf '%s\n' "-c:a libvorbis"
      return ;;
  esac
  if [ "$_ogg_bitrate" -eq 0 ]; then
    printf '%s\n' "-c:a libvorbis"
    return
  fi
  [ "$_ogg_bitrate" -gt 500 ] && _ogg_bitrate=500
  printf '%s\n' "-c:a libvorbis -b:a ${_ogg_bitrate}k"
}

## Only reencode if not in OGG and if SKIP not set, or if explicitly specified.
INPUT_EXT_LOW="$(echo "$INPUT_EXT" | tr '[:upper:]' '[:lower:]')"
## A copied stream keeps its format, hence the extension of the input.
if $SKIP; then
  OUTPUT_EXT="$INPUT_EXT_LOW"
fi
OGG_PARAM="$(_ogg_param "$SKIP" "$INPUT_EXT_LOW" "$OUTPUT_BITRATE" "$INPUT_BITRATE")"
## Make sure track number has two digits for file name only.
OUTPUT_PADDEDTRACK=$OUTPUT_TRACK
if [ -n "$OUTPUT_PADDEDTRACK" ]; then
  [ "$OUTPUT_PADDEDTRACK" -lt 10 ] && OUTPUT_PADDEDTRACK="0$OUTPUT_PADDEDTRACK"
fi

## Expand the destination templates. They are written in shell syntax
## (\$OUTPUT_ROOT, \${OUTPUT_ALBUM:+...}), which _revar does not understand: it
## would hand them back unchanged and the destination would be a path made of
## literal dollar signs. The templates are fixed strings set by this script, so
## a single 'eval' pass over them is safe: the values of the variables they
## refer to are inserted, not evaluated again.
## WARNING: never feed user- or tag-supplied text into these two templates.
eval "OUTPUT_FOLDER=\"$OUTPUT_FOLDER\""
eval "OUTPUT_FILE=\"$OUTPUT_FILE\""

unset OUTPUT_FILE_ORIGINAL
if [ -e "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
  if [ "$OVERWRITE" = "-n" ]; then
    ## If file exist, we append a unique timestamp to the name.
    OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
    OUTPUT_MSG="$(tput setf 1)$(tput bold)(Warning: destination exists, appending timestamp.)$(tput sgr0)"
  else
    ## Overwriting in place: ffmpeg cannot read and write the same file, so
    ## the result goes to a timestamped name first; OUTPUT_FILE_ORIGINAL keeps
    ## the final name.
    ## WARNING: here it is important that no folder are suffixed by slashes.
    if [ "$INPUT_FOLDER/$INPUT_FILE.$INPUT_EXT" = "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" ]; then
      OUTPUT_FILE_ORIGINAL="$OUTPUT_FILE"
      OUTPUT_FILE="$OUTPUT_FILE-$(date '+%F-%H%M%S')"
    fi
    OUTPUT_MSG="$(tput setf 4)$(tput bold)(Warning: overwriting destination!)$(tput sgr0)"
  fi
fi
##==============================================================================
## PREVIEW
## Note: most (all?) shell printf have an alignment issue when strings contain
## wide characters, so the table is laid out by AWK (see 'aprint').
## Attribute column: left-aligned, cut at 13 characters (length of the longest
## attribute + 2).
ATTR_WIDTH='%-13.13s'
## Input column: half of what is left of the terminal width once the attribute
## column and its separators (15 characters) are taken away. Right-aligned and
## cut at that width.
INPUT_WIDTH=$(tput cols)
INPUT_WIDTH=$(( ($INPUT_WIDTH - 15) / 2 ))
INPUT_WIDTH="%${INPUT_WIDTH}.${INPUT_WIDTH}s"
## Lay out tab-separated rows read from standard input as a three-column
## table: input value | attribute | output value.
## Everything goes through a single AWK process since this is quite demanding
## and called frequently. A tab inside a tag shifts the columns of its row; we
## have no control over the input, so this cannot be ruled out.
## Globals read: INPUT_WIDTH and ATTR_WIDTH (printf conversions).
aprint()
{
  awk -v FMT="$INPUT_WIDTH | $ATTR_WIDTH| %s\n" '
    BEGIN { FS = "\t+" }
    { printf(FMT, $1, $2, $3) }'
}
## Preview table. 'aprint' splits its input on tabs, so the three columns of
## each row must be separated by a tab character. The tab is kept in a
## variable so that it cannot be silently turned into spaces by an editor.
TAB=$(printf '\t')
## The middle column of the header holds a single space: consecutive tabs
## count as one separator, so an empty field would shift ':: OUTPUT ::' into
## the attribute column.
aprint <<EOF
:: INPUT ::${TAB} ${TAB}:: OUTPUT ::
[$INPUT_ARTIST]${TAB}Artist${TAB}[$(tput setf 4)$OUTPUT_ARTIST$(tput sgr0)]
[$INPUT_ALBUM]${TAB}Album${TAB}[$OUTPUT_ALBUM]
[$INPUT_TRACK]${TAB}Track${TAB}[$OUTPUT_TRACK]
[$INPUT_TITLE]${TAB}Title${TAB}[$OUTPUT_TITLE]
[$INPUT_DATE]${TAB}Date${TAB}[$OUTPUT_DATE]
[$INPUT_GENRE]${TAB}Genre${TAB}[$OUTPUT_GENRE]
[$INPUT_EXT]${TAB}Ext${TAB}[$OUTPUT_EXT]
[$INPUT_BITRATE]${TAB}Bitrate${TAB}[$OUTPUT_BITRATE]
[$INPUT_ALBUMARTIST]${TAB}Albumartist
[$INPUT_COMPOSER]${TAB}Composer
[$INPUT_DISC]${TAB}Disc
[$INPUT_TYER]${TAB}Tyer
EOF
cat <<EOF
:: DESTINATION $OUTPUT_MSG
[$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT]
EOF
## In preview mode nothing is written: stop here.
$PREVIEW && exit
##==============================================================================
## RUN PROCESS
echo ":: Processing..."
## Make sure directory exists. The message names OUTPUT_FOLDER, the folder
## that was actually requested, and the failure is reported on stderr with a
## non-zero status.
if ! mkdir -p "$OUTPUT_FOLDER"; then
  echo "ERROR: could not create output folder [$OUTPUT_FOLDER]." >&2
  exit 1
fi
## COVER. We copy the covers only if they do not already exist. All covers
## embedded in tags will be extracted. Only files found in the folder where the
## music is located will be taken into account, subfolders will be discarded.
## Covers whose width or height is below COVER_LIMIT trigger a warning.
COVER_LIMIT=100
## Copy one cover picture into the output folder.
## $1: path of the picture; anything that is not a regular file is ignored.
## Globals read: OUTPUT_FOLDER, OUTPUT_ALBUM, COVER_LIMIT.
## The copy is named '<album> - Cover.<ext>' (just 'Cover.<ext>' without
## album). A progress line '<source> -> <destination>' is printed, or
## '<source> -> Skipping' when an identical picture is already there.
_cover()
{
  [ ! -f "$1" ] && return
  printf '%s -> ' "$1"
  OUTPUT_COVER="$OUTPUT_FOLDER/${OUTPUT_ALBUM:+$OUTPUT_ALBUM - }Cover"
  OUTPUT_COVERFILE="$OUTPUT_COVER.${1##*.}"
  COVER_COUNTER=1
  ## Checksum of the source, computed once for all the comparisons below.
  COVER_SUM="$(sha1sum "$1" | cut -f1 -d' ')"
  ## If a different cover with the same name already exist, we append a number
  ## and check again. If it is the same cover, we skip it.
  while [ -e "$OUTPUT_COVERFILE" ]; do
    if [ "$(sha1sum "$OUTPUT_COVERFILE" | cut -f1 -d' ')" = "$COVER_SUM" ]; then
      echo "Skipping"
      return
    fi
    OUTPUT_COVERFILE="${OUTPUT_COVER} $COVER_COUNTER.${1##*.}"
    COVER_COUNTER=$(($COVER_COUNTER+1))
  done
  printf '%s' "$OUTPUT_COVERFILE"
  ## Output warning if cover is too small.
  if command -v mediainfo >/dev/null; then
    BUF="$(mediainfo "$1")"
    ## mediainfo groups digits by thousands ('1 920 pixels'), so the number is
    ## rebuilt from every digit found after the colon instead of taking a
    ## single field. Only the first Width/Height line is used.
    COVER_WIDTH=$(echo "$BUF" | awk -F: '/^Width/ {gsub(/[^0-9]/, "", $2); print $2; exit}')
    COVER_HEIGHT=$(echo "$BUF" | awk -F: '/^Height/ {gsub(/[^0-9]/, "", $2); print $2; exit}')
    if [ -z "$COVER_WIDTH" ] || [ "$COVER_WIDTH" -lt "$COVER_LIMIT" ] || \
      [ -z "$COVER_HEIGHT" ] || [ "$COVER_HEIGHT" -lt "$COVER_LIMIT" ]; then
      printf ' %s' "$(tput setf 1)$(tput bold)(Warning: bad quality cover.)$(tput sgr0)"
    fi
  fi
  cp -n "$1" "$OUTPUT_COVERFILE"
  echo
  echo
}
## Embedded covers. ffmpeg lists them as video streams; stream number i
## (0-based, as used by -map 0:v:i) is written to a temporary file and handed
## to _cover.
for i in $(seq 0 $(($(echo "$STREAM" | grep -c '^ *Stream.*Video')-1))); do
  ## Codec of the (i+1)-th video stream only. Collecting the codecs of all
  ## video streams at once would give a multi-line value as soon as a file
  ## carries several pictures.
  COVER_EXT="$(echo "$STREAM" | awk -v n="$(($i+1))" \
    '/^ *Stream.*Video/ { if (++seen == n) { gsub(/,/, "", $4); print $4; exit } }')"
  [ -z "$COVER_EXT" ] && continue
  [ "$COVER_EXT" = "mjpeg" ] && COVER_EXT="jpg"
  TEMP_COVER="$(mktemp "/tmp/cover-XXXXXX.$COVER_EXT")" || continue
  ffmpeg -nostdin -v quiet -y -i "$1" -an -sn -c:v copy -map 0:v:$i "$TEMP_COVER"
  _cover "$TEMP_COVER"
  ## We do not want to bloat the temp folder with covers, so we remove it.
  rm "$TEMP_COVER"
done

## Pictures lying next to the input file (subfolders are not searched).
while IFS= read -r i; do
  _cover "$i"
done <<EOF
$(find "$INPUT_FOLDER" -maxdepth 1 \( -iname '*.png' -o -iname '*.jpg' \) )
EOF
## Zsh compatibility. We need it otherwise word splitting of parameter like
## OGG_PARAM will not work.
STATUS="$(set -o | grep 'shwordsplit' | awk '{print $2}')"
[ "$STATUS" = "off" ] && set -o shwordsplit

## TAG/RECODE
## With the -map_metadata parameter we clear all metadata.
## WARNING: ffmpeg continues to read stdin once it has started, so it should not
## be called from within a while<<EOF loop without disabling stdin.
## LOGLEVEL, OVERWRITE and OGG_PARAM are left unquoted on purpose: each of
## them may hold several arguments.
ffmpeg -nostdin $LOGLEVEL $OVERWRITE -i "$1" -vn -sn $OGG_PARAM \
  -map_metadata -1 \
  -metadata title="$OUTPUT_TITLE" \
  -metadata artist="$OUTPUT_ARTIST" \
  -metadata track="$OUTPUT_TRACK" \
  -metadata date="$OUTPUT_DATE" \
  -metadata album="$OUTPUT_ALBUM" \
  -metadata album_artist="$OUTPUT_ARTIST" \
  -metadata genre="$OUTPUT_GENRE" \
  "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
FFMPEG_STATUS=$?

## Restore Zsh previous options. This will not turn off shwordsplit if it
## was on before calling the function.
[ "$STATUS" = "off" ] && set +o shwordsplit

## Stop on failure. When overwriting in place this matters a lot: moving a
## partial result over the original would destroy the only good copy.
if [ "$FFMPEG_STATUS" -ne 0 ]; then
  echo "ERROR: ffmpeg failed on [$1]." >&2
  ## The timestamped file of an in-place run was created by this script and
  ## can be discarded.
  [ -n "$OUTPUT_FILE_ORIGINAL" ] && rm -f "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT"
  exit "$FFMPEG_STATUS"
fi

## If we are overwriting inplace.
if [ -n "$OUTPUT_FILE_ORIGINAL" ]; then
  mv -f "$OUTPUT_FOLDER/$OUTPUT_FILE.$OUTPUT_EXT" "$OUTPUT_FOLDER/$OUTPUT_FILE_ORIGINAL.$OUTPUT_EXT"
fi
echo
echo ":: Process finished!"

View File

@ -1,193 +0,0 @@
#!/usr/env/gawk -f
## This script is inspired by
## http://www.pement.org/awk/titlecase.awk.txt
##
## function: titlecase("CHANGE TO TITLE CASE") --> "Change to Title Case"
##
## Features:
##
## titlecase() takes a single parameter. It always compresses runs of spaces,
## tabs and underscores into one space and trims the ends: titlecase(string)
##
## This function tries to implement the "Title Case" constructs specified in
## the APA Style Manual and the Chicago Manual of Style. Instead of merely
## capitalizing the first letter of each word and setting everything else in
## lowercase, this function implements the following conditions:
##
## - Conjunctions, articles, and prepositions are set lowercase, UNLESS they
## are the first word of the string or the first word after a colon, a
## question mark, or an exclamation point.
## - Compass points (NE, SW, etc.) are set in solid caps.
## - Roman numerals (II, IV, VII, IX, etc.) are set in solid caps.
## - Certain abbreviations are always capitalized (AIDS, ASCII, NT, USA, etc.)
## - Names beginning with D' or O' are set as D'Arcy, O'Reilly, etc.
## - Hyphenated strings receive internal caps (Smith-Williams, Twenty-Two)
## - Contractions such as I'll, You've, Don't, etc. are handled properly
## - Degrees such as Ph.D., M.Div., etc. are properly capitalized
##
## Sample Usage with GNU awk (gawk):
##
## gawk -f titlecase.awk infile
## TODO: maybe it would be a good idea to implement a preprocessor that would
## search and replace special strings like AC-DC.
## Tests:
## all lowercase words
## ALL UPPERCASE WORDS
## aLl cRaZY cASE WordS
## And with constants in an INTO cd Contre. Feat and Feat. the machine.
## Bad ,punctuation. here , should ! not be ?a problem.
## Roman numerals XIV LIV xiv liv. liv. xiv.
## Dashed--machine--ac-dc.
## About mcdonald and o'reilly, but i'll won't say.
## The "final quote" 'on the waterfront'.
BEGIN {
    ## Words that are always written as spelled here. The list is made of,
    ## in this order: English, French and German articles, conjunctions and
    ## prepositions; music-related words and band names; other acronyms.
    ## Each chunk ends with a space so that chunks can simply be appended.
    ## A value of 'constants' set from outside (e.g. with -v) stays in front.
    constants = constants \
        "a an the and but for nor or so am is are against at between by from in into of on to upon " \
        "un une de du le la les et mais pour ni ou à a où contre entre chez dans sur que qui " \
        "der die das den dem des ein eine einen eines einer von wo an am in für gegen bei aus mit nach seit zu durch ohne um " \
        "feat CD DJ " \
        "KlassX Machine d'Acide BYOB MGMT AC DC JBX RZA DMX " \
        "AIDS ASCII DHTML DNA DVD FBI GNU GPL IBM IRS ISBN ISSN PHP ROM SSN TV FM "

    ## One array entry per word.
    split(constants, constarray, " ")
}
## titlecase(string): return 'string' converted to title case.
##
## The string is consumed word by word. Each word (together with the
## punctuation that follows it) goes through rules 1 to 4 below; a word that
## no rule claims gets its first letter in upper case and the rest in lower
## case. Two final passes then capitalize the first word of the string and
## the first word after some punctuation marks.
##
## Globals read: constarray (built in BEGIN), capital (when 1, words that no
## rule claims are set entirely in lower case and rule 3 is skipped), debug
## (when set, prints a trace).
## NOTE: only 'string' is declared in the parameter list, so every other
## variable used here (a, b, hit, pos, word, head, tail, var, result, beg,
## cap, end) is global.
## NOTE: gensub() and the \> word-boundary operator are GNU awk extensions.
function titlecase(string) {
## Initialize variables.
a = ""; # a is/will be the string ALREADY converted
b = string; # b is the rest of the string, so that (string = a b)
## English punctuation. It is quite hard to guess the language, so French
## will follow English punctuation rules: no space before a punctuation mark,
## exactly one after it.
b = gensub(/ +([,!:;?.]+) */, "\\1 ", "g", b)
## Compress spaces or tabs. Trim prefix and suffix space. Convert
## underscores to spaces.
gsub(/[_ \t]+/, " ", b)
gsub(/^ /, "", b)
gsub(/ $/, "", b)
## Capitalize everything for ease of matching.
b = toupper(b)
do {
## Initialize for later use.
hit = 0;
## 'pos' is the position of the NEXT run of characters that are neither
## alphanumeric nor an apostrophe, i.e. the separator after the current
## word. If this is the last word in b, pos will be 0. match()
## automatically sets RLENGTH. WARNING: we consider digits as part of a word.
## 'word' is the current word INCLUDING that separator.
pos = match(b, /[^[:alnum:]']+/)
if (pos > 0) word = substr(b, 1, pos + RLENGTH - 1)
else word = b
## 1st char of current word.
head = substr(b, 1, 1)
## Tail of current word (separator included).
if (pos > 0) tail = substr(b, 2, pos + RLENGTH - 2)
else tail = substr(b, 2)
## Shorten the rest of the string. When pos is 0 the value of b no longer
## matters: the loop ends after this iteration.
b = substr(b, pos + RLENGTH )
## RULE 1 -- Constant strings.
## WARNING: since we match a substring of 'word', we need to prepend and
## append the potentially discarded values, like dashes.
## The constant is compared in upper case and must be a whole word at the
## beginning of 'word'; it is then written back as spelled in constarray.
for (var in constarray) {
if (debug)
print ":: Comparing " word " with " constarray[var]
hit = match(word, "^" toupper(constarray[var]) "\\>")
if ( hit > 0 ) {
word = substr(word, 1, RSTART-1) constarray[var] substr(word, RSTART+RLENGTH)
if (debug)
print ":: Match constant on [" constarray[var] "] in string [" word "]";
break;
}
}
## RULE 2 -- Roman numerals
## Note: this match cannot distinguish between LIV (54 in Roman
## numerals) and a personal name like "Liv Ullman". The Roman numerals
## C (100), D (500), and M (1000) are omitted to avoid false matches on
## words like civil, did, dim, lid, mid-, mild, Vic, etc. Most uses of
## Roman numerals in titles stays in the lower ranges, such as "Vol. II"
## or "Pt. XXIV".
## A hit leaves 'word' as it is, i.e. in upper case.
if ( hit == 0 && match(word, /^[IVXL]+\>/) ) {
hit = 1
## But we can undo I'd, I'll, I'm, I've and Ill.
if (match(word,/^I'|ILL\>/))
hit = 0
if (debug && hit == 1)
print ":: Match on Roman numerals in [" word "]"
}
## RULE 3 -- Names like D'Arcy or O'Reilly
## The first three characters (D' or O' plus the next letter) stay in upper
## case, the rest is set in lower case. Not applied in capital-case mode.
if ( hit == 0 && capital != 1 && match(word, /^[DO]'[[:alpha:]]/) ) {
word = substr(word,1,3) tolower(substr(word,4))
hit = 1
if (debug)
print ":: Match on mixed case: " word
}
## RULE 4 -- Names like MacNeil or McDonald
## RLENGTH covers 'MC' or 'MAC' plus the consonant that follows, so the
## character at position RLENGTH is that consonant: it stays in upper case,
## as does the initial M; everything else is set in lower case.
if ( hit == 0 && match(word,/^MA?C[B-DF-HJ-NP-TV-Z]/) ) {
if (debug)
print ":: Match on MacX: " substr(word,1,1) "-" \
tolower(substr(word,2,RLENGTH-2)) "-" substr(word,RLENGTH,1) "-" \
tolower(substr(word,RLENGTH+1))
word = substr(word,1,1) tolower(substr(word,2,RLENGTH-2)) \
substr(word,RLENGTH,1) tolower(substr(word,RLENGTH+1))
hit = 1
}
## If one of the above rules is hit, we append the result to 'a',
## otherwise we capitalize it (or set it in lower case in capital-case
## mode, where only the start of the string ends up in upper case).
if (hit > 0 ) a = a word
else if (capital == 1) a = a tolower(head) tolower(tail)
else a = a toupper(head) tolower(tail)
} while (pos > 0);
## Everything should be converted now.
## Double exception 1: Set 1st word of string in capital case. Need to
## handle potential internal single/double quotes like "A Day in the Life"
## or 'On the Waterfront'. WARNING: here we consider digits as part of a
## word (as in 1st, 2nd, etc.).
## Everything up to and including the first alphanumeric character is put
## in upper case.
match(a, /[[:alnum:]]/)
a = toupper(substr(a, 1, RSTART)) substr(a, RSTART+1)
## Double exception 2: Set 1st word after some punctuation marks in title
## case. This kludge handles multiple colons, question marks, etc. on the
## line. \a is the BEL or CTRL-G character: it is inserted as a marker in
## front of each letter to capitalize, then every marker is removed while the
## letter that follows it is put in upper case.
result = gensub(/([:{}\[\]?!"()-][^[:alnum:]]*)([a-zA-Z])/, "\\1\a\\2", "g", a)
while (match(result, /\a/)) {
beg = substr(result, 1, RSTART-1)
cap = toupper(substr(result, RSTART+1, 1))
end = substr(result, RSTART+2)
result = beg cap end
}
return result
}
## Main rule: print the title-cased version of every input line.
{print titlecase($0)}
## End of script