#!/bin/sh
## Source: ambevar-dotfiles/.scripts/translate
## TODO: support for long texts.
## Print the help text on standard output.
## $1: path the script was invoked as; only its basename is displayed.
_usage () {
  cat<<EOF
Usage: ${1##*/} [-i source-language] target-language [text]
Translate text from one language to another. The languages are specified using
the international prefix (e.g. 'en' for English, 'de' for German, etc.).
When omitting the source language, the service tries to guess the language from
the input content.
When omitting the text, standard input is read.

Options:
-h: Show this help.
-i: Input language (optional).
-l: Show alternate translations (single words only). If jshon is found on the
system, print all translations for all grammatical types (noun, verb,
etc.).

Language codes:
af: Afrikaans, sq: Albanian, ar: Arabic, hy: Armenian, az: Azerbaijani, eu:
Basque, be: Belarusian, bn: Bengali, bs: Bosnian, bg: Bulgarian, ca: Catalan,
ceb: Cebuano, zh-CN: Chinese (Simplified), zh-TW: Chinese (Traditional), hr:
Croatian, cs: Czech, da: Danish, nl: Dutch, en: English, eo: Esperanto, et:
Estonian, tl: Filipino, fi: Finnish, fr: French, gl: Galician, ka: Georgian, de:
German, el: Greek, gu: Gujarati, ht: Haitian Creole, iw: Hebrew, hi: Hindi, hmn:
Hmong, hu: Hungarian, is: Icelandic, id: Indonesian, ga: Irish, it: Italian, ja:
Japanese, jw: Javanese, kn: Kannada, km: Khmer, ko: Korean, lo: Lao, la: Latin,
lv: Latvian, lt: Lithuanian, mk: Macedonian, ms: Malay, mt: Maltese, mr:
Marathi, no: Norwegian, fa: Persian, pl: Polish, pt: Portuguese, ro: Romanian,
ru: Russian, sr: Serbian, sk: Slovak, sl: Slovenian, es: Spanish, sw: Swahili,
sv: Swedish, ta: Tamil, te: Telugu, th: Thai, tr: Turkish, uk: Ukrainian,
ur: Urdu, vi: Vietnamese, cy: Welsh, yi: Yiddish
EOF
}
## Command-line parsing.
## SL: source language given with -i; left unset when -i is absent.
## OPT_LONG: 'true' when alternate translations were requested with -l.
## TL: target language, the first non-option argument (mandatory).
unset SL
OPT_LONG=false
while getopts ":hi:l" opt; do
  case $opt in
  h)
    _usage "$0"
    exit 1
    ;;
  i)
    SL="$OPTARG" ;;
  l)
    OPT_LONG=true ;;
  :)
    ## With a leading ':' in the option string, getopts reports a missing
    ## option argument as ':' and stores the option letter in OPTARG.
    printf '%s: option -%s requires an argument\n' "${0##*/}" "$OPTARG" >&2
    _usage "$0"
    exit 1
    ;;
  \?)
    _usage "$0"
    exit 1
    ;;
  esac
done
shift $((OPTIND - 1))

## At least the target language must remain after the options.
if [ $# -eq 0 ]; then
  _usage "$0"
  exit 1
fi
TL=$1
shift
## Join the lines read on standard input into one comma-separated line.
## NOTE: '\n' in the sed replacement is not portable beyond GNU sed.
_join_lines () {
  tr '\n' ',' | sed 's/,$/\n/'
}

## Extract the translation(s) from the raw service response read on standard
## input and print them on standard output.
## Globals: OPT_LONG (read) -- 'true' to print alternate translations.
##          input, count, type (written) -- scratch variables.
_filter () {
  if ! $OPT_LONG; then
    ## Keep the first quoted string of the response only.
    sed -e 's/\[\[\["\([^"]*\).*/\1/' -e 's/"//g'
    echo
    return
  fi

  if ! command -v jshon >/dev/null 2>&1; then
    ## Use sed to fetch first entry only.
    sed -e 's/\[\[\[[^[]*\[\[[^[]*\[\([^[]*\)\].*/\1/' -e 's/"//g'
    echo
    return
  fi

  ## Input may have empty fields, which is not normally allowed in
  ## json. Let's fix this.
  input="$(sed -e 's/,\+/,/g' -e 's/\[,\+/\[/g' -e 's/,\+\]/\]/g')"

  ## The use of jshon without checking the array size is unsafe. If the
  ## size is not as expected, it probably means that source and target
  ## languages are the same. Anyhow, we drop the error output.
  ## Google Translate does not have the same structure when English is
  ## involved. With English, the list of translations is in cell 1,
  ## without English it is in cell 3. Without English, there is no
  ## distinction on grammatical type.
  ## printf is used instead of echo so that backslash escapes in the JSON
  ## text are passed to jshon untouched whatever shell /bin/sh is.
  if [ "$(printf '%s\n' "$input" | jshon -e 1 -t)" = "string" ]; then
    ## No English
    printf '%s\n' "$input" | jshon -e 3 -e 0 -e 2 -a -e 0 -u | _join_lines
  else
    count=0
    ## Fetch grammatical type in 'type'.
    while read -r type; do
      printf '%s:' "$type"
      ## Fetch translation alternative from count-th entry
      printf '%s\n' "$input" | jshon -e 1 -e "$count" -e 1 -a -u | _join_lines
      count=$((count + 1))
    done<<EOF
$(printf '%s\n' "$input" | jshon -e 1 -a -e 0 -u)
EOF
  fi 2>/dev/null
}
## Send the text read on standard input to the translation service and pipe
## the response through _filter.
## Globals: TL (read) -- target language.
##          SL (read) -- source language; omitted from the request when
##                       unset or empty.
##          OPT_LONG (written) -- forced to 'false' for multi-word input.
##          TEXT (written) -- the normalized input text.
_translate () {
  ## Turn newlines and tabs into spaces, squeeze runs of spaces and trim both
  ## ends, so that a trailing newline does not count as a word separator.
  TEXT="$(tr '\n\t' '  ' | sed -e 's/  */ /g' -e 's/^ //' -e 's/ $//')"

  ## If input is not a single word, disable alternative translations.
  case $TEXT in
  *' '*) OPT_LONG=false ;;
  esac

  ## Build the query in the function's own positional parameters. User
  ## supplied values go through --data-urlencode so that characters such as
  ## '&', '#', '%' or '+' cannot corrupt the query string; -G makes curl
  ## append the data to the URL instead of sending a POST body.
  set -- -d 'client=t' --data-urlencode "text=$TEXT"
  if [ -n "${SL-}" ]; then
    set -- "$@" --data-urlencode "sl=$SL"
  fi
  set -- "$@" --data-urlencode "tl=$TL" -d 'ie=UTF-8' -d 'oe=UTF-8'

  ## curl's progress meter and error messages are deliberately discarded.
  curl -G -A "Mozilla/5.0" "$@" 'http://translate.google.com/translate_a/t' 2>/dev/null | _filter
}
## Entry point: translate the remaining arguments when there are any,
## otherwise translate standard input.
if [ $# -ne 0 ]; then
  ## printf instead of echo: echo may interpret backslashes or a leading
  ## '-n' in the text, depending on the shell.
  printf '%s\n' "$*" | _translate
else
  _translate
fi
## Original command:
# curl -A "Mozilla/5.0" 'http://translate.google.com/translate_a/t?client=t&text=hello&hl=en&sl=en&tl=zh-CN&ie=UTF-8&oe=UTF-8&multires=1&prev=btn&ssel=0&tsel=0&sc=1' | sed 's/\[\[\["\([^"]*\).*/\1/'