143 lines
4.5 KiB
Bash
Executable File
143 lines
4.5 KiB
Bash
Executable File
#!/bin/sh
|
|
|
|
## TODO: support for long texts.
|
|
|
|
## Print the usage message on standard output.
##
## $1: path the script was invoked as (typically "$0"); only its last path
##     component is shown in the synopsis.
_printhelp ()
{
	## The here-document delimiter is left unquoted on purpose so that the
	## program name below gets expanded.
	cat <<EOF
Usage: ${1##*/} [-i source-language] target-language [text]

Translate text from one language to another. The languages are specified using
the international prefix (e.g. 'en' for English, 'de' for German, etc.).

When omitting the source language, the service tries to guess the language from
the input content.

When omitting the text, standard input is read.

Options:

  -h: Show this help.
  -i: Input language (optional).
  -l: Show alternate translations (single words only). If jshon is found on the
      system, print all translations for all grammatical types (noun, verb,
      etc.).

Language codes:

af: Afrikaans, sq: Albanian, ar: Arabic, hy: Armenian, az: Azerbaijani, eu:
Basque, be: Belarusian, bn: Bengali, bs: Bosnian, bg: Bulgarian, ca: Catalan,
ceb: Cebuano, zh-CN: Chinese (Simplified), zh-TW: Chinese (Traditional), hr:
Croatian, cs: Czech, da: Danish, nl: Dutch, en: English, eo: Esperanto, et:
Estonian, tl: Filipino, fi: Finnish, fr: French, gl: Galician, ka: Georgian, de:
German, el: Greek, gu: Gujarati, ht: Haitian Creole, iw: Hebrew, hi: Hindi, hmn:
Hmong, hu: Hungarian, is: Icelandic, id: Indonesian, ga: Irish, it: Italian, ja:
Japanese, jw: Javanese, kn: Kannada, km: Khmer, ko: Korean, lo: Lao, la: Latin,
lv: Latvian, lt: Lithuanian, mk: Macedonian, ms: Malay, mt: Maltese, mr:
Marathi, no: Norwegian, fa: Persian, pl: Polish, pt: Portuguese, ro: Romanian,
ru: Russian, sr: Serbian, sk: Slovak, sl: Slovenian, es: Spanish, sw: Swahili,
sv: Swedish, ta: Tamil, te: Telugu, th: Thai, tr: Turkish, uk: Ukrainian,
ur: Urdu, vi: Vietnamese, cy: Welsh, yi: Yiddish

EOF
}
|
|
|
|
## Command-line parsing.
##
## Sets:
##   SL        source language (left unset when -i is not given)
##   OPT_LONG  'true' when -l was given, 'false' otherwise
##   TL        target language (first non-option argument, mandatory)
## Afterwards the positional parameters hold the text to translate, if any.
unset SL
OPT_LONG=false

## The leading ':' in the option string makes getopts silent, so that missing
## arguments (':') and unknown options ('?') can be reported here.
while getopts ":hi:l" opt; do
	case $opt in
	h)
		## Asking for help is not an error.
		_printhelp "$0"
		exit 0
		;;
	i)
		SL="$OPTARG"
		;;
	l)
		OPT_LONG=true
		;;
	:)
		printf '%s: option -%s requires an argument\n' "${0##*/}" "$OPTARG" >&2
		_printhelp "$0" >&2
		exit 1
		;;
	\?)
		printf '%s: unknown option -%s\n' "${0##*/}" "$OPTARG" >&2
		_printhelp "$0" >&2
		exit 1
		;;
	esac
done

shift $((OPTIND - 1))

## The target language is mandatory.
if [ $# -eq 0 ]; then
	_printhelp "$0" >&2
	exit 1
fi

TL=$1

shift
|
|
|
|
## Join the lines read on standard input with commas and terminate the result
## with a single newline.
_filter_join()
{
	awk 'NR > 1 { printf "," } { printf "%s", $0 } END { print "" }'
}

## Extract the translation(s) from the raw service reply read on standard
## input and print them on standard output.
##
## Reads the global OPT_LONG:
##   not 'true': print only the first quoted string of the reply.
##   'true':     print the alternate translations. With jshon available, all
##               alternatives are printed (one "type:alt1,alt2,..." line per
##               grammatical type when the reply provides types); without
##               jshon, sed is used to fetch the first entry only.
##
## Note: 'input', 'count' and 'type' are plain globals since POSIX sh has no
## local variables.
_filter()
{
	if [ "$OPT_LONG" != true ]; then
		## Keep what follows the leading '[[["' up to the next quote.
		sed -e 's/\[\[\["\([^"]*\).*/\1/' -e 's/"//g'
		echo
		return
	fi

	if command -v jshon >/dev/null 2>&1; then
		## Input may have empty fields, which is not normally allowed in
		## json. Let's fix this. Only POSIX sed syntax is used: once runs of
		## commas are collapsed, a single comma is left to strip next to a
		## bracket.
		input="$(sed -e 's/,,*/,/g' -e 's/\[,/[/g' -e 's/,\]/]/g')"

		## The use of jshon without checking the array size is unsafe. If the
		## size is not as expected, it probably means that source and target
		## languages are the same. Anyhow, we drop the error output.

		## Google Translate does not have the same structure when English is
		## involved. With English, the list of translations is in cell 1,
		## without English it is in cell 3. Without English, there is no
		## distinction on grammatical type.

		## printf is used instead of echo to feed jshon: some echo
		## implementations interpret the backslash escapes found in json
		## strings.
		if [ "$(printf '%s\n' "$input" | jshon -e 1 -t)" = "string" ]; then
			## No English
			printf '%s\n' "$input" | jshon -e 3 -e 0 -e 2 -a -e 0 -u | _filter_join
		else
			count=0
			## Fetch grammatical type in 'type'.
			while read -r type; do
				## The here-document yields one empty line when no type
				## could be extracted: there is nothing to print then.
				[ -n "$type" ] || continue

				printf '%s:' "$type"

				## Fetch translation alternative from count-th entry
				printf '%s\n' "$input" | jshon -e 1 -e "$count" -e 1 -a -u | _filter_join
				count=$((count + 1))
			done <<EOF
$(printf '%s\n' "$input" | jshon -e 1 -a -e 0 -u)
EOF
		fi 2>/dev/null

	else
		## Use sed to fetch first entry only.
		sed -e 's/\[\[\[[^[]*\[\[[^[]*\[\([^[]*\)\].*/\1/' -e 's/"//g'
		echo
	fi
}
|
|
|
|
## Read the text to translate on standard input, query the translation
## service and pass the reply through _filter.
##
## Reads the globals SL (source language, may be empty or unset) and TL (target
## language). Sets the global TEXT and may reset OPT_LONG to 'false'.
_translate()
{
	## Fold the input into one line: newlines and tabs become spaces, runs of
	## spaces are squeezed and the ends are trimmed.
	TEXT="$(tr '\n\t' '  ' | sed -e 's/  */ /g' -e 's/^ //' -e 's/ $//')"

	## If input is not a single word, disable alternative translations. The
	## text is trimmed above, so any remaining space separates two words.
	case $TEXT in
	*' '*) OPT_LONG=false ;;
	esac

	## Let curl percent-encode the values (-G appends the data to the URL as
	## a query string). Characters such as '&', '#', '%' or '+' in the text
	## would otherwise alter the request. The positional parameters of the
	## function are used as the argument list so that every value stays one
	## word, whatever it contains.
	set -- -G -A "Mozilla/5.0" -d "client=t" --data-urlencode "text=$TEXT"
	if [ -n "$SL" ]; then
		set -- "$@" --data-urlencode "sl=$SL"
	fi
	set -- "$@" --data-urlencode "tl=$TL" -d "ie=UTF-8" -d "oe=UTF-8"

	## curl's progress meter and error messages are dropped.
	curl "$@" 'http://translate.google.com/translate_a/t' 2>/dev/null | _filter
}
|
|
|
|
## Entry point: the remaining command-line arguments, if any, are the text to
## translate; otherwise the text is read from standard input.
if [ $# -eq 0 ]; then
	_translate
else
	## printf passes the text verbatim, whereas echo may swallow a leading
	## '-n' or '-e' and interpret backslashes. "$*" joins the arguments with
	## spaces (default IFS), as echo did.
	printf '%s\n' "$*" | _translate
fi
|
|
|
|
## Original command:
|
|
# curl -A "Mozilla/5.0" 'http://translate.google.com/translate_a/t?client=t&text=hello&hl=en&sl=en&tl=zh-CN&ie=UTF-8&oe=UTF-8&multires=1&prev=btn&ssel=0&tsel=0&sc=1' | sed 's/\[\[\["\([^"]*\).*/\1/'
|