From 49bebb9c1f51bff23605201e6a89c0f48258834c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonat=C3=A3=20Bolzan=20Loss?= Date: Fri, 29 Jan 2021 13:42:08 -0300 Subject: [PATCH 1/5] Fix on translating special characters --- pygoogletranslation/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pygoogletranslation/utils.py b/pygoogletranslation/utils.py index fb523eb..6f7cc43 100644 --- a/pygoogletranslation/utils.py +++ b/pygoogletranslation/utils.py @@ -31,10 +31,10 @@ def format_querystring(token, text, src='auto', dest='en'): def format_param(rpcids): params = { 'rpcids': rpcids, - 'bl': 'boq_translate-webserver_20201207.13_p0', - 'soc-app': 1, - 'soc-platform': 1, - 'soc-device': 1, + 'bl': 'boq_translate-webserver_20201207.13_p0', + 'soc-app': 1, + 'soc-platform': 1, + 'soc-device': 1, 'rt': 'c' } return params @@ -59,11 +59,15 @@ def format_response(a): flag = not flag _b = 'pygoogletranslation' if flag: + # Code cleanup + _b = _b.replace('\\n', '') + _b = _b.replace('\\\\', '\\') + _b = _b.replace('\\"', '"') li_filter.append(_b) - fi_data = str(''.join(li_filter)).replace('"[', '[').replace(']"', ']').replace('\\n', '').replace('\\','') - li_data = json.loads(fi_data.split('pygoogletranslation')[1].replace('"[', '[').replace(']"', ']')) + fi_data = str(''.join(li_filter)).replace('","[', '",[').replace(']",null', '],null') + li_data = json.loads(fi_data.rsplit('pygoogletranslation', 1)[-1]) return li_data - + def tokenize_sentence(text): text_len = 0 token_text = '' @@ -138,4 +142,3 @@ def fix_trans_error(translated): else: text = translated return str(text) - From c9ab60544de5ccbcac8b460b75f109bdaf9577d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonat=C3=A3=20Bolzan=20Loss?= Date: Fri, 29 Jan 2021 14:03:05 -0300 Subject: [PATCH 2/5] Small fix for special characters --- README.rst | 459 ----------------------------------- pygoogletranslation/utils.py | 2 +- 2 files changed, 1 insertion(+), 460 deletions(-) delete mode 100644 README.rst diff --git a/README.rst b/README.rst deleted file mode 100644 index 5c39afe..0000000 --- a/README.rst +++ /dev/null @@ -1,459 +0,0 @@ -PYGOOGLETRANSLATION -=================== - -https://pypi.org/project/pygoogletranslation/ - -|GitHub license| |travis status| |Documentation Status| |PyPI version| -|Coverage Status| |Code Climate| - -**Unlimited Text Translation** (no limitation) - -pygoogletranslation is a **free** and **unlimited** python library that -implemented Google Translate API. This uses the `Google Translate Ajax -API `__ to make calls to such methods as -detect and translate. - -Compatible with Python 3.6+. - - -Features --------- - -- Translation from file (.doc, .docx, .pdf, .txt) -- Fast and reliable - it uses the same servers that - translate.google.com uses -- Auto language detection -- Bulk translations -- Request - -TODO -~~~~ - -more features are coming soon. - -- Proxy support -- Internal session management (for better bulk translations) - -Python Request Module -~~~~~~~~~~~~~~ - -This library uses request to get an data from google. - -Request : - POST - GET - - - -How does this library work -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You may wonder how this library works properly, whereas other -python translation package use the token mechanism but that is -failling because google has changed their token mechanism. - --------------- - -Installation ------------- - -To install, either use things like pip with the package "pygoogletranslation" -or download the package and put the "pygoogletranslation" directory into your -python path. - -.. code:: bash - - $ pip install pygoogletranslation - -Basic Usage ------------ - -If source language is not given, google translate attempts to detect the -source language. - - -.. code:: python - - >>> from pygoogletranslation import Translator - >>> translator = Translator() - >>> translator.translate('Good Morning', dest='ta') - # - >>> translator.translate('안녕하세요.', dest='ja') - # - >>> translator.translate('veritas lux mea', src='la') - # - -Customize proxy URL -~~~~~~~~~~~~~~~~~~~~~ - -You can use proxies in the translation. - -.. code:: python - - >>> from pygoogletranslation import Translator - >>> translator = Translator(proxies=YOUR_PROXIES) - -Advanced Usage (Bulk) -~~~~~~~~~~~~~~~~~~~~~ - -Array can be used to translate a batch of strings in a single method -call and a single HTTP session. The exact same method shown above works -for arrays as well. - -.. code:: python - - >>> from pygoogletranslation import Translator - >>> translator = Translator() - >>> t = (translator.translate(["Good ' Morning", "India"], dest="ta")) - >>> for _t in t: - >>> print(_t.text) - # காலை வணக்கம் - # இந்தியா - - -Language detection -~~~~~~~~~~~~~~~~~~ - -The detect method, as its name implies, identifies the language used in -a given sentence. - -.. code:: python - - >>> from pygoogletranslation import Translator - >>> translator = Translator() - >>> translator.detect('காலை வணக்கம்,') - # - >>> translator.detect('この文章は日本語で書かれました。') - # - >>> translator.detect('This sentence is written in English.') - # - >>> translator.detect('Tiu frazo estas skribita en Esperanto.') - # - -Translation from document (.doc, .docx, .pdf, .txt): ---------------------------------------------- - >>> from pygoogletranslation import Translator - >>> translator = Translator() - >>> translator.bulktranslate('test.txt', dest="ta") - # - # for bulk translation, sometimes you might get an error with response - # code "429" - Too Many attempts. - # To overcome this error, add below parameter. - >>> translator = Translator(retry=NO_OF_ATTEMPTS, sleep=WAIT_SECONDS, retry_message=TRUE) - >>> translator.bulktranslate('test.txt', dest="ta") - # retry - no of attemps (default- 3 times) - # sleep - no of attempts after seconds (default- 5 seconds) - # retry_message - True - display retrying message (default- False) - - -pygoogletranslation to get Language and Language Codes -------------------------------------------------------- - >>> from pygoogletranslation import Translator - >>> translator = Translator() - >>> translator.glanguage() - >>> { - "sl": { - "auto": "Detect language", - "af": "Afrikaans", - "sq": "Albanian", - "am": "Amharic", - "ar": "Arabic", - "hy": "Armenian", - "az": "Azerbaijani", - "eu": "Basque", - "be": "Belarusian", - "bn": "Bengali", - "bs": "Bosnian", - "bg": "Bulgarian", - "ca": "Catalan", - "ceb": "Cebuano", - "ny": "Chichewa", - "zh-CN": "Chinese", - "co": "Corsican", - "hr": "Croatian", - "cs": "Czech", - "da": "Danish", - "nl": "Dutch", - "en": "English", - "eo": "Esperanto", - "et": "Estonian", - "tl": "Filipino", - "fi": "Finnish", - "fr": "French", - "fy": "Frisian", - "gl": "Galician", - "ka": "Georgian", - "de": "German", - "el": "Greek", - "gu": "Gujarati", - "ht": "Haitian Creole", - "ha": "Hausa", - "haw": "Hawaiian", - "iw": "Hebrew", - "hi": "Hindi", - "hmn": "Hmong", - "hu": "Hungarian", - "is": "Icelandic", - "ig": "Igbo", - "id": "Indonesian", - "ga": "Irish", - "it": "Italian", - "ja": "Japanese", - "jw": "Javanese", - "kn": "Kannada", - "kk": "Kazakh", - "km": "Khmer", - "rw": "Kinyarwanda", - "ko": "Korean", - "ku": "Kurdish (Kurmanji)", - "ky": "Kyrgyz", - "lo": "Lao", - "la": "Latin", - "lv": "Latvian", - "lt": "Lithuanian", - "lb": "Luxembourgish", - "mk": "Macedonian", - "mg": "Malagasy", - "ms": "Malay", - "ml": "Malayalam", - "mt": "Maltese", - "mi": "Maori", - "mr": "Marathi", - "mn": "Mongolian", - "my": "Myanmar (Burmese)", - "ne": "Nepali", - "no": "Norwegian", - "or": "Odia (Oriya)", - "ps": "Pashto", - "fa": "Persian", - "pl": "Polish", - "pt": "Portuguese", - "pa": "Punjabi", - "ro": "Romanian", - "ru": "Russian", - "sm": "Samoan", - "gd": "Scots Gaelic", - "sr": "Serbian", - "st": "Sesotho", - "sn": "Shona", - "sd": "Sindhi", - "si": "Sinhala", - "sk": "Slovak", - "sl": "Slovenian", - "so": "Somali", - "es": "Spanish", - "su": "Sundanese", - "sw": "Swahili", - "sv": "Swedish", - "tg": "Tajik", - "ta": "Tamil", - "tt": "Tatar", - "te": "Telugu", - "th": "Thai", - "tr": "Turkish", - "tk": "Turkmen", - "uk": "Ukrainian", - "ur": "Urdu", - "ug": "Uyghur", - "uz": "Uzbek", - "vi": "Vietnamese", - "cy": "Welsh", - "xh": "Xhosa", - "yi": "Yiddish", - "yo": "Yoruba", - "zu": "Zulu" - }, - "tl": { - "af": "Afrikaans", - "sq": "Albanian", - "am": "Amharic", - "ar": "Arabic", - "hy": "Armenian", - "az": "Azerbaijani", - "eu": "Basque", - "be": "Belarusian", - "bn": "Bengali", - "bs": "Bosnian", - "bg": "Bulgarian", - "ca": "Catalan", - "ceb": "Cebuano", - "ny": "Chichewa", - "zh-CN": "Chinese (Simplified)", - "zh-TW": "Chinese (Traditional)", - "co": "Corsican", - "hr": "Croatian", - "cs": "Czech", - "da": "Danish", - "nl": "Dutch", - "en": "English", - "eo": "Esperanto", - "et": "Estonian", - "tl": "Filipino", - "fi": "Finnish", - "fr": "French", - "fy": "Frisian", - "gl": "Galician", - "ka": "Georgian", - "de": "German", - "el": "Greek", - "gu": "Gujarati", - "ht": "Haitian Creole", - "ha": "Hausa", - "haw": "Hawaiian", - "iw": "Hebrew", - "hi": "Hindi", - "hmn": "Hmong", - "hu": "Hungarian", - "is": "Icelandic", - "ig": "Igbo", - "id": "Indonesian", - "ga": "Irish", - "it": "Italian", - "ja": "Japanese", - "jw": "Javanese", - "kn": "Kannada", - "kk": "Kazakh", - "km": "Khmer", - "rw": "Kinyarwanda", - "ko": "Korean", - "ku": "Kurdish (Kurmanji)", - "ky": "Kyrgyz", - "lo": "Lao", - "la": "Latin", - "lv": "Latvian", - "lt": "Lithuanian", - "lb": "Luxembourgish", - "mk": "Macedonian", - "mg": "Malagasy", - "ms": "Malay", - "ml": "Malayalam", - "mt": "Maltese", - "mi": "Maori", - "mr": "Marathi", - "mn": "Mongolian", - "my": "Myanmar (Burmese)", - "ne": "Nepali", - "no": "Norwegian", - "or": "Odia (Oriya)", - "ps": "Pashto", - "fa": "Persian", - "pl": "Polish", - "pt": "Portuguese", - "pa": "Punjabi", - "ro": "Romanian", - "ru": "Russian", - "sm": "Samoan", - "gd": "Scots Gaelic", - "sr": "Serbian", - "st": "Sesotho", - "sn": "Shona", - "sd": "Sindhi", - "si": "Sinhala", - "sk": "Slovak", - "sl": "Slovenian", - "so": "Somali", - "es": "Spanish", - "su": "Sundanese", - "sw": "Swahili", - "sv": "Swedish", - "tg": "Tajik", - "ta": "Tamil", - "tt": "Tatar", - "te": "Telugu", - "th": "Thai", - "tr": "Turkish", - "tk": "Turkmen", - "uk": "Ukrainian", - "ur": "Urdu", - "ug": "Uyghur", - "uz": "Uzbek", - "vi": "Vietnamese", - "cy": "Welsh", - "xh": "Xhosa", - "yi": "Yiddish", - "yo": "Yoruba", - "zu": "Zulu" - }, - "al": {} - } - --------------- - -Note on library usage ---------------------- - -DISCLAIMER: this is an unofficial library using the web API of translate.google.com -and also is not associated with Google. - -- - -- Due to limitations of the web version of google translate, this API - does not guarantee that the library would work properly at all times - (so please use this library if you don't care about stability). - -- **Important:** If you want to use a stable API, I highly recommend you to use - `Google's official translate - API `__. - -- If you get HTTP 5xx error or errors like #6, it's probably because - Google has banned your client IP address. - --------------- - -Versioning ----------- - -This library follows `Semantic Versioning `__ from -v2.0.0. Any release versioned 0.x.y is subject to backwards incompatible -changes at any time. - -Contributing -------------------------- - -Contributions are more than welcomed. See -`CONTRIBUTING.md `__ - ------------------------------------------ - -License -------- - -pygoogletranslation is licensed under the MIT License. The terms are as -follows: - -:: -MIT License - -Copyright (c) 2021 Saravananslb - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -.. |GitHub license| image:: https://img.shields.io/github/license/mashape/apistatus.svg - :target: http://opensource.org/licenses/MIT -.. |travis status| image:: https://travis-ci.org/ssut/py-googletrans.svg?branch=master - :target: https://travis-ci.org/Saravananslb/py-googletranslation -.. |Documentation Status| image:: https://readthedocs.org/projects/py-googletrans/badge/?version=latest - -.. |PyPI version| image:: https://badge.fury.io/py/pygoogletranslation.svg - :target: http://badge.fury.io/py/pygoogletranslation -.. |Coverage Status| image:: https://coveralls.io/repos/github/ssut/py-googletrans/badge.svg - -.. |Code Climate| image:: https://codeclimate.com/github/ssut/py-googletrans/badges/gpa.svg - diff --git a/pygoogletranslation/utils.py b/pygoogletranslation/utils.py index 6f7cc43..5bb5f30 100644 --- a/pygoogletranslation/utils.py +++ b/pygoogletranslation/utils.py @@ -64,7 +64,7 @@ def format_response(a): _b = _b.replace('\\\\', '\\') _b = _b.replace('\\"', '"') li_filter.append(_b) - fi_data = str(''.join(li_filter)).replace('","[', '",[').replace(']",null', '],null') + fi_data = str(''.join(li_filter)).replace('","[', '",[', 1).replace(']",null', '],null') li_data = json.loads(fi_data.rsplit('pygoogletranslation', 1)[-1]) return li_data From 38def24f3caeaae68ab199eef0dfbf746ed9073f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonat=C3=A3=20Bolzan=20Loss?= Date: Fri, 29 Jan 2021 14:03:33 -0300 Subject: [PATCH 3/5] Fix file name for setup.py --- README.md | 459 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..5c39afe --- /dev/null +++ b/README.md @@ -0,0 +1,459 @@ +PYGOOGLETRANSLATION +=================== + +https://pypi.org/project/pygoogletranslation/ + +|GitHub license| |travis status| |Documentation Status| |PyPI version| +|Coverage Status| |Code Climate| + +**Unlimited Text Translation** (no limitation) + +pygoogletranslation is a **free** and **unlimited** python library that +implemented Google Translate API. This uses the `Google Translate Ajax +API `__ to make calls to such methods as +detect and translate. + +Compatible with Python 3.6+. + + +Features +-------- + +- Translation from file (.doc, .docx, .pdf, .txt) +- Fast and reliable - it uses the same servers that + translate.google.com uses +- Auto language detection +- Bulk translations +- Request + +TODO +~~~~ + +more features are coming soon. + +- Proxy support +- Internal session management (for better bulk translations) + +Python Request Module +~~~~~~~~~~~~~~ + +This library uses request to get an data from google. + +Request : + POST + GET + + + +How does this library work +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may wonder how this library works properly, whereas other +python translation package use the token mechanism but that is +failling because google has changed their token mechanism. + +-------------- + +Installation +------------ + +To install, either use things like pip with the package "pygoogletranslation" +or download the package and put the "pygoogletranslation" directory into your +python path. + +.. code:: bash + + $ pip install pygoogletranslation + +Basic Usage +----------- + +If source language is not given, google translate attempts to detect the +source language. + + +.. code:: python + + >>> from pygoogletranslation import Translator + >>> translator = Translator() + >>> translator.translate('Good Morning', dest='ta') + # + >>> translator.translate('안녕하세요.', dest='ja') + # + >>> translator.translate('veritas lux mea', src='la') + # + +Customize proxy URL +~~~~~~~~~~~~~~~~~~~~~ + +You can use proxies in the translation. + +.. code:: python + + >>> from pygoogletranslation import Translator + >>> translator = Translator(proxies=YOUR_PROXIES) + +Advanced Usage (Bulk) +~~~~~~~~~~~~~~~~~~~~~ + +Array can be used to translate a batch of strings in a single method +call and a single HTTP session. The exact same method shown above works +for arrays as well. + +.. code:: python + + >>> from pygoogletranslation import Translator + >>> translator = Translator() + >>> t = (translator.translate(["Good ' Morning", "India"], dest="ta")) + >>> for _t in t: + >>> print(_t.text) + # காலை வணக்கம் + # இந்தியா + + +Language detection +~~~~~~~~~~~~~~~~~~ + +The detect method, as its name implies, identifies the language used in +a given sentence. + +.. code:: python + + >>> from pygoogletranslation import Translator + >>> translator = Translator() + >>> translator.detect('காலை வணக்கம்,') + # + >>> translator.detect('この文章は日本語で書かれました。') + # + >>> translator.detect('This sentence is written in English.') + # + >>> translator.detect('Tiu frazo estas skribita en Esperanto.') + # + +Translation from document (.doc, .docx, .pdf, .txt): +--------------------------------------------- + >>> from pygoogletranslation import Translator + >>> translator = Translator() + >>> translator.bulktranslate('test.txt', dest="ta") + # + # for bulk translation, sometimes you might get an error with response + # code "429" - Too Many attempts. + # To overcome this error, add below parameter. + >>> translator = Translator(retry=NO_OF_ATTEMPTS, sleep=WAIT_SECONDS, retry_message=TRUE) + >>> translator.bulktranslate('test.txt', dest="ta") + # retry - no of attemps (default- 3 times) + # sleep - no of attempts after seconds (default- 5 seconds) + # retry_message - True - display retrying message (default- False) + + +pygoogletranslation to get Language and Language Codes +------------------------------------------------------- + >>> from pygoogletranslation import Translator + >>> translator = Translator() + >>> translator.glanguage() + >>> { + "sl": { + "auto": "Detect language", + "af": "Afrikaans", + "sq": "Albanian", + "am": "Amharic", + "ar": "Arabic", + "hy": "Armenian", + "az": "Azerbaijani", + "eu": "Basque", + "be": "Belarusian", + "bn": "Bengali", + "bs": "Bosnian", + "bg": "Bulgarian", + "ca": "Catalan", + "ceb": "Cebuano", + "ny": "Chichewa", + "zh-CN": "Chinese", + "co": "Corsican", + "hr": "Croatian", + "cs": "Czech", + "da": "Danish", + "nl": "Dutch", + "en": "English", + "eo": "Esperanto", + "et": "Estonian", + "tl": "Filipino", + "fi": "Finnish", + "fr": "French", + "fy": "Frisian", + "gl": "Galician", + "ka": "Georgian", + "de": "German", + "el": "Greek", + "gu": "Gujarati", + "ht": "Haitian Creole", + "ha": "Hausa", + "haw": "Hawaiian", + "iw": "Hebrew", + "hi": "Hindi", + "hmn": "Hmong", + "hu": "Hungarian", + "is": "Icelandic", + "ig": "Igbo", + "id": "Indonesian", + "ga": "Irish", + "it": "Italian", + "ja": "Japanese", + "jw": "Javanese", + "kn": "Kannada", + "kk": "Kazakh", + "km": "Khmer", + "rw": "Kinyarwanda", + "ko": "Korean", + "ku": "Kurdish (Kurmanji)", + "ky": "Kyrgyz", + "lo": "Lao", + "la": "Latin", + "lv": "Latvian", + "lt": "Lithuanian", + "lb": "Luxembourgish", + "mk": "Macedonian", + "mg": "Malagasy", + "ms": "Malay", + "ml": "Malayalam", + "mt": "Maltese", + "mi": "Maori", + "mr": "Marathi", + "mn": "Mongolian", + "my": "Myanmar (Burmese)", + "ne": "Nepali", + "no": "Norwegian", + "or": "Odia (Oriya)", + "ps": "Pashto", + "fa": "Persian", + "pl": "Polish", + "pt": "Portuguese", + "pa": "Punjabi", + "ro": "Romanian", + "ru": "Russian", + "sm": "Samoan", + "gd": "Scots Gaelic", + "sr": "Serbian", + "st": "Sesotho", + "sn": "Shona", + "sd": "Sindhi", + "si": "Sinhala", + "sk": "Slovak", + "sl": "Slovenian", + "so": "Somali", + "es": "Spanish", + "su": "Sundanese", + "sw": "Swahili", + "sv": "Swedish", + "tg": "Tajik", + "ta": "Tamil", + "tt": "Tatar", + "te": "Telugu", + "th": "Thai", + "tr": "Turkish", + "tk": "Turkmen", + "uk": "Ukrainian", + "ur": "Urdu", + "ug": "Uyghur", + "uz": "Uzbek", + "vi": "Vietnamese", + "cy": "Welsh", + "xh": "Xhosa", + "yi": "Yiddish", + "yo": "Yoruba", + "zu": "Zulu" + }, + "tl": { + "af": "Afrikaans", + "sq": "Albanian", + "am": "Amharic", + "ar": "Arabic", + "hy": "Armenian", + "az": "Azerbaijani", + "eu": "Basque", + "be": "Belarusian", + "bn": "Bengali", + "bs": "Bosnian", + "bg": "Bulgarian", + "ca": "Catalan", + "ceb": "Cebuano", + "ny": "Chichewa", + "zh-CN": "Chinese (Simplified)", + "zh-TW": "Chinese (Traditional)", + "co": "Corsican", + "hr": "Croatian", + "cs": "Czech", + "da": "Danish", + "nl": "Dutch", + "en": "English", + "eo": "Esperanto", + "et": "Estonian", + "tl": "Filipino", + "fi": "Finnish", + "fr": "French", + "fy": "Frisian", + "gl": "Galician", + "ka": "Georgian", + "de": "German", + "el": "Greek", + "gu": "Gujarati", + "ht": "Haitian Creole", + "ha": "Hausa", + "haw": "Hawaiian", + "iw": "Hebrew", + "hi": "Hindi", + "hmn": "Hmong", + "hu": "Hungarian", + "is": "Icelandic", + "ig": "Igbo", + "id": "Indonesian", + "ga": "Irish", + "it": "Italian", + "ja": "Japanese", + "jw": "Javanese", + "kn": "Kannada", + "kk": "Kazakh", + "km": "Khmer", + "rw": "Kinyarwanda", + "ko": "Korean", + "ku": "Kurdish (Kurmanji)", + "ky": "Kyrgyz", + "lo": "Lao", + "la": "Latin", + "lv": "Latvian", + "lt": "Lithuanian", + "lb": "Luxembourgish", + "mk": "Macedonian", + "mg": "Malagasy", + "ms": "Malay", + "ml": "Malayalam", + "mt": "Maltese", + "mi": "Maori", + "mr": "Marathi", + "mn": "Mongolian", + "my": "Myanmar (Burmese)", + "ne": "Nepali", + "no": "Norwegian", + "or": "Odia (Oriya)", + "ps": "Pashto", + "fa": "Persian", + "pl": "Polish", + "pt": "Portuguese", + "pa": "Punjabi", + "ro": "Romanian", + "ru": "Russian", + "sm": "Samoan", + "gd": "Scots Gaelic", + "sr": "Serbian", + "st": "Sesotho", + "sn": "Shona", + "sd": "Sindhi", + "si": "Sinhala", + "sk": "Slovak", + "sl": "Slovenian", + "so": "Somali", + "es": "Spanish", + "su": "Sundanese", + "sw": "Swahili", + "sv": "Swedish", + "tg": "Tajik", + "ta": "Tamil", + "tt": "Tatar", + "te": "Telugu", + "th": "Thai", + "tr": "Turkish", + "tk": "Turkmen", + "uk": "Ukrainian", + "ur": "Urdu", + "ug": "Uyghur", + "uz": "Uzbek", + "vi": "Vietnamese", + "cy": "Welsh", + "xh": "Xhosa", + "yi": "Yiddish", + "yo": "Yoruba", + "zu": "Zulu" + }, + "al": {} + } + +-------------- + +Note on library usage +--------------------- + +DISCLAIMER: this is an unofficial library using the web API of translate.google.com +and also is not associated with Google. + +- + +- Due to limitations of the web version of google translate, this API + does not guarantee that the library would work properly at all times + (so please use this library if you don't care about stability). + +- **Important:** If you want to use a stable API, I highly recommend you to use + `Google's official translate + API `__. + +- If you get HTTP 5xx error or errors like #6, it's probably because + Google has banned your client IP address. + +-------------- + +Versioning +---------- + +This library follows `Semantic Versioning `__ from +v2.0.0. Any release versioned 0.x.y is subject to backwards incompatible +changes at any time. + +Contributing +------------------------- + +Contributions are more than welcomed. See +`CONTRIBUTING.md `__ + +----------------------------------------- + +License +------- + +pygoogletranslation is licensed under the MIT License. The terms are as +follows: + +:: +MIT License + +Copyright (c) 2021 Saravananslb + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +.. |GitHub license| image:: https://img.shields.io/github/license/mashape/apistatus.svg + :target: http://opensource.org/licenses/MIT +.. |travis status| image:: https://travis-ci.org/ssut/py-googletrans.svg?branch=master + :target: https://travis-ci.org/Saravananslb/py-googletranslation +.. |Documentation Status| image:: https://readthedocs.org/projects/py-googletrans/badge/?version=latest + +.. |PyPI version| image:: https://badge.fury.io/py/pygoogletranslation.svg + :target: http://badge.fury.io/py/pygoogletranslation +.. |Coverage Status| image:: https://coveralls.io/repos/github/ssut/py-googletrans/badge.svg + +.. |Code Climate| image:: https://codeclimate.com/github/ssut/py-googletrans/badges/gpa.svg + From fb3ce1b8722db3714fc5cc70735b03c73d851498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonat=C3=A3=20Bolzan=20Loss?= Date: Fri, 29 Jan 2021 15:45:09 -0300 Subject: [PATCH 4/5] This allows str to have newline char and other ascii --- pygoogletranslation/utils.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/pygoogletranslation/utils.py b/pygoogletranslation/utils.py index 5bb5f30..45d224d 100644 --- a/pygoogletranslation/utils.py +++ b/pygoogletranslation/utils.py @@ -60,12 +60,28 @@ def format_response(a): _b = 'pygoogletranslation' if flag: # Code cleanup - _b = _b.replace('\\n', '') _b = _b.replace('\\\\', '\\') + _b = _b.replace('\\ ', ' ') + _b = _b.replace('\\n', '\n') _b = _b.replace('\\"', '"') + + + if '\\u' in _b: + _bp = '' + p = 0 + while p < len(_b): + if _b[p:p+2] == '\\u': + _bp += bytes(_b[p:p+6], 'ascii').decode('unicode-escape') + p += 6 + else: + _bp += _b[p:p+1] + p += 1 + _b = _bp + li_filter.append(_b) - fi_data = str(''.join(li_filter)).replace('","[', '",[', 1).replace(']",null', '],null') - li_data = json.loads(fi_data.rsplit('pygoogletranslation', 1)[-1]) + + fi_data = str(''.join(li_filter)).replace('","[', '",[', 1).replace('\n",null', '\n,null') + li_data = json.loads(fi_data.split('pygoogletranslation')[1], strict=False) return li_data def tokenize_sentence(text): @@ -94,7 +110,11 @@ def format_translation(translated): pron = '' for _translated in translated: try: - text += _translated[0][2][1][0][0][5][0][0] + if len(_translated[0][2][1][0][0][5]) > 1: + for phrase in _translated[0][2][1][0][0][5]: + text += ' ' + phrase[0] + else: + text += _translated[0][2][1][0][0][5][0][0] except: text += fix_trans_error(_translated) try: @@ -102,6 +122,8 @@ def format_translation(translated): except: pron += '' + text = text.strip() + for _translated in translated: try: _translated[0][2][1][0][0][5][0][0] = text From 1bb8ef8bdae3c9c9b1e9bb1d9239af7514af9679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonat=C3=A3=20Bolzan=20Loss?= Date: Fri, 29 Jan 2021 19:31:44 -0300 Subject: [PATCH 5/5] Maybe a better approach to use a parser --- pygoogletranslation/utils.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/pygoogletranslation/utils.py b/pygoogletranslation/utils.py index 45d224d..0836c73 100644 --- a/pygoogletranslation/utils.py +++ b/pygoogletranslation/utils.py @@ -59,20 +59,23 @@ def format_response(a): flag = not flag _b = 'pygoogletranslation' if flag: - # Code cleanup - _b = _b.replace('\\\\', '\\') - _b = _b.replace('\\ ', ' ') - _b = _b.replace('\\n', '\n') - _b = _b.replace('\\"', '"') - - - if '\\u' in _b: + # Parsing to cleanup "unescaped escaped" characters + if '\\' in _b: _bp = '' p = 0 while p < len(_b): - if _b[p:p+2] == '\\u': - _bp += bytes(_b[p:p+6], 'ascii').decode('unicode-escape') - p += 6 + if _b[p:p+2] == '\\\\': + _bp += '\\' + p += 2 + elif _b[p:p+1] == '\\': + if _b[p:p+2] == '\\u': + _bp += bytes(_b[p:p+6], 'ascii').decode('unicode-escape') + p += 6 + elif _b[p:p+2] == '\\n': + _bp += '\n' + p += 2 + else: + p += 1 else: _bp += _b[p:p+1] p += 1 @@ -144,7 +147,9 @@ def fix_trans_error(translated): if len(translated[0][2][1]) > 0: if len(translated[0][2][1][0]) > 0: if len(translated[0][2][1][0][0]) > 5: - if len(translated[0][2][1][0][0][5]) > 0: + if translated[0][2][1][0][0][5] is None: + text = translated[0][2][1][0][0][0] + elif len(translated[0][2][1][0][0][5]) > 0: if len(translated[0][2][1][0][0][5][0]) > 0: text = translated[0][2][1][0][0][5][0][0] else: