forked from nidhaloff/deep-translator
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmicrosoft.py
136 lines (119 loc) · 4.77 KB
/
microsoft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# -*- coding: utf-8 -*-
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import logging
import os
import sys
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR
from deep_translator.exceptions import (
ApiKeyException,
MicrosoftAPIerror,
TranslationNotFound,
)
from deep_translator.validate import is_input_valid
class MicrosoftTranslator(BaseTranslator):
"""
the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)
"""
def __init__(
self,
source: str = "auto",
target: str = "en",
api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
region: Optional[str] = None,
proxies: Optional[dict] = None,
**kwargs,
):
"""
@params api_key and target are the required params
@param api_key: your Microsoft API key
@param region: your Microsoft Location
"""
if not api_key:
raise ApiKeyException(env_var=MSFT_ENV_VAR)
self.api_key = api_key
self.proxies = proxies
self.headers = {
"Ocp-Apim-Subscription-Key": self.api_key,
"Content-type": "application/json",
}
# parameter region is not required but very common and goes to headers if passed
if region:
self.region = region
self.headers["Ocp-Apim-Subscription-Region"] = self.region
super().__init__(
base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
source=source,
target=target,
languages=self._get_supported_languages(),
**kwargs,
)
# this function get the actual supported languages of the msft translator and store them in a dict, where
# the keys are the abbreviations and the values are the languages
# a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES
def _get_supported_languages(self):
microsoft_languages_api_url = (
"https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope"
"=translation "
)
microsoft_languages_response = requests.get(
microsoft_languages_api_url
)
translation_dict = microsoft_languages_response.json()["translation"]
return {
translation_dict[k]["name"].lower(): k.lower()
for k in translation_dict.keys()
}
def translate(self, text: str, **kwargs) -> str:
"""
function that uses microsoft translate to translate a text
@param text: desired text to translate
@return: str: translated text
"""
# a body must be a list of dicts to process multiple texts;
# I have not added multiple text processing here since it is covered by the translate_batch method
response = None
if is_input_valid(text):
self._url_params["from"] = self._source
self._url_params["to"] = self._target
valid_microsoft_json = [{"text": text}]
try:
response = requests.post(
self._base_url,
params=self._url_params,
headers=self.headers,
json=valid_microsoft_json,
proxies=self.proxies,
)
except requests.exceptions.RequestException:
exc_type, value, traceback = sys.exc_info()
logging.warning(f"Returned error: {exc_type.__name__}")
if response is None:
raise TranslationNotFound(text)
# Where Microsoft API responds with an api error, it returns a dict in response.json()
if type(response.json()) is dict:
error_message = response.json()["error"]
raise MicrosoftAPIerror(error_message)
# Where it responds with a translation, its response.json() is a list
# e.g. [{'translations': [{'text':'Hello world!', 'to': 'en'}]}]
elif type(response.json()) is list:
all_translations = [
i["text"] for i in response.json()[0]["translations"]
]
return "\n".join(all_translations)
def translate_file(self, path: str, **kwargs) -> str:
"""
translate from a file
@param path: path to file
@return: translated text
"""
return self._translate_file(path, **kwargs)
def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
"""
translate a batch of texts
@param batch: list of texts to translate
@return: list of translations
"""
return self._translate_batch(batch, **kwargs)