Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ all_langs = languagecodes.iso_languages
|
|
| 12 |
options = list(favourite_langs.keys())
|
| 13 |
options.extend(list(all_langs.keys()))
|
| 14 |
models = ["Helsinki-NLP",
|
|
|
|
| 15 |
"t5-small", "t5-base", "t5-large",
|
| 16 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 17 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B",
|
|
@@ -19,11 +20,8 @@ models = ["Helsinki-NLP",
|
|
| 19 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 20 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 21 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 22 |
-
"Unbabel/Tower-Plus-2B",
|
| 23 |
-
"
|
| 24 |
-
"Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 25 |
-
"openGPT-X/Teuken-7B-instruct-commercial-v0.4",
|
| 26 |
-
"openGPT-X/Teuken-7B-instruct-v0.6"
|
| 27 |
]
|
| 28 |
|
| 29 |
def model_to_cuda(model):
|
|
@@ -35,6 +33,30 @@ def model_to_cuda(model):
|
|
| 35 |
print("CUDA not available! Using CPU.")
|
| 36 |
return model
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
|
| 39 |
if model_name == "Helsinki-NLP":
|
| 40 |
message_text = f'Translated from {sl} to {tl} with {model_name}.'
|
|
@@ -241,7 +263,11 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
|
|
| 241 |
if model_name.startswith("Helsinki-NLP"):
|
| 242 |
translated_text, message_text = HelsinkiNLP(sl, tl, input_text)
|
| 243 |
return translated_text, message_text
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 246 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 247 |
return translated_text, message_text
|
|
|
|
| 12 |
options = list(favourite_langs.keys())
|
| 13 |
options.extend(list(all_langs.keys()))
|
| 14 |
models = ["Helsinki-NLP",
|
| 15 |
+
"Argos",
|
| 16 |
"t5-small", "t5-base", "t5-large",
|
| 17 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 18 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B",
|
|
|
|
| 20 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 21 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 22 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 23 |
+
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 24 |
+
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"
|
|
|
|
|
|
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
def model_to_cuda(model):
|
|
|
|
| 33 |
print("CUDA not available! Using CPU.")
|
| 34 |
return model
|
| 35 |
|
| 36 |
+
def download_argos_model(from_code, to_code):
    """Download and install the Argos Translate package for a language pair.

    Queries the remote package index, picks the package whose source/target
    codes match (from_code, to_code), downloads it, and installs it locally.

    Raises:
        StopIteration: when no package exists for this language pair
            (callers rely on this to report an unsupported combination).
    """
    import argostranslate.package

    print('Downloading model', from_code, to_code)
    # Refresh the remote index, then pick the first matching package.
    argostranslate.package.update_package_index()
    candidates = argostranslate.package.get_available_packages()
    wanted = next(
        pkg for pkg in candidates
        if pkg.from_code == from_code and pkg.to_code == to_code
    )
    argostranslate.package.install_from_path(wanted.download())
|
| 48 |
+
|
| 49 |
+
def argos(sl, tl, input_text):
    """Translate input_text from sl to tl with Argos Translate.

    Downloads/installs the required Argos model on demand, then translates.

    Returns:
        The translated text, or a human-readable error message when no
        model exists for the pair or translation fails.
    """
    import argostranslate.translate
    try:
        download_argos_model(sl, tl)
        translated_text = argostranslate.translate.translate(input_text, sl, tl)
    except StopIteration:
        # Raised by next() in download_argos_model when no package matches.
        # Bug fix: the original referenced sselected_language/tselected_language,
        # which are not in scope here and raised NameError; use sl/tl instead.
        translated_text = f"No Argos model for {sl} to {tl}. Try other model or languages combination!"
    except Exception as error:
        # Return a string, not the raw exception object, so the UI can render it.
        translated_text = str(error)
    # Bug fix: the original had no return, so the caller always received None.
    return translated_text
|
| 59 |
+
|
| 60 |
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
|
| 61 |
if model_name == "Helsinki-NLP":
|
| 62 |
message_text = f'Translated from {sl} to {tl} with {model_name}.'
|
|
|
|
| 263 |
if model_name.startswith("Helsinki-NLP"):
|
| 264 |
translated_text, message_text = HelsinkiNLP(sl, tl, input_text)
|
| 265 |
return translated_text, message_text
|
| 266 |
+
|
| 267 |
+
elif 'Argos' in model_name:
|
| 268 |
+
translated_text = argos(sl, tl, input_text)
|
| 269 |
+
return translated_text, message_text
|
| 270 |
+
|
| 271 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 272 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 273 |
return translated_text, message_text
|