Update app.py
Browse files
app.py
CHANGED
|
@@ -8,11 +8,11 @@ import httpx
|
|
| 8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 9 |
# Language options and mappings
|
| 10 |
favourite_langs = {"Romanian": "ro", "German": "de", "English": "en", "-----": "-----"}
|
| 11 |
-
# langs = ["German", "Romanian", "English", "French", "Spanish", "Italian",]
|
| 12 |
df = pl.read_parquet("isolanguages.parquet")
|
| 13 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 14 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
| 15 |
name_to_iso1 = {iso[0]: iso[1] for iso in non_empty_isos} # {'Romanian': 'ro', 'German': 'de'}
|
|
|
|
| 16 |
langs = list(favourite_langs.keys())
|
| 17 |
langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
|
| 18 |
# all_langs = languagecodes.iso_languages_byname
|
|
@@ -38,19 +38,6 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-O
|
|
| 38 |
"tencent/Hunyuan-MT-7B",
|
| 39 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
|
| 40 |
]
|
| 41 |
-
allmodels = ["Helsinki-NLP",
|
| 42 |
-
"Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
|
| 43 |
-
"Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
|
| 44 |
-
"Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa",
|
| 45 |
-
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
|
| 46 |
-
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 47 |
-
"facebook/m2m100_418M", "facebook/m2m100_1.2B", "Lego-MT/Lego-MT",
|
| 48 |
-
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 49 |
-
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 50 |
-
"t5-small", "t5-base", "t5-large",
|
| 51 |
-
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 52 |
-
"google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
|
| 53 |
-
]
|
| 54 |
class Translators:
|
| 55 |
def __init__(self, model_name: str, sl: str, tl: str, input_text: str):
|
| 56 |
self.model_name = model_name
|
|
@@ -539,7 +526,7 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
|
|
| 539 |
message_text = f'Translated from {s_language} to {t_language} with {model_name}'
|
| 540 |
translated_text = None
|
| 541 |
try:
|
| 542 |
-
if
|
| 543 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
|
| 544 |
|
| 545 |
elif "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
|
|
@@ -548,6 +535,9 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
|
|
| 548 |
elif model_name == "Helsinki-NLP":
|
| 549 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
|
| 550 |
|
|
|
|
|
|
|
|
|
|
| 551 |
elif "HPLT" in model_name:
|
| 552 |
if model_name == "HPLT-OPUS":
|
| 553 |
translated_text, message = Translators(model_name, sl, tl, input_text).hplt(opus = True)
|
|
@@ -618,9 +608,6 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
|
|
| 618 |
elif model_name == "Bergamot":
|
| 619 |
translated_text, message_text = Translators(model_name, s_language, t_language, input_text).bergamot()
|
| 620 |
|
| 621 |
-
elif model_name == "QUICKMT":
|
| 622 |
-
translated_text, message_text = Translators(model_name, sl, tl, input_text).quickmt()
|
| 623 |
-
|
| 624 |
elif "Hunyuan" in model_name:
|
| 625 |
translated_text = Translators(model_name, s_language, t_language, input_text).hunyuan()
|
| 626 |
|
|
|
|
| 8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 9 |
# Language options and mappings
|
| 10 |
favourite_langs = {"Romanian": "ro", "German": "de", "English": "en", "-----": "-----"}
|
|
|
|
| 11 |
df = pl.read_parquet("isolanguages.parquet")
|
| 12 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 13 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
| 14 |
name_to_iso1 = {iso[0]: iso[1] for iso in non_empty_isos} # {'Romanian': 'ro', 'German': 'de'}
|
| 15 |
+
# langs = ["German", "Romanian", "English", "French", "Spanish", "Italian"]
|
| 16 |
langs = list(favourite_langs.keys())
|
| 17 |
langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
|
| 18 |
# all_langs = languagecodes.iso_languages_byname
|
|
|
|
| 38 |
"tencent/Hunyuan-MT-7B",
|
| 39 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
|
| 40 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
class Translators:
|
| 42 |
def __init__(self, model_name: str, sl: str, tl: str, input_text: str):
|
| 43 |
self.model_name = model_name
|
|
|
|
| 526 |
message_text = f'Translated from {s_language} to {t_language} with {model_name}'
|
| 527 |
translated_text = None
|
| 528 |
try:
|
| 529 |
+
if model_name == "Helsinki-NLP/opus-mt-tc-bible-big-roa-en":
|
| 530 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
|
| 531 |
|
| 532 |
elif "-mul" in model_name.lower() or "mul-" in model_name.lower() or "-roa" in model_name.lower():
|
|
|
|
| 535 |
elif model_name == "Helsinki-NLP":
|
| 536 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
|
| 537 |
|
| 538 |
+
elif model_name == "QUICKMT":
|
| 539 |
+
translated_text, message_text = Translators(model_name, sl, tl, input_text).quickmt()
|
| 540 |
+
|
| 541 |
elif "HPLT" in model_name:
|
| 542 |
if model_name == "HPLT-OPUS":
|
| 543 |
translated_text, message = Translators(model_name, sl, tl, input_text).hplt(opus = True)
|
|
|
|
| 608 |
elif model_name == "Bergamot":
|
| 609 |
translated_text, message_text = Translators(model_name, s_language, t_language, input_text).bergamot()
|
| 610 |
|
|
|
|
|
|
|
|
|
|
| 611 |
elif "Hunyuan" in model_name:
|
| 612 |
translated_text = Translators(model_name, s_language, t_language, input_text).hunyuan()
|
| 613 |
|