TiberiuCristianLeon committed on
Commit
bd9f56c
·
verified ·
1 Parent(s): 72c74bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -39
app.py CHANGED
@@ -18,10 +18,10 @@ iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', '
18
  langs = list(favourite_langs.keys())
19
  langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
20
 
21
- models = ["Helsinki-NLP",
22
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
23
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
24
- "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa",
25
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
26
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
27
  "facebook/m2m100_418M", "facebook/m2m100_1.2B", "Lego-MT/Lego-MT",
@@ -30,11 +30,9 @@ models = ["Helsinki-NLP",
30
  "t5-small", "t5-base", "t5-large",
31
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
32
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
33
- "Argos", "Google",
34
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
35
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
36
- "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
37
- "openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"
38
  ]
39
  DEFAULTS = [langs[0], langs[1], models[0]]
40
 
@@ -78,8 +76,37 @@ class Translators:
78
  response = httpx.get(url)
79
  return response.json()[0][0][0]
80
 
81
- @classmethod
82
- def download_argos_model(cls, from_code, to_code):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  import argostranslate.package
84
  print('Downloading model', from_code, to_code)
85
  # Download and install Argos Translate package
@@ -103,6 +130,63 @@ class Translators:
103
  translated_text = error
104
  return translated_text
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def HelsinkiNLP_mulroa(self):
107
  try:
108
  pipe = pipeline("translation", model=self.model_name, device=self.device)
@@ -319,34 +403,6 @@ class Translators:
319
  output = output.rsplit(f'{self.tl}:')[-1].strip().replace('assistant\n', '').strip()
320
  return output
321
 
322
- def teuken(self):
323
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
324
- model = AutoModelForCausalLM.from_pretrained(
325
- self.model_name,
326
- trust_remote_code=True,
327
- torch_dtype=torch.bfloat16,
328
- )
329
- model = model.to(device).eval()
330
- tokenizer = AutoTokenizer.from_pretrained(
331
- self.model_name,
332
- use_fast=False,
333
- trust_remote_code=True,
334
- )
335
- translation_prompt = f"Translate the following text from {self.sl} into {self.tl}: {self.input_text}"
336
- messages = [{"role": "User", "content": translation_prompt}]
337
- prompt_ids = tokenizer.apply_chat_template(messages, chat_template="EN", tokenize=True, add_generation_prompt=False, return_tensors="pt")
338
- prediction = model.generate(
339
- prompt_ids.to(model.device),
340
- max_length=512,
341
- do_sample=True,
342
- top_k=50,
343
- top_p=0.95,
344
- temperature=0.7,
345
- num_return_sequences=1,
346
- )
347
- translation = tokenizer.decode(prediction[0].tolist())
348
- return translation
349
-
350
  def unbabel(self):
351
  pipe = pipeline("text-generation", model=self.model_name, torch_dtype=torch.bfloat16, device_map="auto")
352
  messages = [{"role": "user",
@@ -422,10 +478,16 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
422
 
423
  elif model_name == 'Argos':
424
  translated_text = Translators(model_name, sl, tl, input_text).argos()
 
 
 
425
 
426
  elif model_name == 'Google':
427
  translated_text = Translators(model_name, sl, tl, input_text).google()
428
 
 
 
 
429
  elif "m2m" in model_name.lower():
430
  translated_text = Translators(model_name, sl, tl, input_text).mtom()
431
 
@@ -459,10 +521,7 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
459
 
460
  elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
461
  translated_text = Translators(model_name, s_language, t_language, input_text).mbart_many_to_one()
462
-
463
- elif 'teuken' in model_name.lower():
464
- translated_text = Translators(model_name, s_language, t_language, input_text).teuken()
465
-
466
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
467
  translated_text = Translators(model_name, s_language, t_language, input_text).eurollm_instruct()
468
 
@@ -478,6 +537,12 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
478
  elif model_name == "winninghealth/WiNGPT-Babel-2":
479
  translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
480
 
 
 
 
 
 
 
481
  elif model_name == "Bergamot":
482
  translated_text, message_text = Translators(model_name, s_language, t_language, input_text).bergamot()
483
 
 
18
  langs = list(favourite_langs.keys())
19
  langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
20
 
21
+ models = ["Helsinki-NLP", "QUICK-MT", "Argos", "Google", "HPLT", "HPLT-OPUS",
22
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
23
  "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",
24
+ "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en"
25
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
26
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
27
  "facebook/m2m100_418M", "facebook/m2m100_1.2B", "Lego-MT/Lego-MT",
 
30
  "t5-small", "t5-base", "t5-large",
31
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
32
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
 
33
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
34
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
35
+ "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
 
36
  ]
37
  DEFAULTS = [langs[0], langs[1], models[0]]
38
 
 
76
  response = httpx.get(url)
77
  return response.json()[0][0][0]
78
 
79
def simplepipe(self):
    """Translate ``self.input_text`` with a plain translation pipeline.

    The pipeline is built from ``self.model_name`` on ``self.device`` and is
    called with the input text only (no language arguments are passed).

    Returns:
        A 2-tuple. On success: the translated text and a status message
        naming the source language, target language and model. On any
        exception: a generic error text and the caught exception object.
    """
    try:
        translator = pipeline("translation", model=self.model_name, device=self.device)
        outputs = translator(self.input_text)
        # Build the status line before reading the result, all inside the
        # try block so a failed language-name lookup is reported as an error too.
        status = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {self.model_name}.'
        translated_text = outputs[0]['translation_text']
    except Exception as error:
        failure_text = f"Error translating with model: {self.model_name}! Try other available language combination or model."
        return failure_text, error
    return translated_text, status
87
+
88
def hplt(self, opus=False):
    """Translate ``self.input_text`` with a bilingual HPLT model.

    The model id is built from ``self.sl`` and ``self.tl`` as
    ``HPLT/translate-<sl>-<tl>-v1.0-hplt`` (or ``-hplt_opus`` when *opus*
    is true). Only the language pairs listed in ``hplt_models`` are attempted.

    Args:
        opus: Select the ``hplt_opus`` model variant instead of ``hplt``.

    Returns:
        A ``(translated_text, message)`` tuple. For an unsupported pair the
        text says the model is not available and the message lists the
        supported pairs. If loading or running the pipeline fails, the text
        is a generic error and the message is the exception text.
    """
    # Supported pairs, all to or from English.
    hplt_models = ['ar-en', 'bs-en', 'ca-en', 'en-ar', 'en-bs', 'en-ca', 'en-et', 'en-eu', 'en-fi',
                   'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
                   'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
                   'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
    pair = f'{self.sl}-{self.tl}'
    variant = 'hplt_opus' if opus else 'hplt'
    hplt_model = f'HPLT/translate-{pair}-v1.0-{variant}'  # e.g. HPLT/translate-en-hr-v1.0-hplt

    if pair not in hplt_models:
        translated_text = f'HPLT model from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
        message = f"Available models: {', '.join(hplt_models)}"
        return translated_text, message

    # Guard model loading and inference the same way simplepipe() does, so a
    # missing checkpoint or runtime failure is reported to the caller as a
    # (text, message) pair instead of propagating as an exception.
    try:
        pipe = pipeline("translation", model=hplt_model, device=self.device)
        translation = pipe(self.input_text)
        translated_text = translation[0]['translation_text']
        message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {hplt_model}.'
    except Exception as error:
        translated_text = f"Error translating with model: {hplt_model}! Try other available language combination or model."
        message = str(error)
    return translated_text, message
107
+
108
+ @staticmethod
109
+ def download_argos_model(from_code, to_code):
110
  import argostranslate.package
111
  print('Downloading model', from_code, to_code)
112
  # Download and install Argos Translate package
 
130
  translated_text = error
131
  return translated_text
132
 
133
@staticmethod
def quickmttranslate(model_path, input_text):
    """Translate *input_text* with the quickmt model stored at *model_path*.

    Args:
        model_path: Location of a downloaded quickmt model; converted to
            ``str`` before being handed to ``quickmt.Translator``.
        input_text: Text to translate.

    Returns:
        Whatever the ``quickmt.Translator`` call returns for *input_text*.
    """
    from quickmt import Translator
    # CTranslate2 device names are "cpu", "cuda" and "auto"; "gpu" is not
    # one of them, so CUDA is selected explicitly when torch reports a GPU.
    # NOTE(review): assumes quickmt forwards `device` to CTranslate2 unchanged - confirm.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    translator = Translator(str(model_path), device=device)
    # beam_size=1 would be faster, at lower quality.
    translation = translator(input_text, beam_size=5, max_input_length=512, max_decoding_length=512)
    return translation
144
+
145
@staticmethod
def quickmtdownload(model_name):
    """Return the local directory of a quickmt model, downloading it if absent.

    Args:
        model_name: Model directory name, e.g. ``quickmt-en-ro``; fetched
            from the hub as ``quickmt/<model_name>``.

    Returns:
        ``Path("/quickmt/models") / model_name``.
    """
    from quickmt.hub import hf_download
    from pathlib import Path

    local_dir = Path("/quickmt/models") / model_name
    if local_dir.exists():
        # Already on disk: skip the download.
        return local_dir
    hf_download(model_name=f"quickmt/{model_name}", output_dir=local_dir)
    return local_dir
156
+
157
def quickmt(self):
    """Translate ``self.input_text`` with quickmt models, pivoting through English if needed.

    A direct ``<sl>-<tl>`` model is used when it is listed. Otherwise the
    text is translated ``<sl>`` -> English -> ``<tl>``, but only when both
    of those models are listed.

    Returns:
        A ``(translated_text, message)`` tuple. When no direct or pivot
        route exists, the text says the model is not available and the
        message lists the known language pairs.
    """
    model_name = f"quickmt-{self.sl}-{self.tl}"
    # Hard-coded list of model pairs; it can be regenerated from
    # quickmt.hub.hf_list() by taking the part after "/quickmt-" and sorting.
    quickmt_models = ['ar-en', 'bn-en', 'cs-en', 'da-en', 'de-en', 'el-en', 'en-ar', 'en-bn', 'en-cs', 'en-de', 'en-el', 'en-es',
                      'en-fa', 'en-fr', 'en-he', 'en-hi', 'en-hu', 'en-id', 'en-it', 'en-ja', 'en-ko', 'en-lv', 'en-pl', 'en-pt',
                      'en-ro', 'en-ru', 'en-th', 'en-tr', 'en-ur', 'en-vi', 'en-zh', 'es-en', 'fa-en', 'fr-en', 'he-en', 'hi-en',
                      'hu-en', 'id-en', 'it-en', 'ja-en', 'ko-en', 'lv-en', 'pl-en', 'pt-en', 'ro-en', 'ru-en', 'th-en', 'tr-en', 'ur-en', 'vi-en', 'zh-en']
    # Direct translation model
    if f"{self.sl}-{self.tl}" in quickmt_models:
        model_path = Translators.quickmtdownload(model_name)
        translated_text = Translators.quickmttranslate(model_path, self.input_text)
        message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {model_name}.'
    # Pivot language English: both legs must exist as models. Checking only
    # that each code appears somewhere in the list is not enough, because
    # some languages are one-directional (e.g. 'da-en' without 'en-da') and
    # 'en-en' does not exist.
    elif f"{self.sl}-en" in quickmt_models and f"en-{self.tl}" in quickmt_models:
        model_name = f"quickmt-{self.sl}-en"
        model_path = Translators.quickmtdownload(model_name)
        entranslation = Translators.quickmttranslate(model_path, self.input_text)
        model_name = f"quickmt-en-{self.tl}"
        model_path = Translators.quickmtdownload(model_name)
        translated_text = Translators.quickmttranslate(model_path, entranslation)
        message = f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with pivot language English.'
    else:
        translated_text = f'Model {model_name} from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} not available!'
        message = f"Available models: {', '.join(quickmt_models)}"
    return translated_text, message
189
+
190
  def HelsinkiNLP_mulroa(self):
191
  try:
192
  pipe = pipeline("translation", model=self.model_name, device=self.device)
 
403
  output = output.rsplit(f'{self.tl}:')[-1].strip().replace('assistant\n', '').strip()
404
  return output
405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  def unbabel(self):
407
  pipe = pipeline("text-generation", model=self.model_name, torch_dtype=torch.bfloat16, device_map="auto")
408
  messages = [{"role": "user",
 
478
 
479
  elif model_name == 'Argos':
480
  translated_text = Translators(model_name, sl, tl, input_text).argos()
481
+
482
+ elif model_name == "QUICK-MT":
483
+ translated_text, message_text = Translators(model_name, sl, tl, input_text).quickmt()
484
 
485
  elif model_name == 'Google':
486
  translated_text = Translators(model_name, sl, tl, input_text).google()
487
 
488
+ elif model_name == "Helsinki-NLP/opus-mt-tc-bible-big-roa-en":
489
+ translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
490
+
491
  elif "m2m" in model_name.lower():
492
  translated_text = Translators(model_name, sl, tl, input_text).mtom()
493
 
 
521
 
522
  elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
523
  translated_text = Translators(model_name, s_language, t_language, input_text).mbart_many_to_one()
524
+
 
 
 
525
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
526
  translated_text = Translators(model_name, s_language, t_language, input_text).eurollm_instruct()
527
 
 
537
  elif model_name == "winninghealth/WiNGPT-Babel-2":
538
  translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
539
 
540
+ elif "HPLT" in model_name:
541
+ if model_name == "HPLT-OPUS":
542
+ translated_text, message = Translators(model_name, sl, tl, input_text).hplt(opus = True)
543
+ else:
544
+ translated_text, message = Translators(model_name, sl, tl, input_text).hplt()
545
+
546
  elif model_name == "Bergamot":
547
  translated_text, message_text = Translators(model_name, s_language, t_language, input_text).bergamot()
548