TiberiuCristianLeon committed on
Commit
4070cfe
·
verified ·
1 Parent(s): 9bf3bb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -1
app.py CHANGED
@@ -43,7 +43,8 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-O
43
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
44
  "t5-small", "t5-base", "t5-large",
45
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
46
- "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt", "Heng666/madlad400-3b-mt-ct2-int8",
 
47
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
48
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
49
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
@@ -264,6 +265,29 @@ class Translators:
264
  translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=self.sl, tgt_lang=self.tl)
265
  translated_text = translator(text, max_length=512)
266
  return translated_text[0]['translation_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
  def smollm(self):
269
  tokenizer = AutoTokenizer.from_pretrained(self.model_name)
@@ -568,6 +592,9 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
568
 
569
  elif 'madlad' in model_name.lower():
570
  translated_text = Translators(model_name, sl, tl, input_text).madlad()
 
 
 
571
 
572
  elif 'mt0' in model_name.lower():
573
  translated_text = Translators(model_name, s_language, t_language, input_text).bigscience()
 
43
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
44
  "t5-small", "t5-base", "t5-large",
45
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
46
+ "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
47
+ "Heng666/madlad400-3b-mt-ct2-int8", "Heng666/madlad400-7b-mt-ct2-int8",
48
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
49
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
50
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
 
265
  translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=self.sl, tgt_lang=self.tl)
266
  translated_text = translator(text, max_length=512)
267
  return translated_text[0]['translation_text']
268
+
269
def madladct2(self):
    """Translate ``self.input_text`` with a CTranslate2-quantized MADLAD-400 model.

    Downloads the model snapshot named by ``self.model_name`` from the
    Hugging Face Hub, loads its SentencePiece tokenizer and CTranslate2
    engine, and runs a single greedy-ish translation.

    Returns:
        str: the decoded translation of ``self.input_text`` into the
        target language ``self.tl``.
    """
    # Heavy/optional dependencies are imported lazily so the rest of the
    # app works even when ctranslate2 is not installed.
    import ctranslate2
    from sentencepiece import SentencePieceProcessor
    from huggingface_hub import snapshot_download

    # Fetch (or reuse the cached) model directory from the Hub.
    model_path = snapshot_download(self.model_name)

    tokenizer = SentencePieceProcessor()
    tokenizer.load(f"{model_path}/sentencepiece.model")
    translator = ctranslate2.Translator(model_path)

    # MADLAD-400 expects the target language as a "<2xx>" prefix token,
    # where xx is the language code (assumes self.tl is an ISO code the
    # model was trained with — TODO confirm against the language map).
    input_tokens = tokenizer.encode(f"<2{self.tl}> {self.input_text}", out_type=str)
    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        max_batch_size=512,
        beam_size=1,
        # Penalties discourage the repetition loops these int8 models
        # are prone to with greedy decoding.
        no_repeat_ngram_size=1,
        repetition_penalty=2,
    )
    # FIX: original line read `return translated_sentence)` — a stray
    # closing parenthesis that made the whole module unimportable.
    translated_sentence = tokenizer.decode(results[0].hypotheses[0])
    return translated_sentence
291
 
292
  def smollm(self):
293
  tokenizer = AutoTokenizer.from_pretrained(self.model_name)
 
592
 
593
  elif 'madlad' in model_name.lower():
594
  translated_text = Translators(model_name, sl, tl, input_text).madlad()
595
+
596
+ elif 'mt-ct2-int8' in model_name.lower():
597
+ translated_text = Translators(model_name, sl, tl, input_text).madladct2()
598
 
599
  elif 'mt0' in model_name.lower():
600
  translated_text = Translators(model_name, s_language, t_language, input_text).bigscience()