Update app.py
app.py CHANGED
@@ -89,8 +89,26 @@ class Translators:
             do_sample=True
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(response)
         return response.split("Translation:")[-1].strip()
 
+    def flan(self):
+        tokenizer = T5Tokenizer.from_pretrained(self.model_name, legacy=False)
+        model = T5ForConditionalGeneration.from_pretrained(self.model_name)
+        prompt = f"translate {self.sl} to {self.tl}: {self.input_text}"
+        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+        outputs = model.generate(input_ids)
+        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+
+    def tfive(self):
+        tokenizer = T5Tokenizer.from_pretrained(self.model_name)
+        model = T5ForConditionalGeneration.from_pretrained(self.model_name, device_map="auto")
+        prompt = f"translate {self.sl} to {self.tl}: {self.input_text}"
+        input_ids = tokenizer.encode(prompt, return_tensors="pt")
+        output_ids = model.generate(input_ids, max_length=512)
+        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
+        return translated_text
+
     def mtom(model_name, sl, tl, input_text):
         from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
         model = M2M100ForConditionalGeneration.from_pretrained(model_name)
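For reference: the new flan() and tfive() methods read self.model_name, self.sl, self.tl and self.input_text, and the call sites added below construct the object as Translators(model_name, sselected_language, tselected_language, input_text). The class __init__ is not part of this diff, so the following is only a sketch of the constructor those method bodies imply, with attribute names inferred rather than confirmed:

    # Sketch only: Translators.__init__ is not shown in this diff; these
    # attribute names are assumed because flan()/tfive() read them via self.
    class Translators:
        def __init__(self, model_name, sl, tl, input_text):
            self.model_name = model_name  # Hugging Face model id
            self.sl = sl                  # source language
            self.tl = tl                  # target language
            self.input_text = input_text  # text to translate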
@@ -143,23 +161,6 @@ def HelsinkiNLP(sl, tl, input_text):
     except KeyError as error:
         return f"Error: Translation direction {sl} to {tl} is not supported by Helsinki Translation Models", error
 
-def flan(model_name, sl, tl, input_text):
-    tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
-    model = T5ForConditionalGeneration.from_pretrained(model_name)
-    input_text = f"translate {sl} to {tl}: {input_text}"
-    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
-    outputs = model.generate(input_ids)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-
-def tfive(model_name, sl, tl, input_text):
-    tokenizer = T5Tokenizer.from_pretrained(model_name)
-    model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
-    prompt = f"translate {sl} to {tl}: {input_text}"
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    output_ids = model.generate(input_ids, max_length=512)
-    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return translated_text
-
 def teuken(model_name, sl, tl, input_text):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = AutoModelForCausalLM.from_pretrained(
@@ -336,9 +337,12 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
 
     elif model_name == "utter-project/EuroLLM-1.7B":
         translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
-
+
+    elif model_name.startswith('t5'):
+        translated_text = Translators(model_name, sselected_language, tselected_language, input_text).tfive()
+
     elif 'flan' in model_name.lower():
-        translated_text = flan(model_name, sselected_language, tselected_language, input_text)
+        translated_text = Translators(model_name, sselected_language, tselected_language, input_text).flan()
 
     elif 'teuken' in model_name.lower():
         translated_text = teuken(model_name, sselected_language, tselected_language, input_text)
@@ -364,9 +368,6 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
 
     elif 'Unbabel' in model_name:
         translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
-
-    elif model_name.startswith('t5'):
-        translated_text = tfive(model_name, sselected_language, tselected_language, input_text)
 
     elif model_name == "HuggingFaceTB/SmolLM3-3B":
         translated_text = Translators(model_name, sselected_language, tselected_language, input_text).smollm()
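With this change, t5-* checkpoints and flan-family models are dispatched through the Translators class instead of the removed module-level flan()/tfive() helpers. A brief usage sketch of the two new branches; the model ids below are hypothetical illustrations, not taken from the diff:

    # Hypothetical model ids, for illustration only. Mirrors the branch
    # order in translate_text(): the startswith('t5') check runs before
    # the substring check for 'flan'.
    for model_name in ("t5-base", "google/flan-t5-large"):
        if model_name.startswith('t5'):
            result = Translators(model_name, "English", "French", "Hello, world").tfive()
        elif 'flan' in model_name.lower():
            result = Translators(model_name, "English", "French", "Hello, world").flan()
        print(result)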