Update app.py
Browse files
app.py
CHANGED
|
@@ -44,9 +44,11 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-O
|
|
| 44 |
"t5-small", "t5-base", "t5-large",
|
| 45 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 46 |
"google/madlad400-3b-mt", "Heng666/madlad400-3b-mt-ct2", "Heng666/madlad400-3b-mt-ct2-int8", "Heng666/madlad400-7b-mt-ct2-int8",
|
|
|
|
| 47 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 48 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 49 |
-
"HuggingFaceTB/SmolLM3-3B",
|
|
|
|
| 50 |
"tencent/Hunyuan-MT-7B",
|
| 51 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
|
| 52 |
]
|
|
@@ -208,6 +210,38 @@ class Translators:
|
|
| 208 |
return translation[0]['translation_text'], message
|
| 209 |
except Exception as error:
|
| 210 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
def HelsinkiNLP_mulroa(self):
|
| 213 |
try:
|
|
@@ -578,6 +612,9 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
|
|
| 578 |
elif model_name == 'Google':
|
| 579 |
translated_text = Translators(model_name, sl, tl, input_text).google()
|
| 580 |
|
|
|
|
|
|
|
|
|
|
| 581 |
elif "m2m" in model_name.lower():
|
| 582 |
translated_text = Translators(model_name, sl, tl, input_text).mtom()
|
| 583 |
|
|
|
|
| 44 |
"t5-small", "t5-base", "t5-large",
|
| 45 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 46 |
"google/madlad400-3b-mt", "Heng666/madlad400-3b-mt-ct2", "Heng666/madlad400-3b-mt-ct2-int8", "Heng666/madlad400-7b-mt-ct2-int8",
|
| 47 |
+
"BSC-LT/salamandraTA-2b-instruct", "BSC-LT/salamandraTA-7b-instruct",
|
| 48 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 49 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 50 |
+
"HuggingFaceTB/SmolLM3-3B",
|
| 51 |
+
"winninghealth/WiNGPT-Babel-2-1", "winninghealth/WiNGPT-Babel-2", "winninghealth/WiNGPT-Babel",
|
| 52 |
"tencent/Hunyuan-MT-7B",
|
| 53 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6",
|
| 54 |
]
|
|
|
|
| 210 |
return translation[0]['translation_text'], message
|
| 211 |
except Exception as error:
|
| 212 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
| 213 |
+
|
| 214 |
+
def salamandraTApipe(self):
    """Translate using a Hugging Face `translation` pipeline for SalamandraTA models.

    Builds a single instruction-style prompt from the instance's source/target
    languages and input text, runs it through the pipeline, and returns the
    generated text of the first (only) result.

    NOTE(review): assumes ``self.model`` holds the HF model id/path — other
    methods in this class also reference ``self.model_name``; confirm which
    attribute is populated by ``__init__`` (not visible here).
    """
    translator = pipeline("translation", model=self.model)
    prompts = [
        f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:",
    ]
    outputs = translator(prompts)
    first = outputs[0]
    return first["generated_text"]
|
| 220 |
+
|
| 221 |
+
def salamandraTA(self):
    """Translate with a SalamandraTA instruct model loaded directly (no pipeline).

    Builds a single-turn chat prompt via the tokenizer's chat template,
    generates with beam search, and returns only the newly generated tokens
    decoded to text (the prompt tokens are sliced off).

    Returns:
        str: the decoded translation.

    NOTE(review): assumes ``self.model`` is the HF model id/path — elsewhere
    this class also uses ``self.model_name``; confirm which attribute is set.
    """
    text = f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:"
    tokenizer = AutoTokenizer.from_pretrained(self.model)
    model = AutoModelForCausalLM.from_pretrained(
        self.model,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    message = [{"role": "user", "content": text}]
    prompt = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    # Remember prompt length so only generated tokens are decoded below.
    input_length = inputs.shape[1]
    outputs = model.generate(
        input_ids=inputs.to(model.device),
        max_new_tokens=512,
        early_stopping=True,
        num_beams=5,
    )
    # BUG FIX: original line ended with an extra ')' — a SyntaxError that
    # would have prevented the whole module from importing.
    return tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
|
| 245 |
|
| 246 |
def HelsinkiNLP_mulroa(self):
|
| 247 |
try:
|
|
|
|
| 612 |
elif model_name == 'Google':
|
| 613 |
translated_text = Translators(model_name, sl, tl, input_text).google()
|
| 614 |
|
| 615 |
+
elif "salamandraTA" in model_name.lower():
|
| 616 |
+
translated_text = Translators(model_name, sl, tl, input_text).salamandraTApipe()
|
| 617 |
+
|
| 618 |
elif "m2m" in model_name.lower():
|
| 619 |
translated_text = Translators(model_name, sl, tl, input_text).mtom()
|
| 620 |
|