Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,6 +26,7 @@ models = ["Helsinki-NLP",
|
|
| 26 |
"bigscience/bloomz-1b7",
|
| 27 |
"bigscience/bloomz-3b",
|
| 28 |
"utter-project/EuroLLM-1.7B",
|
|
|
|
| 29 |
"Unbabel/Tower-Plus-2B",
|
| 30 |
"Unbabel/TowerInstruct-7B-v0.2",
|
| 31 |
"Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
|
@@ -106,6 +107,17 @@ def eurollm(model_name, sl, tl, input_text):
|
|
| 106 |
result = output.rsplit(f'{tl}:')[-1].strip()
|
| 107 |
return result
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
def nllb(model_name, sl, tl, input_text):
|
| 110 |
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=sl)
|
| 111 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
|
|
@@ -176,7 +188,11 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
|
|
| 176 |
model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
|
| 177 |
except EnvironmentError as error:
|
| 178 |
return f"Error finding model: {model_name}! Try other available language combination.", error
|
| 179 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
|
| 181 |
return translated_text, message_text
|
| 182 |
|
|
|
|
| 26 |
"bigscience/bloomz-1b7",
|
| 27 |
"bigscience/bloomz-3b",
|
| 28 |
"utter-project/EuroLLM-1.7B",
|
| 29 |
+
"utter-project/EuroLLM-1.7B-Instruct",
|
| 30 |
"Unbabel/Tower-Plus-2B",
|
| 31 |
"Unbabel/TowerInstruct-7B-v0.2",
|
| 32 |
"Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
|
|
|
| 107 |
result = output.rsplit(f'{tl}:')[-1].strip()
|
| 108 |
return result
|
| 109 |
|
| 110 |
+
def eurollm_instruct(model_name, sl, tl, input_text):
    """Translate ``input_text`` from ``sl`` to ``tl`` with a chat-tuned EuroLLM model.

    Builds a ChatML-style prompt (empty system turn, translation instruction in
    the user turn), generates up to 256 new tokens, and returns only the text
    after the final ``{tl}:`` marker when present.

    Args:
        model_name: HF model id, e.g. "utter-project/EuroLLM-1.7B-Instruct".
        sl: Source language name as it should appear in the prompt.
        tl: Target language name as it should appear in the prompt.
        input_text: Text to translate.

    Returns:
        The decoded translation string.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # ChatML prompt: the system turn is intentionally empty; the user turn
    # carries the instruction plus the source text, and the assistant turn is
    # left open for the model to complete with the translation.
    text = f'<|im_start|>system\n<|im_end|>\n<|im_start|>user\nTranslate the following {sl} source text to {tl}:\n{sl}: {input_text} \n{tl}: <|im_end|>\n<|im_start|>assistant\n'
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=256)
    # FIX: the original line ended with an extra ')' — unbalanced parenthesis
    # (SyntaxError): tokenizer.decode(outputs[0], skip_special_tokens=True))
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the last
    # '{tl}:' marker (the model's actual answer), mirroring eurollm().
    if f'{tl}:' in output:
        output = output.rsplit(f'{tl}:')[-1].strip()
    return output
|
| 120 |
+
|
| 121 |
def nllb(model_name, sl, tl, input_text):
|
| 122 |
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=sl)
|
| 123 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
|
|
|
|
| 188 |
model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
|
| 189 |
except EnvironmentError as error:
|
| 190 |
return f"Error finding model: {model_name}! Try other available language combination.", error
|
| 191 |
+
if model_name = "utter-project/EuroLLM-1.7B-Instruct":
|
| 192 |
+
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 193 |
+
return translated_text, message_text
|
| 194 |
+
|
| 195 |
+
if model_name = "utter-project/EuroLLM-1.7B":
|
| 196 |
translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
|
| 197 |
return translated_text, message_text
|
| 198 |
|