Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -216,15 +216,18 @@ def unbabel(model_name, sl, tl, input_text):
|
|
| 216 |
messages = [{"role": "user",
|
| 217 |
"content": f"Translate the following text from {sl} into {tl}.\n{sl}: {input_text}.\n{tl}:"}]
|
| 218 |
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
| 221 |
outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)
|
| 222 |
translated_text = outputs[0]["generated_text"]
|
| 223 |
markers = ["<end_of_turn>", "<|im_end|>", "<|im_start|>assistant"] # , "\n"
|
| 224 |
for marker in markers:
|
| 225 |
if marker in translated_text:
|
| 226 |
translated_text = translated_text.split(marker)[1].strip()
|
| 227 |
-
|
|
|
|
| 228 |
return translated_text
|
| 229 |
|
| 230 |
def mbart_many_to_many(model_name, sl, tl, input_text):
|
|
|
|
| 216 |
messages = [{"role": "user",
|
| 217 |
"content": f"Translate the following text from {sl} into {tl}.\n{sl}: {input_text}.\n{tl}:"}]
|
| 218 |
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
|
| 219 |
+
tokenized_input = pipe.tokenizer(input_text, return_tensors="pt")
|
| 220 |
+
num_input_tokens = len(tokenized_input["input_ids"][0])
|
| 221 |
+
# Generation budget: the source token count plus 25% headroom.
# num_input_tokens is already an int (it is the result of len() above),
# so it is used directly; calling len() on it would raise TypeError.
max_new_tokens = num_input_tokens + int(0.25 * num_input_tokens)
|
| 222 |
+
print(len(input_text), tokenized_input, num_input_tokens, max_new_tokens)
|
| 223 |
outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)
|
| 224 |
translated_text = outputs[0]["generated_text"]
|
| 225 |
markers = ["<end_of_turn>", "<|im_end|>", "<|im_start|>assistant"] # , "\n"
|
| 226 |
for marker in markers:
|
| 227 |
if marker in translated_text:
|
| 228 |
translated_text = translated_text.split(marker)[1].strip()
|
| 229 |
+
translated_text = translated_text.replace('Answer:', '', 1).strip() if translated_text.startswith('Answer:') else translated_text
|
| 230 |
+
translated_text = translated_text.split("Translated text:")[0].strip() if "Translated text:" in translated_text else translated_text
|
| 231 |
return translated_text
|
| 232 |
|
| 233 |
def mbart_many_to_many(model_name, sl, tl, input_text):
|