TiberiuCristianLeon committed on
Commit
302175c
·
verified ·
1 Parent(s): fe441ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -216,15 +216,18 @@ def unbabel(model_name, sl, tl, input_text):
216
  messages = [{"role": "user",
217
  "content": f"Translate the following text from {sl} into {tl}.\n{sl}: {input_text}.\n{tl}:"}]
218
  prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
219
- max_new_tokens = int(len(input_text) // 3) + int(0.25 * len(input_text))
220
- print(len(input_text), max_new_tokens)
 
 
221
  outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)
222
  translated_text = outputs[0]["generated_text"]
223
  markers = ["<end_of_turn>", "<|im_end|>", "<|im_start|>assistant"] # , "\n"
224
  for marker in markers:
225
  if marker in translated_text:
226
  translated_text = translated_text.split(marker)[1].strip()
227
- translated_text = translated_text.replace('Answer:', '', 1).strip() if translated_text.startswith('Answer:') else translated_text
 
228
  return translated_text
229
 
230
  def mbart_many_to_many(model_name, sl, tl, input_text):
 
216
  messages = [{"role": "user",
217
  "content": f"Translate the following text from {sl} into {tl}.\n{sl}: {input_text}.\n{tl}:"}]
218
  prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
219
+ tokenized_input = pipe.tokenizer(input_text, return_tensors="pt")
220
+ num_input_tokens = len(tokenized_input["input_ids"][0])
221
+ max_new_tokens = int(num_input_tokens) + int(0.25 * len(num_input_tokens))
222
+ print(len(input_text), tokenized_input, num_input_tokens, max_new_tokens)
223
  outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)
224
  translated_text = outputs[0]["generated_text"]
225
  markers = ["<end_of_turn>", "<|im_end|>", "<|im_start|>assistant"] # , "\n"
226
  for marker in markers:
227
  if marker in translated_text:
228
  translated_text = translated_text.split(marker)[1].strip()
229
+ translated_text = translated_text.replace('Answer:', '', 1).strip() if translated_text.startswith('Answer:') else translated_text
230
+ translated_text = translated_text.split("Translated text:")[0].strip() if "Translated text:" in translated_text else translated_text
231
  return translated_text
232
 
233
  def mbart_many_to_many(model_name, sl, tl, input_text):