TiberiuCristianLeon committed
Commit 4dceed6 · verified · 1 Parent(s): 4bd448b

Update app.py

Files changed (1)
  1. app.py +17 -1
app.py CHANGED
@@ -26,6 +26,7 @@ models = ["Helsinki-NLP",
           "bigscience/bloomz-1b7",
           "bigscience/bloomz-3b",
           "utter-project/EuroLLM-1.7B",
+          "utter-project/EuroLLM-1.7B-Instruct",
           "Unbabel/Tower-Plus-2B",
           "Unbabel/TowerInstruct-7B-v0.2",
           "Unbabel/TowerInstruct-Mistral-7B-v0.2",
@@ -106,6 +107,17 @@ def eurollm(model_name, sl, tl, input_text):
     result = output.rsplit(f'{tl}:')[-1].strip()
     return result
 
+def eurollm_instruct(model_name, sl, tl, input_text):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+    text = f'<|im_start|>system\n<|im_end|>\n<|im_start|>user\nTranslate the following {sl} source text to {tl}:\n{sl}: {input_text} \n{tl}: <|im_end|>\n<|im_start|>assistant\n'
+    inputs = tokenizer(text, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=256)
+    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    if f'{tl}:' in output:
+        output = output.rsplit(f'{tl}:')[-1].strip()
+    return output
+
 def nllb(model_name, sl, tl, input_text):
     tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=sl)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
@@ -176,7 +188,11 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
         model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
     except EnvironmentError as error:
         return f"Error finding model: {model_name}! Try other available language combination.", error
-    if 'eurollm' in model_name.lower():
+    if model_name == "utter-project/EuroLLM-1.7B-Instruct":
+        translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
+        return translated_text, message_text
+
+    if model_name == "utter-project/EuroLLM-1.7B":
         translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
         return translated_text, message_text
 
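
The new eurollm_instruct helper prompts the instruct checkpoint with a ChatML-style template and keeps only the text after the last "{tl}:" marker of the decoded output. A minimal standalone sketch of the same pattern (the model name and prompt format come from the diff above; the language names and sample sentence below are illustrative, not part of the commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "utter-project/EuroLLM-1.7B-Instruct"
sl, tl = "English", "German"                 # illustrative source/target languages
input_text = "The weather is nice today."    # illustrative source sentence

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Same ChatML-style prompt the commit builds inside eurollm_instruct
prompt = (
    "<|im_start|>system\n<|im_end|>\n"
    f"<|im_start|>user\nTranslate the following {sl} source text to {tl}:\n"
    f"{sl}: {input_text} \n{tl}: <|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=256)
decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

# The decoded string echoes the prompt, so keep only what follows the last "{tl}:".
translation = decoded.rsplit(f"{tl}:")[-1].strip() if f"{tl}:" in decoded else decoded
print(translation)

Dispatching on exact model names (instead of the earlier 'eurollm' in model_name.lower() check) keeps the base and instruct checkpoints on their separate prompt formats, at the cost of needing an extra branch for any future EuroLLM variant.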