TiberiuCristianLeon committed on
Commit
27d41ff
·
verified ·
1 Parent(s): 0aa7a3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -14
app.py CHANGED
@@ -7,19 +7,18 @@ import httpx
7
 
8
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9
  # Language options and mappings
10
- options = ["German", "Romanian", "English", "French", "Spanish", "Italian", "Hindi"]
11
- favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
12
  df = pl.read_parquet("isolanguages.parquet")
13
  non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
14
  # all_langs = languagecodes.iso_languages_byname
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
16
- # langs = list(favourite_langs.keys())
17
- # langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
18
  # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
19
  iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
20
  langs = {iso[0]: iso[1] for iso in non_empty_isos} # {'Romanian': 'ro', 'German': 'de'}
21
 
22
-
23
  models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
24
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
25
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
@@ -139,9 +138,26 @@ class Translators:
139
  return translated_text
140
 
141
  def hunyuan(self):
142
- pipe = pipeline("translation", model=self.model_name, device=self.device)
143
- translated_text = pipe(self.input_text, src_lang=self.sl, tgt_lang=self.tl)
144
- return translation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  def HelsinkiNLP_mulroa(self):
147
  try:
@@ -564,9 +580,9 @@ input_text = st.text_area("Enter text to translate:", placeholder="Enter text to
564
 
565
  # Initialize session state if not already set
566
  if "sselected_language" not in st.session_state:
567
- st.session_state["sselected_language"] = options[0]
568
  if "tselected_language" not in st.session_state:
569
- st.session_state["tselected_language"] = options[1]
570
  if "model_name" not in st.session_state:
571
  st.session_state["model_name"] = models[1]
572
 
@@ -578,8 +594,8 @@ model_name = st.selectbox("Select a model:", models,
578
  scol, swapcol, tcol = st.columns([3, 1, 3])
579
 
580
  with scol:
581
- sselected_language = st.selectbox("Source language:", options,
582
- index=options.index(st.session_state["sselected_language"]))
583
  with swapcol:
584
  if st.button("🔄 Swap"):
585
  st.session_state["model_name"] = model_name # Preserve model
@@ -587,8 +603,8 @@ with swapcol:
587
  st.session_state["tselected_language"], st.session_state["sselected_language"]
588
  st.rerun()
589
  with tcol:
590
- tselected_language = st.selectbox("Target language:", options,
591
- index=options.index(st.session_state["tselected_language"]))
592
 
593
  # Language codes
594
  sl = langs[st.session_state["sselected_language"]]
 
7
 
8
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9
  # Language options and mappings
10
+ favourite_langs = {"Romanian": "ro", "German": "de", "English": "en", "-----": "-----"}
11
+ # langs = ["German", "Romanian", "English", "French", "Spanish", "Italian",]
12
  df = pl.read_parquet("isolanguages.parquet")
13
  non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
14
  # all_langs = languagecodes.iso_languages_byname
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
16
+ langs = list(favourite_langs.keys())
17
+ langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
18
  # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
19
  iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
20
  langs = {iso[0]: iso[1] for iso in non_empty_isos} # {'Romanian': 'ro', 'German': 'de'}
21
 
 
22
  models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
23
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
24
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
 
138
  return translated_text
139
 
140
  def hunyuan(self):
141
+ ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
142
+ if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
143
+ prompt = f"ζŠŠδΈ‹ι’ηš„ζ–‡ζœ¬ηΏ»θ―‘ζˆ{self.tl}οΌŒδΈθ¦ι’ε€–θ§£ι‡Šγ€‚\n\n{self.input_text}"
144
+ else:
145
+ prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
146
+ tokenizer = AutoTokenizer.from_pretrained(self.model_name)
147
+ model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto")
148
+ messages = [{"role": "user", "content": prompt}]
149
+ tokenized_chat = tokenizer.apply_chat_template(
150
+ messages,
151
+ tokenize=True,
152
+ add_generation_prompt=False,
153
+ return_tensors="pt",
154
+ "top_k": 20,
155
+ "top_p": 0.6,
156
+ "repetition_penalty": 1.05,
157
+ "temperature": 0.7
158
+ )
159
+ outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512)
160
+ return output_text = tokenizer.decode(outputs[0])
161
 
162
  def HelsinkiNLP_mulroa(self):
163
  try:
 
580
 
581
  # Initialize session state if not already set
582
  if "sselected_language" not in st.session_state:
583
+ st.session_state["sselected_language"] = langs[0]
584
  if "tselected_language" not in st.session_state:
585
+ st.session_state["tselected_language"] = langs[1]
586
  if "model_name" not in st.session_state:
587
  st.session_state["model_name"] = models[1]
588
 
 
594
  scol, swapcol, tcol = st.columns([3, 1, 3])
595
 
596
  with scol:
597
+ sselected_language = st.selectbox("Source language:", langs,
598
+ index=langs.index(st.session_state["sselected_language"]))
599
  with swapcol:
600
  if st.button("🔄 Swap"):
601
  st.session_state["model_name"] = model_name # Preserve model
 
603
  st.session_state["tselected_language"], st.session_state["sselected_language"]
604
  st.rerun()
605
  with tcol:
606
+ tselected_language = st.selectbox("Target language:", langs,
607
+ index=langs.index(st.session_state["tselected_language"]))
608
 
609
  # Language codes
610
  sl = langs[st.session_state["sselected_language"]]