Update app.py
app.py CHANGED

@@ -13,15 +13,15 @@ df = pl.read_parquet("isolanguages.parquet")
 non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
 # all_langs = languagecodes.iso_languages_byname
 all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos}  # {'Romanian': ('ro', 'rum', 'ron')}
+# langs = list(favourite_langs.keys())
+# langs.extend(list(all_langs.keys()))  # Language options as list, add favourite languages first
 # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()}  # {'ro': 'Romanian', 'de': 'German'}
 iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos}  # {'ro': 'Romanian', 'de': 'German'}
 langs = {iso[0]: iso[1] for iso in non_empty_isos}  # {'Romanian': 'ro', 'German': 'de'}
-# langs = {"English": "en", "Romanian": "ro", "German": "de", "French": "fr", "Spanish": "es", "Italian": "it"}
-# langs = list(favourite_langs.keys())
-# langs.extend(list(all_langs.keys()))  # Language options as list, add favourite languages first
 
-
-
+
+models = ["Helsinki-NLP", "Argos", "Google", "t5-base", "t5-small", "t5-large", "Unbabel/Tower-Plus-2B",
+          "Unbabel/TowerInstruct-Mistral-7B-v0.2", "winninghealth/WiNGPT-Babel-2"]
 allmodels = ["Helsinki-NLP",
              "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
              "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-mul",

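The three lookup tables above are built straight from the parquet rows. A minimal sketch of that mapping, assuming each row is laid out as (name, ISO 639-1, ISO 639-2/B, ISO 639-2/T) as the inline comments suggest:

```python
# Illustrative only: two hand-written rows standing in for non_empty_isos.
# The column order (name, iso639-1, iso639-2/B, iso639-2/T) is assumed from the comments above.
rows = [
    ("Romanian", "ro", "rum", "ron"),
    ("German", "de", "ger", "deu"),
]

all_langs = {name: (iso1, iso2b, iso2t) for name, iso1, iso2b, iso2t in rows}
iso1_to_name = {iso1: name for name, iso1, _, _ in rows}  # {'ro': 'Romanian', 'de': 'German'}
langs = {name: iso1 for name, iso1, _, _ in rows}         # {'Romanian': 'ro', 'German': 'de'}

assert all_langs["Romanian"] == ("ro", "rum", "ron")
assert iso1_to_name["de"] == "German"
```
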
@@ -49,9 +49,9 @@ class Translators:
 
     def google(self):
         url = os.environ['GCLIENT'] + f'sl={self.sl}&tl={self.tl}&q={self.input_text}'
-        response =
+        response = httpx.get(url)
        return response.json()[0][0][0]
-
+
     @classmethod
     def download_argos_model(cls, from_code, to_code):
         import argostranslate.package

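The `-`/`+` pair above completes the request that was left dangling in the previous revision. A standalone sketch of the same call, assuming GCLIENT holds the base URL of a translate endpoint that accepts `sl`, `tl` and `q` query parameters and returns the nested JSON the method indexes into:

```python
import os

import httpx

def google_translate(sl: str, tl: str, text: str) -> str:
    # GCLIENT is assumed to be the endpoint base URL ending just before the query
    # parameters; it is kept out of the repo via an environment variable.
    url = os.environ["GCLIENT"] + f"sl={sl}&tl={tl}&q={text}"
    response = httpx.get(url, timeout=10)
    response.raise_for_status()
    # The response shape [[["<translation>", ...], ...], ...] is assumed from the indexing in google().
    return response.json()[0][0][0]
```
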
@@ -241,11 +241,11 @@ class Translators:
         translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=self.sl, tgt_lang=self.tl)
         translated_text = translator(self.input_text, max_length=512)
         return translated_text[0]['translation_text']
-
+
     def wingpt(self):
         model = AutoModelForCausalLM.from_pretrained(
             self.model_name,
-
+            dtype="auto",
             device_map="auto"
         )
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)

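The new keyword follows recent transformers releases, where `dtype=` supersedes the older `torch_dtype=` argument of `from_pretrained`. A minimal loading sketch with the same two options (model id taken from the `models` list; `device_map="auto"` requires accelerate):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "winninghealth/WiNGPT-Babel-2"  # one of the ids in the `models` list above

# dtype="auto" keeps the checkpoint's native precision; device_map="auto"
# lets accelerate place the weights on the available devices.
model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)
```
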
@@ -377,40 +377,6 @@ def download_argos_model(from_code, to_code):
     )
     argostranslate.package.install_from_path(package_to_install.download())
 
-def wingpt(model_name, sl, tl, input_text):
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype="auto",
-        device_map="auto"
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    input_json = '{"input_text": input_text}'
-    messages = [
-        {"role": "system", "content": f"Translate this to {tl} language"},
-        {"role": "user", "content": input_text}
-    ]
-
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-
-    generated_ids = model.generate(
-        **model_inputs,
-        max_new_tokens=512,
-        temperature=0.1
-    )
-
-    generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-    print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))
-    rawresult = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    result = rawresult.split('\n')[-1].strip() if '\n' in rawresult else rawresult.strip()
-    return result
-
 def translate_text(model_name: str, s_language: str, t_language: str, input_text: str) -> tuple[str, str]:
     """
     Translates the input text from the source language to the target language using a specified model.

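The deleted module-level `wingpt` duplicated the `Translators.wingpt` method, so only the class version remains. One detail worth keeping in mind from it: `model.generate` returns prompt plus completion token ids, so the completion is recovered by slicing off `len(input_ids)` per row. A toy illustration of that slicing, independent of any model:

```python
# Stand-ins for model_inputs.input_ids and the output of model.generate(...).
prompt_ids = [[101, 102, 103]]
full_outputs = [[101, 102, 103, 900, 901, 902]]

# Same comprehension as in wingpt(): keep only the newly generated ids.
completions = [output[len(prompt):] for prompt, output in zip(prompt_ids, full_outputs)]
assert completions == [[900, 901, 902]]
```
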
@@ -565,7 +531,7 @@ with st.container(border=None, width="stretch", height="content", horizontal=Fal
 # Handle the submit button click
 if submit_button:
     with st.spinner("Translating...", show_time=True):
-        translated_text, message = translate_text(model_name, sselected_language,
+        translated_text, message = translate_text(model_name, sselected_language, tselected_language, input_text)
         # if model_name.startswith('Helsinki-NLP'):
         # # input_ids = tokenizer.encode(input_text, return_tensors='pt')
         # # # Perform translation

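The completed call unpacks the `(translated_text, message)` pair that `translate_text` returns. A stripped-down sketch of the submit flow around it, with a stub dispatcher standing in for the real `translate_text` in app.py:

```python
import streamlit as st

def translate_text(model_name, s_language, t_language, input_text):
    # Stub with the same return shape as the real dispatcher: (translation, status message).
    return f"[{t_language}] {input_text}", f"Translated with {model_name}"

model_name = "Google"
sselected_language, tselected_language = "English", "German"
input_text = st.text_area("Text to translate", "Hello")
submit_button = st.button("Translate")

if submit_button:
    with st.spinner("Translating...", show_time=True):
        translated_text, message = translate_text(
            model_name, sselected_language, tselected_language, input_text
        )
    st.write(translated_text)
    st.caption(message)
```
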
@@ -586,11 +552,6 @@ if submit_button:
         # translation = pipe(input_text)
         # translated_text = translation[0]['translation_text']
 
-        # elif model_name.startswith('Google'):
-        #     url = os.environ['GCLIENT'] + f'sl={sl}&tl={tl}&q={input_text}'
-        #     response = httpx.get(url)
-        #     translated_text = response.json()[0][0][0]
-        #     print(response.json()[0][0])
         # elif model_name.startswith('t5'):
         #     tokenizer = T5Tokenizer.from_pretrained(model_name)
         #     model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

@@ -622,8 +583,6 @@ if submit_button:
         # translated_text = f"No Argos model for {sselected_language} to {tselected_language}. Try other model or languages combination!"
         # except Exception as error:
         # translated_text = error
-        # elif model_name == "winninghealth/WiNGPT-Babel-2":
-        #     translated_text = wingpt(model_name, sselected_language, tselected_language, input_text)
 
     # Display the translated text
     print(f"Translated from {sselected_language} to {tselected_language} using {model_name}.", input_text, translated_text)