Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from random import random | |
| from spacy.lang.en.stop_words import STOP_WORDS | |
| #import en_core_web_sm | |
| from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
| import torch | |
| import re | |
| from string import punctuation | |
| from heapq import nlargest | |
| #import spacy_streamlit | |
| import configparser | |
| import random | |
| import spacy | |
| import gtts | |
| import os | |
# Streamlit re-executes this script on every user interaction, so the heavy
# model objects are built inside cached loaders instead of at bare module
# level. show_spinner=False keeps the loaders from drawing a spinner, because
# st.set_page_config further down is expected to be the first Streamlit call
# that produces UI output.
@st.cache_resource(show_spinner=False)
def _load_spacy_pipeline(name="en_core_web_sm"):
    """Return the spaCy pipeline ``name``, installing it only when missing.

    The model package is downloaded with the interpreter that is running this
    script (``python -m spacy download``), so the install lands in the active
    environment and does not depend on a ``spacy`` executable being on PATH.

    Raises:
        subprocess.CalledProcessError: if the download command fails.
    """
    try:
        return spacy.load(name)
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        import subprocess
        import sys

        subprocess.run([sys.executable, "-m", "spacy", "download", name], check=True)
        return spacy.load(name)


@st.cache_resource(show_spinner=False)
def _load_pegasus(name, device):
    """Return ``(tokenizer, model)`` for checkpoint ``name`` with the model on ``device``."""
    pegasus_tokenizer = PegasusTokenizer.from_pretrained(name)
    pegasus_model = PegasusForConditionalGeneration.from_pretrained(name).to(device)
    return pegasus_tokenizer, pegasus_model


nlp = _load_spacy_pipeline()
stopwords = list(STOP_WORDS)
# Newlines are treated as punctuation so they are excluded from word counts.
punctuation = punctuation + "\n"
model_name = 'google/pegasus-xsum'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer, model = _load_pegasus(model_name, torch_device)
def abst_summary(src_text):
    """Generate an abstractive summary of ``src_text`` with the Pegasus model.

    Args:
        src_text: A string (or list of strings) to summarise. Input is
            truncated by the tokenizer (``truncation=True``).

    Returns:
        The list of decoded summaries produced by ``tokenizer.batch_decode``,
        with special tokens stripped.
    """
    # Calling the tokenizer directly replaces the deprecated
    # prepare_seq2seq_batch helper. The encoded tensors must live on the same
    # device as the model, otherwise generation fails on a CUDA host.
    batch = tokenizer(
        src_text,
        truncation=True,
        padding='longest',
        return_tensors='pt',
    ).to(torch_device)
    translated = model.generate(**batch)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
def word_frequency(doc):
    """Count how often each content word occurs in ``doc``.

    Tokens are compared and stored in lower case so the returned keys match
    the lower-cased lookups performed by ``sentence_score``; previously the
    keys kept their original case and capitalised words were never matched.

    Args:
        doc: An iterable of tokens exposing a ``text`` attribute.

    Returns:
        dict mapping lower-cased token text to its occurrence count. Tokens
        found in ``stopwords`` or in ``punctuation`` are skipped.
    """
    word_frequencies = {}
    for word in doc:
        key = word.text.lower()
        # ``punctuation`` is a string, so this is a substring test.
        if key in stopwords or key in punctuation:
            continue
        word_frequencies[key] = word_frequencies.get(key, 0) + 1
    return word_frequencies
def sentence_score(sentence_tokens, word_frequencies):
    """Score each sentence by summing the weights of the words it contains.

    Args:
        sentence_tokens: Iterable of hashable sentences, each iterable over
            tokens exposing a ``text`` attribute.
        word_frequencies: Mapping from lower-cased word to its weight.

    Returns:
        dict mapping each sentence that contains at least one weighted word
        to the sum of those weights. Sentences without any weighted word are
        left out.
    """
    scores = {}
    for sentence in sentence_tokens:
        for token in sentence:
            key = token.text.lower()
            if key not in word_frequencies:
                continue
            scores[sentence] = scores.get(sentence, 0) + word_frequencies[key]
    return scores
def get_summary(text):
    """Build an extractive summary of ``text`` from its highest-scoring sentences.

    The text is cleaned (markup tags, square-bracketed spans, URLs and
    standalone numbers are removed), each word is weighted by its frequency
    relative to the most frequent word, and every sentence is scored by the
    sum of its word weights.

    Args:
        text: The raw text to summarise.

    Returns:
        The top 10% of sentences (at least one), joined by spaces in
        descending score order, or an empty string when the text contains no
        scorable words.
    """
    text = re.sub(r"<.*?>", "", text)
    # Escaped brackets: the unescaped form "[.*?]" is a character class that
    # deleted every '.', '*' and '?', wiping out the sentence boundaries.
    text = re.sub(r"\[.*?\]", "", text)
    text = re.sub(r"https?://\S+", " ", text)
    text = re.sub(r"\b[0-9]+\b\s*", " ", text)
    doc = nlp(text)
    word_frequencies = word_frequency(doc)
    if not word_frequencies:
        return ""
    # Take the maximum once, before normalising; recomputing it inside the
    # loop would divide later words by an already-normalised maximum.
    highest_count = max(word_frequencies.values())
    normalised = {
        word: count / highest_count for word, count in word_frequencies.items()
    }
    sentence_tokens = list(doc.sents)
    sentence_scores = sentence_score(sentence_tokens, normalised)
    select_length = max(1, int(len(sentence_tokens) * 0.10))
    best_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)
    return " ".join(sentence.text for sentence in best_sentences)
st.set_page_config(
    page_title="Audio summarizer Web App",
    layout="wide",
    initial_sidebar_state="expanded"
)
st.title("Audio Summaries")


def _text_to_speech(text):
    """Return the gTTS audio for ``text`` as bytes, built entirely in memory.

    Writing to an in-memory buffer avoids fixed file names in the working
    directory (which concurrent sessions would overwrite) and leaves no file
    handle to close.
    """
    import io

    buffer = io.BytesIO()
    gtts.gTTS(text).write_to_fp(buffer)
    return buffer.getvalue()


def _show_summary(kind, summary, source_length):
    """Render one summary text area and, when audio can be produced, its player.

    Args:
        kind: Label prefix, e.g. ``"Extractive"`` or ``"Abstractive"``.
        summary: The summary text to display and speak.
        source_length: Character count of the original text, shown in the label.
    """
    st.text_area(
        label="{} Text Summarization (Summary length :{}, Actual Text :{})".format(
            kind, len(summary), source_length),
        value=summary,
        height=350)
    if not summary:
        return
    try:
        audio_bytes = _text_to_speech(summary)
    except gtts.gTTSError as err:
        # Show the failure instead of losing the text summary with it.
        st.warning("Could not generate audio for the {} summary: {}".format(kind.lower(), err))
        return
    # gTTS emits MP3 data, so it is served as audio/mp3 rather than audio/wav.
    st.audio(audio_bytes, format="audio/mp3", start_time=0)


col1, col2 = st.columns(2)
# Explicit flag instead of relying on a swallowed NameError to detect runs in
# which the button was not clicked.
summaries_ready = False
with col1:
    text_ = st.text_area(label="Enter Your Text or story", height=350, placeholder="Enter Your Text or story or your article iit can be of any length")
    if st.button("Get Summary"):
        if text_.strip():
            ex_summary = get_summary(text_)
            ab_summary = abst_summary(text_)
            summaries_ready = True
        else:
            st.warning("Please enter some text to summarize.")
if summaries_ready:
    with col2:
        _show_summary("Extractive", ex_summary, len(text_))
        _show_summary("Abstractive", ab_summary[0], len(text_))