Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
from util import
|
| 2 |
-
from util import pipeline
|
| 3 |
-
|
| 4 |
import gradio as gr
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def get_model(cp):
|
|
@@ -11,10 +11,26 @@ def get_model(cp):
|
|
| 11 |
return tokenizer, model
|
| 12 |
|
| 13 |
tokenizer, model = get_model(cp_aug)
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def generate_summary(url):
|
| 17 |
-
results =
|
| 18 |
summary = "\n".join(results)
|
| 19 |
return summary
|
| 20 |
|
|
|
|
| 1 |
+
from util import *
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
+
from deepmultilingualpunctuation import PunctuationModel
|
| 4 |
+
|
| 5 |
+
# Checkpoint identifier handed to get_model() further down
# (presumably a Hugging Face Hub model repo id -- TODO confirm).
cp_aug = 'minnehwg/finetune-newwiki-summarization-ver-augmented'
|
| 6 |
|
| 7 |
|
| 8 |
def get_model(cp):
|
|
|
|
| 11 |
return tokenizer, model
|
| 12 |
|
| 13 |
# Load the (tokenizer, model) pair for cp_aug once at module level;
# generate_summary reads both names as globals.
tokenizer, model = get_model(cp_aug)
|
| 14 |
+
# Punctuation model built with its default arguments; generate_summary
# passes it to execute_func as the punc_model argument.
restore_model = PunctuationModel()
|
| 15 |
+
|
| 16 |
+
def execute_func(url, model, tokenizer, punc_model):
    """Run the subtitle-to-summary pipeline for a single URL.

    Steps, in order: fetch subtitles for ``url``, restore punctuation with
    ``punc_model``, translate the text, clean it up, split it into chunks,
    summarize every chunk with ``model``/``tokenizer``, concatenate the
    chunk summaries, post-process the result and pass it to ``display``.

    Args:
        url: Passed unchanged to ``get_subtitles``.
        model: Forwarded to ``summarize`` for every chunk.
        tokenizer: Forwarded to ``summarize`` alongside ``model``.
        punc_model: Forwarded to ``restore_punctuation``.

    Returns:
        Whatever ``display`` returns for the post-processed summary text.
    """
    # get_subtitles yields a pair; only the second element is used here.
    _, captions = get_subtitles(url)
    captions = restore_punctuation(captions, punc_model)
    translated = processed(translate_long(captions))

    # 700 and 2 are passed positionally; their meaning is defined by
    # split_into_chunks (presumably chunk size and overlap -- TODO confirm).
    pieces = split_into_chunks(translated, 700, 2)
    partials = [
        summarize(piece, model, tokenizer, num_beams=3) for piece in pieces
    ]

    # Chunk summaries are concatenated with no separator between them.
    combined = ''.join(partials)

    # Drop the intermediates before post-processing, as the original does.
    del captions, translated, partials, pieces

    return display(post_processing(combined))
|
| 31 |
|
| 32 |
def generate_summary(url):
    """Return the summary for ``url`` as one newline-joined string.

    Delegates to ``execute_func`` with the module-level ``model``,
    ``tokenizer`` and ``restore_model``, then joins the items it returns
    with newline characters.

    Args:
        url: Forwarded unchanged to ``execute_func``.

    Returns:
        The joined summary string.
    """
    # NOTE(review): assumes execute_func returns an iterable of strings. If
    # display() actually returns a single str, join() would put a newline
    # between every character -- confirm against util.display.
    return "\n".join(execute_func(url, model, tokenizer, restore_model))
|
| 36 |
|