QAway-to
committed on
Commit
·
f12b1ae
1
Parent(s):
0611243
New model T5 type. app.py v1.6
Browse files- app.py +26 -18
- core/interviewer.py +88 -61
app.py
CHANGED
|
@@ -6,14 +6,21 @@ from core.utils import generate_first_question
|
|
| 6 |
from core.mbti_analyzer import analyze_mbti
|
| 7 |
from core.interviewer import generate_question, session_state
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
frames = cycle(["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"])
|
| 12 |
-
for
|
| 13 |
-
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
if not user_text.strip():
|
| 18 |
yield "⚠️ Please enter your answer.", "", prev_count
|
| 19 |
return
|
|
@@ -25,24 +32,20 @@ def analyze_and_ask(user_text, prev_count, progress=gr.Progress(track_tqdm=True)
|
|
| 25 |
n = 1
|
| 26 |
counter = f"{n}/8"
|
| 27 |
|
| 28 |
-
# 1️⃣
|
| 29 |
yield "⏳ Analyzing personality...", "💭 Interviewer is thinking... ⠋", counter
|
| 30 |
|
| 31 |
-
# 2️⃣
|
| 32 |
-
loop = asyncio.new_event_loop()
|
| 33 |
-
asyncio.set_event_loop(loop)
|
| 34 |
-
loop.create_task(async_loader(lambda f: None))
|
| 35 |
-
|
| 36 |
-
# 3️⃣ Анализ MBTI
|
| 37 |
mbti_gen = analyze_mbti(user_text)
|
| 38 |
mbti_text = ""
|
| 39 |
for chunk in mbti_gen:
|
| 40 |
mbti_text = chunk
|
| 41 |
-
yield mbti_text, "💭 Interviewer is thinking...
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
question = generate_question(user_id)
|
| 45 |
|
|
|
|
| 46 |
if question.startswith("✅ All"):
|
| 47 |
yield f"{mbti_text}\n\nSession complete.", "🎯 All MBTI axes covered.", "8/8"
|
| 48 |
return
|
|
@@ -50,8 +53,9 @@ def analyze_and_ask(user_text, prev_count, progress=gr.Progress(track_tqdm=True)
|
|
| 50 |
# 5️⃣ Финальный вывод
|
| 51 |
yield mbti_text, question, counter
|
| 52 |
|
|
|
|
| 53 |
# --------------------------------------------------------------
|
| 54 |
-
#
|
| 55 |
# --------------------------------------------------------------
|
| 56 |
with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
|
| 57 |
gr.Markdown(
|
|
@@ -67,11 +71,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as
|
|
| 67 |
lines=4
|
| 68 |
)
|
| 69 |
btn = gr.Button("Анализировать и задать новый вопрос", variant="primary")
|
|
|
|
| 70 |
with gr.Column(scale=1):
|
| 71 |
mbti_out = gr.Textbox(label="📊 Анализ MBTI", lines=4)
|
| 72 |
interviewer_out = gr.Textbox(label="💬 Следующий вопрос", lines=3)
|
| 73 |
progress = gr.Textbox(label="⏳ Прогресс", value="0/8")
|
| 74 |
|
|
|
|
| 75 |
btn.click(
|
| 76 |
analyze_and_ask,
|
| 77 |
inputs=[inp, progress],
|
|
@@ -79,10 +85,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as
|
|
| 79 |
show_progress=True
|
| 80 |
)
|
| 81 |
|
|
|
|
| 82 |
demo.load(
|
| 83 |
lambda: ("", generate_first_question(), "0/8"),
|
| 84 |
inputs=None,
|
| 85 |
outputs=[mbti_out, interviewer_out, progress]
|
| 86 |
)
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
| 6 |
from core.mbti_analyzer import analyze_mbti
|
| 7 |
from core.interviewer import generate_question, session_state
|
| 8 |
|
| 9 |
+
|
| 10 |
+
# --------------------------------------------------------------
|
| 11 |
+
# 🌀 Асинхронная анимация "Thinking..."
|
| 12 |
+
# --------------------------------------------------------------
|
| 13 |
+
async def async_loader(update_fn, delay=0.15):
    """Drive an endless "thinking" spinner through ``update_fn``.

    On every tick the next braille spinner glyph is appended to the status
    text and handed to ``update_fn``; the coroutine then sleeps for
    ``delay`` seconds. The glyph sequence wraps around indefinitely, so the
    coroutine only stops when it is cancelled or ``update_fn`` raises.

    Args:
        update_fn: Callable invoked with the status string for each frame.
        delay: Seconds to sleep between frames.
    """
    glyphs = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
    tick = 0
    while True:
        # Wrap the index so the animation restarts after the last glyph.
        glyph = glyphs[tick % len(glyphs)]
        update_fn(f"💭 Interviewer is thinking... {glyph}")
        await asyncio.sleep(delay)
        tick += 1
|
| 18 |
|
| 19 |
+
|
| 20 |
+
# --------------------------------------------------------------
|
| 21 |
+
# ⚙️ Основная логика
|
| 22 |
+
# --------------------------------------------------------------
|
| 23 |
+
def analyze_and_ask(user_text, prev_count):
|
| 24 |
if not user_text.strip():
|
| 25 |
yield "⚠️ Please enter your answer.", "", prev_count
|
| 26 |
return
|
|
|
|
| 32 |
n = 1
|
| 33 |
counter = f"{n}/8"
|
| 34 |
|
| 35 |
+
# 1️⃣ Мгновенная реакция — "анализ начинается"
|
| 36 |
yield "⏳ Analyzing personality...", "💭 Interviewer is thinking... ⠋", counter
|
| 37 |
|
| 38 |
+
# 2️⃣ Анализ MBTI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
mbti_gen = analyze_mbti(user_text)
|
| 40 |
mbti_text = ""
|
| 41 |
for chunk in mbti_gen:
|
| 42 |
mbti_text = chunk
|
| 43 |
+
yield mbti_text, "💭 Interviewer is thinking... ⠹", counter
|
| 44 |
|
| 45 |
+
# 3️⃣ Генерация вопроса
|
| 46 |
+
question = generate_question(user_id=user_id, user_answer=user_text)
|
| 47 |
|
| 48 |
+
# 4️⃣ Проверяем завершение сессии
|
| 49 |
if question.startswith("✅ All"):
|
| 50 |
yield f"{mbti_text}\n\nSession complete.", "🎯 All MBTI axes covered.", "8/8"
|
| 51 |
return
|
|
|
|
| 53 |
# 5️⃣ Финальный вывод
|
| 54 |
yield mbti_text, question, counter
|
| 55 |
|
| 56 |
+
|
| 57 |
# --------------------------------------------------------------
|
| 58 |
+
# 🧱 Интерфейс Gradio
|
| 59 |
# --------------------------------------------------------------
|
| 60 |
with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
|
| 61 |
gr.Markdown(
|
|
|
|
| 71 |
lines=4
|
| 72 |
)
|
| 73 |
btn = gr.Button("Анализировать и задать новый вопрос", variant="primary")
|
| 74 |
+
|
| 75 |
with gr.Column(scale=1):
|
| 76 |
mbti_out = gr.Textbox(label="📊 Анализ MBTI", lines=4)
|
| 77 |
interviewer_out = gr.Textbox(label="💬 Следующий вопрос", lines=3)
|
| 78 |
progress = gr.Textbox(label="⏳ Прогресс", value="0/8")
|
| 79 |
|
| 80 |
+
# Асинхронная обработка
|
| 81 |
btn.click(
|
| 82 |
analyze_and_ask,
|
| 83 |
inputs=[inp, progress],
|
|
|
|
| 85 |
show_progress=True
|
| 86 |
)
|
| 87 |
|
| 88 |
+
# Стартовый вопрос при загрузке
|
| 89 |
demo.load(
|
| 90 |
lambda: ("", generate_first_question(), "0/8"),
|
| 91 |
inputs=None,
|
| 92 |
outputs=[mbti_out, interviewer_out, progress]
|
| 93 |
)
|
| 94 |
|
| 95 |
+
# Очередь нужна для стриминга, но без старого аргумента concurrency_count
|
| 96 |
+
demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860)
|
core/interviewer.py
CHANGED
|
@@ -1,82 +1,109 @@
|
|
| 1 |
# core/interviewer.py
|
| 2 |
import random
|
| 3 |
-
import
|
| 4 |
-
from transformers import AutoTokenizer,
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 9 |
-
model =
|
| 10 |
-
INTERVIEWER_MODEL, torch_dtype="auto", device_map="auto"
|
| 11 |
-
)
|
| 12 |
|
| 13 |
-
|
| 14 |
-
"
|
| 15 |
model=model,
|
| 16 |
tokenizer=tokenizer,
|
| 17 |
-
max_new_tokens=
|
| 18 |
-
|
| 19 |
-
|
| 20 |
)
|
| 21 |
|
| 22 |
CATEGORIES = [
|
| 23 |
-
"Introversion",
|
| 24 |
-
"Sensing",
|
| 25 |
-
"Thinking",
|
| 26 |
-
"Judging",
|
| 27 |
]
|
| 28 |
|
|
|
|
| 29 |
session_state = {}
|
| 30 |
|
| 31 |
def init_session(user_id: str):
|
| 32 |
-
session_state[user_id] = {"asked": [], "answers": {}, "
|
| 33 |
|
| 34 |
-
def
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
if user_id not in session_state:
|
| 73 |
init_session(user_id)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
| 77 |
return "✅ All 8 categories completed."
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# core/interviewer.py
|
| 2 |
import random
|
| 3 |
+
import difflib
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 5 |
|
| 6 |
+
# Выбери одну
|
| 7 |
+
QG_MODEL = "mrm8488/t5-small-finetuned-question-generation-ap"
|
| 8 |
+
# QG_MODEL = "google/flan-t5-small"
|
| 9 |
+
# QG_MODEL = "iarfmoose/t5-base-question-generator"
|
| 10 |
|
| 11 |
+
tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
|
| 12 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
qg = pipeline(
|
| 15 |
+
"text2text-generation",
|
| 16 |
model=model,
|
| 17 |
tokenizer=tokenizer,
|
| 18 |
+
max_new_tokens=40,
|
| 19 |
+
num_beams=4,
|
| 20 |
+
no_repeat_ngram_size=4
|
| 21 |
)
|
| 22 |
|
| 23 |
CATEGORIES = [
|
| 24 |
+
"Introversion","Extroversion",
|
| 25 |
+
"Sensing","Intuition",
|
| 26 |
+
"Thinking","Feeling",
|
| 27 |
+
"Judging","Perceiving"
|
| 28 |
]
|
| 29 |
|
| 30 |
+
# Простенькая “память” с защитой от повторов
|
| 31 |
session_state = {}
|
| 32 |
|
| 33 |
def init_session(user_id: str):
|
| 34 |
+
session_state[user_id] = {"asked": [], "answers": {}, "questions": []}
|
| 35 |
|
| 36 |
+
def _too_similar(q: str, prev: list[str], thresh: float = 0.86) -> bool:
    """Return True if *q* is a near-duplicate of any string in *prev*.

    Both sides are lower-cased and stripped of surrounding whitespace, then
    compared with ``difflib.SequenceMatcher``; a ratio greater than or equal
    to ``thresh`` counts as a duplicate.

    Args:
        q: Candidate question.
        prev: Previously asked questions to compare against.
        thresh: Similarity ratio (0..1) at or above which *q* is rejected.

    Returns:
        True when at least one earlier question is too similar, else False
        (including when *prev* is empty).
    """
    candidate = q.lower().strip()
    # Argument order is kept (candidate first): SequenceMatcher.ratio() may
    # depend on which sequence is passed first.
    return any(
        difflib.SequenceMatcher(None, candidate, earlier.lower().strip()).ratio() >= thresh
        for earlier in prev
    )
|
| 42 |
+
|
| 43 |
+
def _clean(q: str) -> str:
    """Normalise raw model output into a single well-formed question.

    Steps:
      1. Trim surrounding whitespace and quote characters.
      2. Peel off leading prompt-echo prefixes such as ``question:`` or
         ``generate a question:``. Only the text *before the first colon*
         is inspected, and it must be (or start with) a whole known label,
         compared case-insensitively. Ordinary words that merely contain a
         label ("task", "asked") and colons inside a real question are left
         untouched.
      3. Make sure the text ends with ``?``.
      4. Replace results shorter than three words with a generic fallback
         question.

    Args:
        q: Raw generated text.

    Returns:
        The cleaned question, or the fallback question when too little
        usable text remains.
    """
    fallback = "What do you usually enjoy doing in your free time?"
    labels = (
        "generate a question",
        "question",
        "ask",
        "instruction",
        "output only",
        "you are",
    )

    def _trim(text: str) -> str:
        # Drop outer whitespace and any wrapping single/double quotes.
        return text.strip().strip("\"'").strip()

    text = _trim(q)

    # Several labels can be stacked ("Generate a question: Question: ..."),
    # so keep peeling until the head is no longer a known label.
    while ":" in text:
        head, _, tail = text.partition(":")
        head = head.strip().lower()
        is_label = "?" not in head and any(
            head == label or head.startswith(label + " ") for label in labels
        )
        if not is_label:
            break
        text = _trim(tail)

    if not text.endswith("?"):
        text += "?"

    # Too short to be a meaningful question: use the generic fallback.
    if len(text.split()) < 3:
        return fallback
    return text
|
| 61 |
+
|
| 62 |
+
def _template(category: str, user_answer: str, model_name=None) -> str:
    """Build the question-generation prompt for a T5-style model.

    T5 models respond better to short templates than to long instructions,
    and different checkpoints expect slightly different wording, so the
    prompt shape is chosen from the model name.

    Args:
        category: MBTI category the question should target.
        user_answer: The user's latest answer, used as context.
        model_name: Model identifier used to pick the prompt shape.
            Defaults to the module-level ``QG_MODEL``.

    Returns:
        The prompt string to feed to the generation pipeline.
    """
    name = (QG_MODEL if model_name is None else model_name).lower()

    if "flan" in name:
        # FLAN checkpoints are instruction-tuned: give a plain task statement.
        return (
            f"Generate one open-ended question about {category.lower()} based on the user's answer.\n"
            f"User: {user_answer}\n"
            "Question:"
        )
    if "question-generator" in name:
        # Checkpoint trained for question generation: context is enough.
        return f"generate question: {user_answer} (topic: {category})"
    # Default shape for the very small QG checkpoint.
    return f"answer: {user_answer} topic: {category} -> question"
|
| 80 |
+
|
| 81 |
+
def generate_question(user_id: str, user_answer: str) -> str:
    """Return the next interview question for *user_id*, tagged with its category.

    A session entry is created on first use. A category that has not been
    asked yet is picked at random, a prompt is built from it and the user's
    latest answer, and the generated text is cleaned. If the result is too
    similar to a question already asked in this session, generation is
    retried up to three times; after that the last candidate is accepted
    as-is.

    Args:
        user_id: Key identifying the session in ``session_state``.
        user_answer: The user's latest answer, used as generation context.

    Returns:
        ``"(<category>) <question>"``, or the string
        ``"✅ All 8 categories completed."`` once every category was asked.
    """
    if user_id not in session_state:
        init_session(user_id)
    state = session_state[user_id]

    # Only categories that have not been asked yet are eligible.
    remaining = [c for c in CATEGORIES if c not in state["asked"]]
    if not remaining:
        return "✅ All 8 categories completed."

    category = random.choice(remaining)

    # Short, terse template: T5-style models handle these best.
    prompt = _template(category, user_answer)
    question = _clean(qg(prompt)[0]["generated_text"])

    # Guard against repeats/paraphrases. The first attempt uses the
    # pipeline's default decoding; re-running the same prompt with the same
    # deterministic settings would return the same text, so retries switch
    # to sampling to actually get a different candidate.
    for _ in range(3):
        if not _too_similar(question, state["questions"]):
            break
        sampled = qg(prompt, do_sample=True, top_p=0.95, num_beams=1)
        question = _clean(sampled[0]["generated_text"])

    state["asked"].append(category)
    state["questions"].append(question)
    return f"({category}) {question}"
|