QAway-to committed
Commit f12b1ae · 1 Parent(s): 0611243

New T5-type model. app.py v1.6

Files changed (2)
  1. app.py +26 -18
  2. core/interviewer.py +88 -61
app.py CHANGED
@@ -6,14 +6,21 @@ from core.utils import generate_first_question
 from core.mbti_analyzer import analyze_mbti
 from core.interviewer import generate_question, session_state
 
-async def async_loader(progress_fn):
-    """Async loader animation (spinning dots)."""
+
+# --------------------------------------------------------------
+# 🌀 Async "Thinking..." animation
+# --------------------------------------------------------------
+async def async_loader(update_fn, delay=0.15):
     frames = cycle(["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"])
-    for _ in range(10):
-        await asyncio.sleep(0.2)
-        progress_fn(next(frames))
+    for frame in frames:
+        update_fn(f"💭 Interviewer is thinking... {frame}")
+        await asyncio.sleep(delay)
 
-def analyze_and_ask(user_text, prev_count, progress=gr.Progress(track_tqdm=True)):
+
+# --------------------------------------------------------------
+# ⚙️ Main logic
+# --------------------------------------------------------------
+def analyze_and_ask(user_text, prev_count):
     if not user_text.strip():
         yield "⚠️ Please enter your answer.", "", prev_count
         return
@@ -25,24 +32,20 @@ def analyze_and_ask(user_text, prev_count, progress=gr.Progress(track_tqdm=True)
     n = 1
     counter = f"{n}/8"
 
-    # 1️⃣ First message, shown instantly
+    # 1️⃣ Instant reaction: "analysis is starting"
     yield "⏳ Analyzing personality...", "💭 Interviewer is thinking... ⠋", counter
 
-    # 2️⃣ Loader animation in the background
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    loop.create_task(async_loader(lambda f: None))
-
-    # 3️⃣ MBTI analysis
+    # 2️⃣ MBTI analysis
     mbti_gen = analyze_mbti(user_text)
     mbti_text = ""
     for chunk in mbti_gen:
         mbti_text = chunk
-        yield mbti_text, "💭 Interviewer is thinking... ", counter
+        yield mbti_text, "💭 Interviewer is thinking... ", counter
 
-    # 4️⃣ Question generation
-    question = generate_question(user_id)
+    # 3️⃣ Question generation
+    question = generate_question(user_id=user_id, user_answer=user_text)
 
+    # 4️⃣ Check whether the session is complete
     if question.startswith("✅ All"):
         yield f"{mbti_text}\n\nSession complete.", "🎯 All MBTI axes covered.", "8/8"
         return
@@ -50,8 +53,9 @@ def analyze_and_ask(user_text, prev_count, progress=gr.Progress(track_tqdm=True)
     # 5️⃣ Final output
     yield mbti_text, question, counter
 
+
 # --------------------------------------------------------------
-# UI
+# 🧱 Gradio interface
 # --------------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
     gr.Markdown(
@@ -67,11 +71,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as
             lines=4
         )
         btn = gr.Button("Анализировать и задать новый вопрос", variant="primary")
+
     with gr.Column(scale=1):
         mbti_out = gr.Textbox(label="📊 Анализ MBTI", lines=4)
         interviewer_out = gr.Textbox(label="💬 Следующий вопрос", lines=3)
         progress = gr.Textbox(label="⏳ Прогресс", value="0/8")
 
+    # Async handling
     btn.click(
         analyze_and_ask,
         inputs=[inp, progress],
@@ -79,10 +85,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as
         show_progress=True
     )
 
+    # First question on page load
    demo.load(
        lambda: ("", generate_first_question(), "0/8"),
        inputs=None,
        outputs=[mbti_out, interviewer_out, progress]
    )
 
-demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
+# The queue is needed for streaming, but without the old concurrency_count argument
+demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860)
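Note on the new control flow: `analyze_and_ask` is a generator, so Gradio re-renders the outputs after every `yield`, and `demo.queue()` is what enables that streaming (recent Gradio releases dropped the `concurrency_count` argument the comment refers to). A minimal, self-contained sketch of the same pattern; the component names and the `slow_echo` function are illustrative, not the app's own:

```python
import time
import gradio as gr

def slow_echo(text):
    """Yield progressively longer output; Gradio re-renders after each yield."""
    partial = ""
    for word in text.split():
        partial += word + " "
        time.sleep(0.3)  # stand-in for model latency
        yield partial

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Streamed output")
    gr.Button("Run").click(slow_echo, inputs=inp, outputs=out)

# queue() is what makes generator streaming work; only max_size is passed,
# since newer Gradio versions removed concurrency_count.
demo.queue(max_size=32).launch()
```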
core/interviewer.py CHANGED
@@ -1,82 +1,109 @@
 # core/interviewer.py
 import random
-import itertools
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import difflib
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
-INTERVIEWER_MODEL = "f3nsmart/TinyLlama-MBTI-Interviewer-LoRA"
+# Pick one
+QG_MODEL = "mrm8488/t5-small-finetuned-question-generation-ap"
+# QG_MODEL = "google/flan-t5-small"
+# QG_MODEL = "iarfmoose/t5-base-question-generator"
 
-tokenizer = AutoTokenizer.from_pretrained(INTERVIEWER_MODEL)
-model = AutoModelForCausalLM.from_pretrained(
-    INTERVIEWER_MODEL, torch_dtype="auto", device_map="auto"
-)
+tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
+model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
 
-llm_pipe = pipeline(
-    "text-generation",
+qg = pipeline(
+    "text2text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=70,
-    temperature=0.6,
-    top_p=0.9,
+    max_new_tokens=40,
+    num_beams=4,
+    no_repeat_ngram_size=4
 )
 
 CATEGORIES = [
-    "Introversion", "Extroversion",
-    "Sensing", "Intuition",
-    "Thinking", "Feeling",
-    "Judging", "Perceiving"
+    "Introversion","Extroversion",
+    "Sensing","Intuition",
+    "Thinking","Feeling",
+    "Judging","Perceiving"
 ]
 
+# Simple per-user "memory" with repeat protection
 session_state = {}
 
 def init_session(user_id: str):
-    session_state[user_id] = {"asked": [], "answers": {}, "iteration": 1}
+    session_state[user_id] = {"asked": [], "answers": {}, "questions": []}
 
-def select_next_category(user_id: str):
-    s = session_state[user_id]
-    remaining = [c for c in CATEGORIES if c not in s["asked"]]
-    if not remaining:
-        return None
-    next_cat = random.choice(remaining)
-    s["asked"].append(next_cat)
-    return next_cat
-
-def build_prompt(category: str):
-    # A new, more "demonstrative" prompt:
-    return (
-        f"You are a friendly MBTI interviewer.\n"
-        f"Ask one short, open-ended question that explores {category.lower()}.\n"
-        f"Examples: 'What makes you feel most energized in social situations?'\n"
-        f"Output only the question, without quotes, without explanations."
-    )
-
-def clean_question(text: str) -> str:
-    """Strips instructions and keeps only the question."""
-    text = text.strip()
-
-    # drop lines containing 'ask', 'instruction', etc.
-    bad_phrases = ["ask", "instruction", "output only", "question about", "you are"]
-    for phrase in bad_phrases:
-        if phrase.lower() in text.lower():
-            # keep only the part after the last '?'
-            if '?' in text:
-                text = text.split('?')[-1]
-            else:
-                text = text.replace(phrase, '')
-    text = text.strip().strip('"').strip("'")
-    if not text.endswith("?"):
-        text += "?"
-    return text
-
-def generate_question(user_id: str) -> str:
-    """Generate a new question for a category."""
+def _too_similar(q: str, prev: list[str], thresh=0.86) -> bool:
+    qn = q.lower().strip()
+    for p in prev:
+        if difflib.SequenceMatcher(None, qn, p.lower().strip()).ratio() >= thresh:
+            return True
+    return False
+
+def _clean(q: str) -> str:
+    q = q.strip().strip('"').strip("'")
+    # cut prefixes like "question:", "generate a question:", etc.
+    bad = ["question:", "generate a question", "ask", "instruction", "output only", "you are"]
+    low = q.lower()
+    for b in bad:
+        if b in low:
+            # take the part after the colon, if there is one
+            if ":" in q:
+                q = q.split(":", 1)[-1]
+            q = q.replace(b, "")
+    q = q.strip()
+    if not q.endswith("?"):
+        q += "?"
+    # too short / garbage output: fall back to a stock question
+    if len(q.split()) < 3:
+        return "What do you usually enjoy doing in your free time?"
+    return q
+
+def _template(category: str, user_answer: str) -> str:
+    """
+    T5 handles short templates better than long instructions.
+    Slightly different phrasing per model, but the idea is the same:
+    """
+    if "flan" in QG_MODEL:
+        # FLAN prefers simple instruction-tuning-style tasks
+        return (
+            f"Generate one open-ended question about {category.lower()} based on the user's answer.\n"
+            f"User: {user_answer}\n"
+            f"Question:"
+        )
+    elif "question-generator" in QG_MODEL:
+        # The model is trained for QG; the context alone is enough
+        return f"generate question: {user_answer} (topic: {category})"
+    else:
+        # very small QG
+        return f"answer: {user_answer} topic: {category} -> question"
+
+def generate_question(user_id: str, user_answer: str) -> str:
     if user_id not in session_state:
         init_session(user_id)
 
-    category = select_next_category(user_id)
-    if not category:
+    S = session_state[user_id]
+
+    # pick a category that has NOT been asked yet
+    remaining = [c for c in CATEGORIES if c not in S["asked"]]
+    if not remaining:
         return "✅ All 8 categories completed."
 
-    prompt = build_prompt(category)
-    raw = llm_pipe(prompt)[0]["generated_text"]
-    question = clean_question(raw)
-    return f"({category}) {question}"
+    category = random.choice(remaining)
+
+    # a short, "non-chatty" template (T5 likes these)
+    prompt = _template(category, user_answer)
+
+    out = qg(prompt)[0]["generated_text"]
+    q = _clean(out)
+
+    # guard against repeats/paraphrases
+    tries = 0
+    while _too_similar(q, S["questions"]) and tries < 3:
+        out = qg(prompt)[0]["generated_text"]
+        q = _clean(out)
+        tries += 1
+
+    S["asked"].append(category)
+    S["questions"].append(q)
+    return f"({category}) {q}"
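Note on the model swap: the interviewer now drives a seq2seq (T5-style) checkpoint through the `text2text-generation` pipeline instead of the causal TinyLlama LoRA, and a `difflib.SequenceMatcher` ratio guards against near-duplicate questions. A standalone sketch of those two pieces, using the same model id and the commit's 0.86 similarity threshold; the sample answer and previous question below are made up:

```python
import difflib
from transformers import pipeline

# Same model id as the commit; small enough to run on CPU.
qg = pipeline("text2text-generation",
              model="mrm8488/t5-small-finetuned-question-generation-ap")

# The "answer: ... topic: ... -> question" shape from _template()'s fallback branch.
prompt = "answer: I enjoy quiet evenings with a book. topic: Introversion -> question"
candidate = qg(prompt, max_new_tokens=40, num_beams=4,
               no_repeat_ngram_size=4)[0]["generated_text"]

# The repeat guard: a ratio >= 0.86 counts as "too similar" in _too_similar().
previous = "What makes you feel most energized in social situations?"
ratio = difflib.SequenceMatcher(None, candidate.lower(), previous.lower()).ratio()
print(candidate, f"(similarity to previous question: {ratio:.2f})")
```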