rahul7star committed on
Commit
2c806d6
·
verified ·
1 Parent(s): 1964751

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face model id of the reasoning-tuned Qwen3 variant served by this demo.
MODEL_NAME = "ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1"

# Load the tokenizer and weights once at import time so every request reuses them.
# torch_dtype="auto" keeps the checkpoint's native precision; device_map="auto"
# lets the loader place layers on whatever GPU/CPU is available.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map="auto",
)
14
+
15
def ask_question(prompt):
    """Run one chat turn through the Qwen3 thinking model.

    Args:
        prompt: The user's question as plain text.

    Returns:
        A ``(thinking, answer)`` tuple of strings: the model's internal
        reasoning trace and the final answer. On failure, the first
        element carries an error message and the second is empty.
    """
    try:
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,  # ask the template for a <think>...</think> trace
        )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # inference_mode() disables autograd tracking during generation,
        # cutting memory use — the original ran generate() with grad enabled.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=4096,
                temperature=0.7,
                do_sample=True,
            )
        # Keep only the newly generated tokens (drop the echoed prompt).
        output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()

        # Split the reasoning trace from the answer at the last </think> token.
        # 151668 is presumably the </think> token id in the Qwen3 vocabulary —
        # TODO confirm via tokenizer.convert_tokens_to_ids("</think>").
        try:
            index = len(output_ids) - output_ids[::-1].index(151668)
        except ValueError:
            # No </think> emitted: treat the whole output as the final answer.
            index = 0

        thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
        content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

        return thinking_content, content
    except Exception as e:  # surface any failure in the UI instead of crashing the app
        return f"⚠️ Error: {e}", ""
47
+
48
# --- Gradio UI ---
# Build the demo page: one question box, two output panes (reasoning trace
# and final answer), and a button wired to ask_question.
with gr.Blocks(title="Qwen3 Thinking Chat") as demo:
    gr.Markdown("## 🧠 Qwen3-4B-Thinking — Ask Anything")
    gr.Markdown(
        "This demo uses **ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1**, "
        "a reasoning model that shows its internal 'thinking' trace before giving the final answer."
    )

    with gr.Row():
        question_input = gr.Textbox(
            label="Ask your question",
            placeholder="e.g. Explain how quantum entanglement works.",
            lines=3,
        )

    with gr.Row():
        trace_box = gr.Textbox(label="🧩 Thinking process", lines=10)
        answer_box = gr.Textbox(label="💬 Final answer", lines=10)

    generate_btn = gr.Button("🚀 Generate Answer")

    # Clicking the button feeds the prompt to ask_question and fans the
    # (thinking, answer) tuple out to the two text panes.
    generate_btn.click(
        fn=ask_question,
        inputs=question_input,
        outputs=[trace_box, answer_box],
    )

demo.launch()