Spaces:

rahul7star
/

Qwen4-Think

Runtime error

App Files Files Community

rahul7star committed on Oct 7

Commit

2c806d6

verified ·

1 Parent(s): 1964751

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+MODEL_NAME = "ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1"
+# Load model & tokenizer once at startup
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype="auto",
+    device_map="auto"
+)
+def ask_question(prompt):
+    """Generate response (thinking + final content) from Qwen3 model."""
+    try:
+        messages = [{"role": "user", "content": prompt}]
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=True  # thinking mode
+        )
+        inputs = tokenizer([text], return_tensors="pt").to(model.device)
+        generated_ids = model.generate(
+            **inputs,
+            max_new_tokens=4096,
+            temperature=0.7,
+            do_sample=True
+        )
+        output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
+        # Find the thinking section (token 151668 == </think>)
+        try:
+            index = len(output_ids) - output_ids[::-1].index(151668)
+        except ValueError:
+            index = 0
+        thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
+        content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+        return thinking_content, content
+    except Exception as e:
+        return f"⚠️ Error: {e}", ""
+# --- Gradio UI ---
+with gr.Blocks(title="Qwen3 Thinking Chat") as demo:
+    gr.Markdown("## 🧠 Qwen3-4B-Thinking — Ask Anything")
+    gr.Markdown(
+        "This demo uses **ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1**, "
+        "a reasoning model that shows its internal 'thinking' trace before giving the final answer."
+    )
+    with gr.Row():
+        prompt_box = gr.Textbox(
+            label="Ask your question",
+            placeholder="e.g. Explain how quantum entanglement works.",
+            lines=3
+        )
+    with gr.Row():
+        think_output = gr.Textbox(label="🧩 Thinking process", lines=10)
+        final_output = gr.Textbox(label="💬 Final answer", lines=10)
+    ask_btn = gr.Button("🚀 Generate Answer")
+    ask_btn.click(
+        fn=ask_question,
+        inputs=prompt_box,
+        outputs=[think_output, final_output]
+    )
+demo.launch()