Spaces:

fexeak
/

my-space

Running

App Files Files Community

fexeak committed on Jul 25

Commit

7d026c2

1 Parent(s): cb41d64

refactor: 替换模型为SmolLM2并简化代码结构

Browse files

移除原有NSFW-Flash模型相关代码，改用更轻量的SmolLM2-135M模型
简化代码结构，仅保留基础模型加载和推理功能

Files changed (2) hide show

app.py +9 -164
app.py.bak +165 -0

app.py CHANGED Viewed

@@ -1,165 +1,10 @@
-import torch
-import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import threading
-import time
-# Global variables for model and tokenizer
-model = None
-tokenizer = None
-model_loaded = False
-def load_model():
-    """Load the model and tokenizer"""
-    global model, tokenizer, model_loaded
-    try:
-        print("Loading model...")
-        model = AutoModelForCausalLM.from_pretrained(
-            "UnfilteredAI/NSFW-Flash",
-            trust_remote_code=True,
-            torch_dtype=torch.bfloat16
-        ).to("cuda" if torch.cuda.is_available() else "cpu")
-        tokenizer = AutoTokenizer.from_pretrained(
-            "UnfilteredAI/NSFW-Flash",
-            trust_remote_code=True
-        )
-        model_loaded = True
-        print("Model loaded successfully!")
-    except Exception as e:
-        print(f"Error loading model: {e}")
-        model_loaded = False
-def generate_response(message, history, temperature, max_length, top_p):
-    """Generate response from the model"""
-    global model, tokenizer, model_loaded
-    if not model_loaded:
-        return "模型尚未加载完成，请稍等..."
-    try:
-        # Build conversation history
-        chat = [
-            {"role": "system", "content": "You are NSFW-Flash, an AI assistant. Respond helpfully and appropriately."}
-        ]
-        # Add conversation history
-        for user_msg, bot_msg in history:
-            chat.append({"role": "user", "content": user_msg})
-            if bot_msg:
-                chat.append({"role": "assistant", "content": bot_msg})
-        # Add current message
-        chat.append({"role": "user", "content": message})
-        # Apply chat template
-        chat_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-        # Tokenize
-        inputs = tokenizer(chat_text, return_tensors="pt", return_attention_mask=False)
-        if torch.cuda.is_available():
-            inputs = inputs.to("cuda")
-        # Generate
-        with torch.no_grad():
-            generated = model.generate(
-                **inputs,
-                max_length=max_length,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True,
-                use_cache=False,
-                eos_token_id=tokenizer.eos_token_id,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        # Decode response
-        response = tokenizer.decode(generated[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
-        return response.strip()
-    except Exception as e:
-        return f"生成回复时出错: {str(e)}"
-def chat_interface(message, history, temperature, max_length, top_p):
-    """Chat interface for Gradio"""
-    response = generate_response(message, history, temperature, max_length, top_p)
-    history.append([message, response])
-    return "", history
-# Load model in background
-loading_thread = threading.Thread(target=load_model)
-loading_thread.start()
-# Create Gradio interface
-with gr.Blocks(title="AI Chat Assistant") as demo:
-    gr.Markdown("# 🤖 AI Chat Assistant")
-    gr.Markdown("基于 NSFW-Flash 模型的聊天助手")
-    with gr.Row():
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
-                value=[],
-                height=500,
-                show_label=False
-            )
-            with gr.Row():
-                msg = gr.Textbox(
-                    placeholder="输入您的消息...",
-                    show_label=False,
-                    scale=4
-                )
-                send_btn = gr.Button("发送", scale=1)
-            clear_btn = gr.Button("清空对话")
-        with gr.Column(scale=1):
-            gr.Markdown("### 参数设置")
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=2.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-            max_length = gr.Slider(
-                minimum=100,
-                maximum=2000,
-                value=1000,
-                step=100,
-                label="最大长度"
-            )
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-p"
-            )
-    # Event handlers
-    send_btn.click(
-        chat_interface,
-        inputs=[msg, chatbot, temperature, max_length, top_p],
-        outputs=[msg, chatbot]
-    )
-    msg.submit(
-        chat_interface,
-        inputs=[msg, chatbot, temperature, max_length, top_p],
-        outputs=[msg, chatbot]
-    )
-    clear_btn.click(
-        lambda: ([], ""),
-        outputs=[chatbot, msg]
-    )
-if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=True,
-        show_error=True
-    )

+# pip install transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
+checkpoint = "HuggingFaceTB/SmolLM2-135M"
+device = "cuda" # for GPU usage or "cpu" for CPU usage
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
+model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+inputs = tokenizer.encode("Gravity is", return_tensors="pt").to(device)
+outputs = model.generate(inputs)
+print(tokenizer.decode(outputs[0]))

app.py.bak ADDED Viewed

	@@ -0,0 +1,165 @@

+import torch
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import threading
+import time
+# Shared module state: assigned by load_model() and read by generate_response().
+model = None
+tokenizer = None
+# Set to True by load_model() only after both the model and the tokenizer
+# have been loaded without raising; reset to False if loading fails.
+model_loaded = False
+def load_model():
+    """Load the model and tokenizer"""
+    global model, tokenizer, model_loaded
+    try:
+        print("Loading model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            "UnfilteredAI/NSFW-Flash",
+            trust_remote_code=True,
+            torch_dtype=torch.bfloat16
+        ).to("cuda" if torch.cuda.is_available() else "cpu")
+        tokenizer = AutoTokenizer.from_pretrained(
+            "UnfilteredAI/NSFW-Flash",
+            trust_remote_code=True
+        )
+        model_loaded = True
+        print("Model loaded successfully!")
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        model_loaded = False
+def generate_response(message, history, temperature, max_length, top_p):
+    """Generate response from the model"""
+    global model, tokenizer, model_loaded
+    if not model_loaded:
+        return "模型尚未加载完成，请稍等..."
+    try:
+        # Build conversation history
+        chat = [
+            {"role": "system", "content": "You are NSFW-Flash, an AI assistant. Respond helpfully and appropriately."}
+        ]
+        # Add conversation history
+        for user_msg, bot_msg in history:
+            chat.append({"role": "user", "content": user_msg})
+            if bot_msg:
+                chat.append({"role": "assistant", "content": bot_msg})
+        # Add current message
+        chat.append({"role": "user", "content": message})
+        # Apply chat template
+        chat_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+        # Tokenize
+        inputs = tokenizer(chat_text, return_tensors="pt", return_attention_mask=False)
+        if torch.cuda.is_available():
+            inputs = inputs.to("cuda")
+        # Generate
+        with torch.no_grad():
+            generated = model.generate(
+                **inputs,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                use_cache=False,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        # Decode response
+        response = tokenizer.decode(generated[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+        return response.strip()
+    except Exception as e:
+        return f"生成回复时出错: {str(e)}"
+def chat_interface(message, history, temperature, max_length, top_p):
+    """Chat interface for Gradio"""
+    response = generate_response(message, history, temperature, max_length, top_p)
+    history.append([message, response])
+    return "", history
+# Start loading the model on a separate thread at import time so the UI
+# definition below is not blocked; generate_response() checks model_loaded
+# and returns a "still loading" notice until load_model() finishes.
+loading_thread = threading.Thread(target=load_model)
+loading_thread.start()
+# Create Gradio interface
+with gr.Blocks(title="AI Chat Assistant") as demo:
+    gr.Markdown("# 🤖 AI Chat Assistant")
+    gr.Markdown("基于 NSFW-Flash 模型的聊天助手")
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(
+                value=[],
+                height=500,
+                show_label=False
+            )
+            with gr.Row():
+                msg = gr.Textbox(
+                    placeholder="输入您的消息...",
+                    show_label=False,
+                    scale=4
+                )
+                send_btn = gr.Button("发送", scale=1)
+            clear_btn = gr.Button("清空对话")
+        with gr.Column(scale=1):
+            gr.Markdown("### 参数设置")
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=2.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+            max_length = gr.Slider(
+                minimum=100,
+                maximum=2000,
+                value=1000,
+                step=100,
+                label="最大长度"
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-p"
+            )
+    # Event handlers
+    send_btn.click(
+        chat_interface,
+        inputs=[msg, chatbot, temperature, max_length, top_p],
+        outputs=[msg, chatbot]
+    )
+    msg.submit(
+        chat_interface,
+        inputs=[msg, chatbot, temperature, max_length, top_p],
+        outputs=[msg, chatbot]
+    )
+    clear_btn.click(
+        lambda: ([], ""),
+        outputs=[chatbot, msg]
+    )
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        show_error=True
+    )