Q6 / app.py
JasonGordon's picture
Create app.py
715abbd verified
raw
history blame contribute delete
766 Bytes
from llama_cpp import Llama
import gradio as gr
# Model weights are fetched from the Hub on startup (cached in runtime storage).
_MODEL_CONFIG = {
    "repo_id": "QuantFactory/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF",
    # Q6_K quantization: good quality/size trade-off; swap the filename for other quants.
    "filename": "DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored.Q6_K.gguf",
    "n_ctx": 2048,       # context window — room for longer conversations
    "n_gpu_layers": -1,  # offload every layer to GPU when the hardware supports it
}

llm = Llama.from_pretrained(**_MODEL_CONFIG)
def chat(user_input, history):
    """Generate one assistant reply for a gr.ChatInterface turn.

    Args:
        user_input: The latest user message (str).
        history: Prior turns supplied by Gradio — either a list of
            (user, assistant) pairs or a list of openai-style
            {"role": ..., "content": ...} dicts, depending on the
            Gradio version/configuration.

    Returns:
        The model's reply text (str).
    """
    # BUG FIX: the original ignored `history`, so the model saw only the
    # latest message and the chat had no memory. Replay prior turns first.
    messages = []
    for entry in history:
        if isinstance(entry, dict):
            # Already openai-style; pass through as-is.
            messages.append(entry)
        else:
            user_msg, bot_msg = entry
            messages.append({"role": "user", "content": user_msg})
            if bot_msg:  # assistant slot may be None mid-turn
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": user_input})
    output = llm.create_chat_completion(messages, max_tokens=256, temperature=0.8)
    return output['choices'][0]['message']['content']
# Wire the chat callback into a Gradio chat UI and start serving.
demo = gr.ChatInterface(
    fn=chat,
    title="Q6 Uncensored Voice Agent",
)
demo.launch()