import os
import re
import traceback
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
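
# Model configuration: a small instruct-tuned Qwen2.5 model in quantised GGUF
# format, downloaded from the Hugging Face Hub on first use.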
MODEL_REPO = "Qwen/Qwen2.5-1.5B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
MODEL_TYPE = "qwen"
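
# Lightweight keyword guardrails: queries matching these lists get a canned
# redirect instead of being sent to the model.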
PROFANITY_LIST = ['damn', 'hell', 'crap', 'stupid', 'idiot', 'dumb', 'suck']
OFF_TOPIC_KEYWORDS = ['weather', 'sports', 'politics', 'religion', 'movie', 'game', 'recipe', 'joke', 'story', 'music', 'celebrity']

class LLMEngine:
    """Wraps a local llama.cpp model with simple keyword guardrails."""

    def __init__(self):
        self.model = None
        self.context = ""
        self.load_context()
    def load_context(self):
        """Load the company context document used to ground responses."""
        try:
            context_path = os.path.join(
                os.path.dirname(os.path.dirname(__file__)), "data", "company_context.txt"
            )
            if os.path.exists(context_path):
                with open(context_path, "r", encoding="utf-8") as f:
                    self.context = f.read()
        except Exception as e:
            print(f"Error loading context: {e}")
    def load_model(self):
        """Lazily download and initialise the GGUF model on first use."""
        if self.model is None:
            print("Loading LLM...")
            try:
                model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
                self.model = Llama(
                    model_path=model_path, n_ctx=2048, n_gpu_layers=0, verbose=False
                )
                print("LLM Loaded.")
            except Exception as e:
                print(f"Failed to load LLM: {e}")
                traceback.print_exc()
    def check_profanity(self, text):
        """Return True if the text contains any blocklisted word as a whole word."""
        lowered = text.lower()
        for word in PROFANITY_LIST:
            # re.escape guards against list entries containing regex metacharacters
            if re.search(r'\b' + re.escape(word) + r'\b', lowered):
                return True
        return False
    def check_off_topic(self, text):
        """Return True if the text mentions an off-topic keyword as a whole word.

        A word-boundary match avoids substring false positives such as
        'story' matching inside 'history'.
        """
        lowered = text.lower()
        for keyword in OFF_TOPIC_KEYWORDS:
            if re.search(r'\b' + re.escape(keyword) + r'\b', lowered):
                return True
        return False
    def generate_response(self, user_query, history=None, dynamic_context=None):
        """Answer a user query, applying profanity and topic guardrails first."""
        history = history or []  # avoid a shared mutable default argument
        if self.check_profanity(user_query):
            return "I'm here to assist with business operations. Please keep our conversation professional."
        if self.check_off_topic(user_query):
            return "I specialize in demand forecasting, inventory management, and order processing. How can I help you with these?"
        greetings = ["hi", "hello", "hey", "greetings", "good morning", "good afternoon", "good evening"]
        if user_query.lower().strip(" !.,?") in greetings:
            return (
                "Hello! I'm BMS AI Assistant. I can help you with:\n"
                "- Demand forecasting\n"
                "- Inventory management\n"
                "- Order processing"
            )