import os
import re
import traceback

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

MODEL_REPO = "Qwen/Qwen2.5-1.5B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
MODEL_TYPE = "qwen"

PROFANITY_LIST = ['damn', 'hell', 'crap', 'stupid', 'idiot', 'dumb', 'suck']
OFF_TOPIC_KEYWORDS = ['weather', 'sports', 'politics', 'religion', 'movie', 'game',
                      'recipe', 'joke', 'story', 'music', 'celebrity']


class LLMEngine:
    """Wraps a local GGUF chat model and applies simple guardrails before generation."""

    def __init__(self):
        self.model = None
        self.context = ""
        self.load_context()

    def load_context(self):
        """Load the static company context shipped with the app, if present."""
        try:
            context_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "company_context.txt")
            if os.path.exists(context_path):
                with open(context_path, "r", encoding="utf-8") as f:
                    self.context = f.read()
        except Exception as e:
            print(f"Error loading context: {e}")

    def load_model(self):
        """Download the GGUF file from the Hugging Face Hub and initialise llama.cpp (CPU only)."""
        if self.model is None:
            print("Loading LLM...")
            try:
                model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
                self.model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=0, verbose=False)
                print("LLM Loaded.")
            except Exception as e:
                print(f"Failed to load LLM: {e}")
                traceback.print_exc()

    def check_profanity(self, text):
        """Return True if the text contains any blocked word (whole-word match)."""
        lowered = text.lower()
        for word in PROFANITY_LIST:
            if re.search(r'\b' + re.escape(word) + r'\b', lowered):
                return True
        return False

    def check_off_topic(self, text):
        """Return True if the text mentions an off-topic keyword.

        Uses whole-word matching to avoid false positives such as 'story' inside 'history'.
        """
        lowered = text.lower()
        for keyword in OFF_TOPIC_KEYWORDS:
            if re.search(r'\b' + re.escape(keyword) + r'\b', lowered):
                return True
        return False

    def generate_response(self, user_query, history=None, dynamic_context=None):
        history = history or []  # avoid a mutable default argument

        if self.check_profanity(user_query):
            return "I'm here to assist with business operations. Please keep our conversation professional."

        if self.check_off_topic(user_query):
            return "I specialize in demand forecasting, inventory management, and order processing. How can I help you with these?"

        # Canned replies for greetings and meta questions, so the model is not loaded unnecessarily.
        greetings = ["hi", "hello", "hey", "greetings", "good morning", "good afternoon", "good evening"]
        if user_query.lower().strip().strip("!.,?") in greetings:
            return (
                "Hello! I'm BMS AI Assistant. I can help you with:\n"
                "1. Demand Forecasting\n"
                "2. Inventory Management (US/Canada)\n"
                "3. Competitor Analysis\n"
                "4. Order Processing (Multi-item)\n"
                "5. PDF Reports\n\n"
                "Try asking:\n"
                "'Competitor analysis for BMS0001'"
            )

        if any(w in user_query.lower() for w in ['capabilities', 'what can you do', 'help me', 'functions']):
            return (
                "I can assist you with:\n"
                "1. Demand Forecasting\n"
                "2. Inventory Management (US/Canada)\n"
                "3. Competitor Analysis\n"
                "4. Order Processing (Multi-item)\n"
                "5. PDF Reports\n\n"
                "Try asking: 'Competitor analysis for BMS0001'"
            )

        if any(w in user_query.lower() for w in ['who made you', 'who developed', 'who created', 'who built']):
            return "I'm BMS AI Assistant, developed to help you manage inventory and forecast demand efficiently."

        if any(p in user_query.lower() for p in ['how are you', 'how do you do', 'how is it going']):
            return "I'm functioning well and ready to assist you! How can I help with your inventory or forecasting needs today?"

        # Lazy-load the model on the first query that actually needs it.
        if self.model is None:
            self.load_model()
        if self.model is None:
            return "I'm sorry, I couldn't load my language model. Please try asking about specific items."

        system_prompt = (
            "You are BMS AI Assistant, a specialized ERP assistant for Business Management Systems. "
            "Your role is to assist strictly with demand forecasting, inventory management, competitor analysis, and order processing. "
            "Use the provided conversation history to understand context (e.g., what 'it' refers to). "
            "If the user asks a follow-up question like 'tell me more', elaborate on the previous topic using the context. "
            "If the answer is not in the context, politely state that you don't have that information. "
            "Do not answer general knowledge questions unrelated to the business context."
        )

        if dynamic_context:
            system_prompt += f"\n\nCURRENT DATA VISIBLE TO USER:\n{dynamic_context}\n(Use this data to answer follow-up questions)"

        # Build the history string, keeping only the last 3 messages to stay within the context window.
        history_str = ""
        for msg in history[-3:]:
            role = "user" if msg['role'] == 'user' else "assistant"
            history_str += f"<|im_start|>{role}\n{msg['content']}<|im_end|>\n"

        # ChatML format: <|im_start|>system\n{system}<|im_end|>\n{history}<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n
        full_prompt = (
            f"<|im_start|>system\n{system_prompt}\n\nContext:\n{self.context}<|im_end|>\n"
            f"{history_str}"
            f"<|im_start|>user\n{user_query}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )

        try:
            output = self.model(
                full_prompt,
                max_tokens=350,
                temperature=0.1,
                top_k=40,
                top_p=0.9,
                stop=["<|im_end|>", "<|im_start|>"],
                echo=False,
            )
            response = output['choices'][0]['text']
            return response.strip()
        except Exception as e:
            return f"Error generating response: {e}"


llm = LLMEngine()
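

# Minimal usage sketch: how the module-level `llm` singleton might be exercised.
# The query string and history below are hypothetical examples, not part of the application.
if __name__ == "__main__":
    sample_history = [
        {"role": "user", "content": "Forecast demand for BMS0001"},
        {"role": "assistant", "content": "Here is the demand forecast for BMS0001..."},
    ]
    # Follow-up question; the history lets the model resolve what "that forecast" refers to.
    print(llm.generate_response("Tell me more about that forecast", history=sample_history))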