"""Load the text files under ``knowledge_base`` into the project's vector store.

Run as a script. Every ``.md`` / ``.txt`` file found (recursively) under the
``knowledge_base`` folder that sits next to this script's parent directory is
read and handed to ``VectorStore.add_text`` in a single batch.
"""
import os
import sys
import uuid

# Project root = parent of the directory holding this script. It is resolved
# from the *absolute* script path so the result does not depend on the current
# working directory (a script's ``__file__`` may be relative on older
# interpreters).
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Ensure we can import from core
sys.path.append(_PROJECT_ROOT)

from core.rag.store import VectorStore  # noqa: E402  (needs the sys.path tweak above)

# File name suffixes that are ingested (matched case-sensitively).
_SUPPORTED_EXTENSIONS = (".md", ".txt")

# Files whose stripped content is shorter than this many characters are
# treated as empty and skipped.
_MIN_CONTENT_LENGTH = 10


def _collect_documents(base_path):
    """Walk *base_path* recursively and gather the files worth ingesting.

    Args:
        base_path: Directory to scan.

    Returns:
        A ``(documents, metadatas, ids)`` tuple of three parallel lists:
        the file contents, a ``{"source": <file name>, "category": <name of
        the containing folder>}`` dict per file, and a fresh random UUID
        string per file.
    """
    documents = []
    metadatas = []
    ids = []

    # Scan all files recursively.
    for root, _, files in os.walk(base_path):
        for filename in files:
            if not filename.endswith(_SUPPORTED_EXTENSIONS):
                continue

            file_path = os.path.join(root, filename)
            # Only the read itself is guarded: an unreadable or non-UTF-8
            # file is reported and skipped without aborting the whole run.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f" - ⚠️ Skipped {filename}: {e}")
                continue

            if len(content.strip()) < _MIN_CONTENT_LENGTH:
                continue  # Skip empty (or nearly empty) files

            # Prepare the content and its metadata.
            documents.append(content)
            metadatas.append(
                {"source": filename, "category": os.path.basename(root)}
            )
            # NOTE(review): ids are random, so running the script twice will
            # presumably store every document twice — confirm against
            # VectorStore.add_text before switching to deterministic ids.
            ids.append(str(uuid.uuid4()))
            print(f" - Prepared: {filename}")

    return documents, metadatas, ids


def ingest_knowledge_base() -> None:
    """Read the knowledge-base folder and push its documents to the store.

    Progress and problems are reported with ``print``. The function returns
    early (without raising) when the ``VectorStore`` cannot be constructed or
    when the ``knowledge_base`` folder does not exist. Exceptions raised by
    ``store.add_text`` are not caught and propagate to the caller.
    """
    # Initialize DB
    print("🚀 Connecting to Vector Database...")
    try:
        store = VectorStore()
    except Exception as e:  # top-level boundary: failure modes of the store are unknown here
        print(f"❌ Error initializing DB: {e}")
        return

    base_path = os.path.join(_PROJECT_ROOT, "knowledge_base")

    print(f"📂 Scanning folder: {base_path}")

    if not os.path.exists(base_path):
        print(f"⚠️ Knowledge base folder not found at {base_path}")
        return

    documents, metadatas, ids = _collect_documents(base_path)

    # Insert into the DB.
    if documents:
        print(f"💾 Saving {len(documents)} documents to ChromaDB...")
        store.add_text(documents, metadatas, ids)
        print("✅ Knowledge Injection Complete!")
    else:
        print("⚠️ No valid documents found to ingest.")


if __name__ == "__main__":
    ingest_knowledge_base()