|
|
import os |
|
|
import sys |
|
|
import uuid |
|
|
|
|
|
|
|
|
# Append the directory one level above this script's folder to the module
# search path; presumably this is what lets the `core` package imported
# further down resolve when the file is executed directly.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
)
|
|
|
|
|
from core.rag.store import VectorStore |
|
|
|
|
|
# File extensions that are treated as ingestible text documents.
_TEXT_EXTENSIONS = (".md", ".txt")
# Files whose stripped content is shorter than this are ignored.
_MIN_CONTENT_LENGTH = 10


def _collect_documents(base_path):
    """Read every ingestible file below *base_path*.

    Returns three parallel lists: the document texts, one metadata dict per
    document (``source`` is the file name, ``category`` the name of the
    folder that contains the file) and one random UUID string per document.
    Files that cannot be read or decoded as UTF-8 are reported and skipped.
    """
    documents = []
    metadatas = []
    ids = []

    for root, dirs, files in os.walk(base_path):
        # Sorting in place makes os.walk descend in a reproducible order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(_TEXT_EXTENSIONS):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                # Best effort: one unreadable file must not abort the run.
                print(f" - ⚠️ Skipped {file}: {e}")
                continue

            if len(content.strip()) < _MIN_CONTENT_LENGTH:
                continue

            documents.append(content)
            metadatas.append(
                {
                    "source": file,
                    # normpath guards against a trailing separator on a
                    # caller-supplied base path yielding an empty category.
                    "category": os.path.basename(os.path.normpath(root)),
                }
            )
            # NOTE(review): IDs are random, so re-running the script
            # presumably stores the same files again under new IDs --
            # confirm against VectorStore.add_text.
            ids.append(str(uuid.uuid4()))

            print(f" - Prepared: {file}")

    return documents, metadatas, ids


def ingest_knowledge_base(base_path=None, store=None):
    """Load the knowledge-base text files into the vector store.

    Walks ``base_path`` recursively, reads each ``.md`` / ``.txt`` file as
    UTF-8, skips files with fewer than 10 characters after stripping
    whitespace, and passes everything that was collected to
    ``store.add_text(documents, metadatas, ids)`` in a single call.
    Progress and problems are reported with ``print``.

    Args:
        base_path: Directory to scan. Defaults to the ``knowledge_base``
            folder one level above the directory containing this script.
        store: Object exposing ``add_text(documents, metadatas, ids)``.
            Defaults to a newly constructed ``VectorStore``.

    Returns:
        None. The function returns early (after printing a message) when
        the store cannot be created, the folder is missing, or no valid
        documents are found.
    """
    if store is None:
        print("🔌 Connecting to Vector Database...")
        try:
            store = VectorStore()
        except Exception as e:
            # Script-level boundary: the store's failure modes are not
            # visible here, so report whatever went wrong and stop.
            print(f"❌ Error initializing DB: {e}")
            return

    if base_path is None:
        # Use the absolute script location so the folder is found no matter
        # which working directory the script is launched from.
        project_root = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__))
        )
        base_path = os.path.join(project_root, "knowledge_base")

    print(f"📂 Scanning folder: {base_path}")

    if not os.path.isdir(base_path):
        print(f"⚠️ Knowledge base folder not found at {base_path}")
        return

    documents, metadatas, ids = _collect_documents(base_path)

    if not documents:
        print("⚠️ No valid documents found to ingest.")
        return

    print(f"💾 Saving {len(documents)} documents to ChromaDB...")
    store.add_text(documents, metadatas, ids)
    print("✅ Knowledge Injection Complete!")
|
|
|
|
|
# Script entry point: run the ingestion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    ingest_knowledge_base()