# reachify-ai-service / scripts / ingest_data.py
# amitbhatt6075's picture
# Complete fresh start - FINAL UPLOAD
# 0914e96
# raw
# history blame
# 2.01 kB
import os
import sys
import uuid
# Ensure we can import from core
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.rag.store import VectorStore
def _collect_documents(base_path):
    """Gather the text of every ``.md``/``.txt`` file under *base_path*.

    Walks the tree recursively and reads each matching file as UTF-8.
    Files that cannot be read or decoded are reported and skipped, and
    files whose stripped content is shorter than 10 characters are
    silently ignored.

    Args:
        base_path: Directory to scan.

    Returns:
        A ``(documents, metadatas, ids)`` tuple of three parallel lists:
        the file contents, one ``{"source", "category"}`` dict per file
        (file name and name of the containing folder), and one freshly
        generated UUID string per file.
    """
    documents = []
    metadatas = []
    ids = []

    # Scan all files recursively.
    for root, _, files in os.walk(base_path):
        for file in files:
            if not file.endswith((".md", ".txt")):
                continue

            file_path = os.path.join(root, file)
            # Keep the try body limited to the I/O that can actually fail.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f" - ⚠️ Skipped {file}: {e}")
                continue

            # Skip empty (or near-empty) files.
            if len(content.strip()) < 10:
                continue

            # Prepare the content and its metadata.
            documents.append(content)
            metadatas.append({"source": file, "category": os.path.basename(root)})
            # NOTE(review): ids are random on every run, so re-running the
            # script presumably inserts the same files again under new ids
            # unless the store de-duplicates by content -- confirm.
            ids.append(str(uuid.uuid4()))
            print(f" - Prepared: {file}")

    return documents, metadatas, ids


def ingest_knowledge_base():
    """Load the project's ``knowledge_base`` folder into the vector store.

    Creates a ``VectorStore``, scans ``<project root>/knowledge_base`` for
    ``.md`` and ``.txt`` files and passes their contents, metadata and ids
    to ``store.add_text`` in a single call. Progress and problems are
    reported with ``print``.

    Returns:
        None. The function returns early (after printing a message) when
        the store cannot be initialised or the folder does not exist.
    """
    # Initialize the DB.
    print("πŸš€ Connecting to Vector Database...")
    try:
        store = VectorStore()
    except Exception as e:  # script-level boundary: report and stop
        print(f"❌ Error initializing DB: {e}")
        return

    # Resolve through abspath (as the sys.path setup at the top of the file
    # does) so a relative __file__ cannot make the lookup depend on the CWD.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    base_path = os.path.join(project_root, "knowledge_base")

    print(f"πŸ“‚ Scanning folder: {base_path}")
    if not os.path.isdir(base_path):
        print(f"⚠️ Knowledge base folder not found at {base_path}")
        return

    documents, metadatas, ids = _collect_documents(base_path)

    # Insert into the DB.
    if documents:
        print(f"πŸ’Ύ Saving {len(documents)} documents to ChromaDB...")
        store.add_text(documents, metadatas, ids)
        print("βœ… Knowledge Injection Complete!")
    else:
        print("⚠️ No valid documents found to ingest.")
# Run the ingestion only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    ingest_knowledge_base()