# Source: Data_Analysis_Chatbot / src / streamlit_app.py
# (Hugging Face Space file-page header: uploaded by Starburst15,
#  commit 793855f "Update src/streamlit_app.py")
# ======================================================
# πŸ“Š Smart Data Analyst Pro (Chat Mode)
# Frontend & Orchestration β€” Uses utils.py for backend logic
# ======================================================
import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
import google.generativeai as genai
# 🧠 Import backend logic
from utils import (
ai_clean_dataset,
query_analysis_model,
)
# ======================================================
# βš™οΈ APP CONFIGURATION
# ======================================================
st.set_page_config(page_title="πŸ“Š Smart Data Analyst Pro", layout="wide")
st.title("πŸ“Š Smart Data Analyst Pro (Chat Mode)")
st.caption("Chat with your dataset β€” AI cleans, analyzes, and visualizes data. Hugging Face + Gemini compatible.")
# ======================================================
# πŸ” Load Environment Variables
# ======================================================
load_dotenv()
# Accept either env-var name so existing deployments keep working.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not HF_TOKEN:
    st.error("❌ Missing HF_TOKEN. Please set it in your .env file.")
    # Without a token, every downstream InferenceClient call would fail;
    # halt the script here instead of continuing with token=None.
    st.stop()
else:
    login(token=HF_TOKEN)
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Gemini is optional: only the "Gemini 2.5 Flash (Google)" analyst
    # choice needs it, so warn rather than stop.
    st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")
# ======================================================
# 🧠 MODEL SETTINGS (SIDEBAR)
# ======================================================
# Available model choices; the first entry of each list is the default.
_CLEANER_CHOICES = [
    "Qwen/Qwen2.5-Coder-14B",
    "mistralai/Mistral-7B-Instruct-v0.3",
]
_ANALYST_CHOICES = [
    "Gemini 2.5 Flash (Google)",
    "Qwen/Qwen2.5-14B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
]
with st.sidebar:
    st.header("βš™οΈ Model Settings")
    # Model that performs the AI-based dataset cleaning pass.
    CLEANER_MODEL = st.selectbox("Select Cleaner Model:", _CLEANER_CHOICES, index=0)
    # Model that answers chat questions about the dataset.
    ANALYST_MODEL = st.selectbox("Select Analysis Model:", _ANALYST_CHOICES, index=0)
    # Generation parameters forwarded to the analysis model.
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
    max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
# ======================================================
# 🧩 MODEL CLIENTS
# ======================================================
# The cleaner always runs on Hugging Face. The analyst only needs an HF
# client when a non-Gemini model is selected; Gemini is handled inside
# query_analysis_model via its API key instead.
hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
hf_analyst_client = (
    InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
    if ANALYST_MODEL != "Gemini 2.5 Flash (Google)"
    else None
)
# ======================================================
# πŸš€ MAIN CHATBOT LOGIC
# ======================================================
uploaded = st.file_uploader("πŸ“Ž Upload CSV or Excel file", type=["csv", "xlsx"])
# Persistent chat history across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []
if uploaded:
    # Load dataset (format chosen by file extension).
    df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
    # 🧼 AI-BASED CLEANING
    # Every chat message triggers a full script rerun, so cache the cleaning
    # result per uploaded file name — otherwise the (slow) cleaning model
    # would be invoked again on every interaction.
    _clean_key = f"cleaned::{uploaded.name}"
    if _clean_key not in st.session_state:
        with st.spinner("🧼 Cleaning your dataset..."):
            st.session_state[_clean_key] = ai_clean_dataset(df, hf_cleaner_client)
    cleaned_df, cleaning_status = st.session_state[_clean_key]
    # Display cleaning info
    st.subheader("βœ… Cleaning Status")
    st.info(cleaning_status)
    st.subheader("πŸ“Š Dataset Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    # πŸ’¬ Chat interface: replay prior turns, then handle the new prompt.
    st.subheader("πŸ’¬ Chat with Your Dataset")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
    if user_query := st.chat_input("Ask something about your dataset..."):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)
        with st.chat_message("assistant"):
            with st.spinner("πŸ€– Analyzing..."):
                # Dispatches to Gemini or the HF client based on ANALYST_MODEL.
                result = query_analysis_model(
                    cleaned_df,
                    user_query,
                    uploaded.name,
                    ANALYST_MODEL,
                    hf_client=hf_analyst_client,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    gemini_api_key=GEMINI_API_KEY,
                )
            st.markdown(result)
            st.session_state.messages.append({"role": "assistant", "content": result})
else:
    st.info("πŸ“₯ Upload a dataset to begin chatting with your AI analyst.")