# Source: Data_Analysis_Chatbot / src / streamlit_app.py
# (Hugging Face Space file-page header: uploaded by Starburst15,
#  commit 793855f "Update src/streamlit_app.py")
# ======================================================
# πŸ“Š Smart Data Analyst Pro (Chat Mode)
# Frontend & Orchestration β€” Uses utils.py for backend logic
# ======================================================
import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
import google.generativeai as genai
# 🧠 Import backend logic
from utils import (
ai_clean_dataset,
query_analysis_model,
)
# ======================================================
# βš™οΈ APP CONFIGURATION
# ======================================================
st.set_page_config(page_title="πŸ“Š Smart Data Analyst Pro", layout="wide")
st.title("πŸ“Š Smart Data Analyst Pro (Chat Mode)")
st.caption("Chat with your dataset β€” AI cleans, analyzes, and visualizes data. Hugging Face + Gemini compatible.")
# ======================================================
# πŸ” Load Environment Variables
# ======================================================
load_dotenv()
# Accept either env-var name so existing deployments keep working.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not HF_TOKEN:
    st.error("❌ Missing HF_TOKEN. Please set it in your .env file.")
    # Without a token, every downstream InferenceClient call would fail;
    # halt the script here instead of continuing with token=None.
    st.stop()
else:
    login(token=HF_TOKEN)
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Gemini is optional: only the "Gemini 2.5 Flash (Google)" analyst
    # choice needs it, so warn rather than stop.
    st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")
# ======================================================
# 🧠 MODEL SETTINGS (SIDEBAR)
# ======================================================
# Available model choices; the first entry of each list is the default.
_CLEANER_CHOICES = [
    "Qwen/Qwen2.5-Coder-14B",
    "mistralai/Mistral-7B-Instruct-v0.3",
]
_ANALYST_CHOICES = [
    "Gemini 2.5 Flash (Google)",
    "Qwen/Qwen2.5-14B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
]
with st.sidebar:
    st.header("βš™οΈ Model Settings")
    # Model that performs the AI-based dataset cleaning pass.
    CLEANER_MODEL = st.selectbox("Select Cleaner Model:", _CLEANER_CHOICES, index=0)
    # Model that answers chat questions about the dataset.
    ANALYST_MODEL = st.selectbox("Select Analysis Model:", _ANALYST_CHOICES, index=0)
    # Generation parameters forwarded to the analysis model.
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
    max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
# ======================================================
# 🧩 MODEL CLIENTS
# ======================================================
# The cleaner always runs on Hugging Face. The analyst only needs an HF
# client when a non-Gemini model is selected; Gemini is handled inside
# query_analysis_model via its API key instead.
hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
hf_analyst_client = (
    InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
    if ANALYST_MODEL != "Gemini 2.5 Flash (Google)"
    else None
)
# ======================================================
# πŸš€ MAIN CHATBOT LOGIC
# ======================================================
uploaded = st.file_uploader("πŸ“Ž Upload CSV or Excel file", type=["csv", "xlsx"])
# Persistent chat history across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []
if uploaded:
    # Load dataset (format chosen by file extension).
    df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
    # 🧼 AI-BASED CLEANING
    # Every chat message triggers a full script rerun, so cache the cleaning
    # result per uploaded file name — otherwise the (slow) cleaning model
    # would be invoked again on every interaction.
    _clean_key = f"cleaned::{uploaded.name}"
    if _clean_key not in st.session_state:
        with st.spinner("🧼 Cleaning your dataset..."):
            st.session_state[_clean_key] = ai_clean_dataset(df, hf_cleaner_client)
    cleaned_df, cleaning_status = st.session_state[_clean_key]
    # Display cleaning info
    st.subheader("βœ… Cleaning Status")
    st.info(cleaning_status)
    st.subheader("πŸ“Š Dataset Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    # πŸ’¬ Chat interface: replay prior turns, then handle the new prompt.
    st.subheader("πŸ’¬ Chat with Your Dataset")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
    if user_query := st.chat_input("Ask something about your dataset..."):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)
        with st.chat_message("assistant"):
            with st.spinner("πŸ€– Analyzing..."):
                # Dispatches to Gemini or the HF client based on ANALYST_MODEL.
                result = query_analysis_model(
                    cleaned_df,
                    user_query,
                    uploaded.name,
                    ANALYST_MODEL,
                    hf_client=hf_analyst_client,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    gemini_api_key=GEMINI_API_KEY,
                )
            st.markdown(result)
            st.session_state.messages.append({"role": "assistant", "content": result})
else:
    st.info("πŸ“₯ Upload a dataset to begin chatting with your AI analyst.")