# ======================================================
# 📊 Smart Data Analyst Pro (Chat Mode)
# Frontend & Orchestration: uses utils.py for backend logic
# ======================================================
import os

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
import google.generativeai as genai

# 🧠 Import backend logic
from utils import (
    ai_clean_dataset,
    query_analysis_model,
)

# ======================================================
# ⚙️ APP CONFIGURATION
# ======================================================
st.set_page_config(page_title="📊 Smart Data Analyst Pro", layout="wide")
st.title("📊 Smart Data Analyst Pro (Chat Mode)")
st.caption(
    "Chat with your dataset: AI cleans, analyzes, and visualizes data. "
    "Hugging Face + Gemini compatible."
)

# ======================================================
# 🔐 LOAD ENVIRONMENT VARIABLES
# ======================================================
# Expected .env keys:
#   HF_TOKEN=<Hugging Face token>   (HUGGINGFACE_API_KEY also works)
#   GEMINI_API_KEY=<Google AI Studio key>
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not HF_TOKEN:
    st.error("❌ Missing HF_TOKEN. Please set it in your .env file.")
    st.stop()  # cleaning always goes through Hugging Face, so nothing works without it
login(token=HF_TOKEN)

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")

# ======================================================
# 🧠 MODEL SETTINGS (SIDEBAR)
# ======================================================
with st.sidebar:
    st.header("⚙️ Model Settings")
    CLEANER_MODEL = st.selectbox(
        "Select Cleaner Model:",
        [
            "Qwen/Qwen2.5-Coder-14B",
            "mistralai/Mistral-7B-Instruct-v0.3",
        ],
        index=0,
    )
    ANALYST_MODEL = st.selectbox(
        "Select Analysis Model:",
        [
            "Gemini 2.5 Flash (Google)",
            "Qwen/Qwen2.5-14B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "HuggingFaceH4/zephyr-7b-beta",
        ],
        index=0,
    )
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
    max_tokens = st.slider("Max Tokens", 128, 4096, 1024)

# ======================================================
# 🧩 MODEL CLIENTS
# ======================================================
hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)

# Gemini is reached through the google-generativeai SDK, so an HF client
# is only needed when a Hugging Face analysis model is selected.
hf_analyst_client = None
if ANALYST_MODEL != "Gemini 2.5 Flash (Google)":
    hf_analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)

# ======================================================
# 🚀 MAIN CHATBOT LOGIC
# ======================================================
uploaded = st.file_uploader("📎 Upload CSV or Excel file", type=["csv", "xlsx"])

if "messages" not in st.session_state:
    st.session_state.messages = []

if uploaded:
    # Rewind the upload buffer so the file can be re-read on every rerun.
    # (.xlsx files additionally require the openpyxl package.)
    uploaded.seek(0)
    df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)

    # 🧼 AI-BASED CLEANING
    # Cache the cleaned result in session state: every chat message triggers a
    # Streamlit rerun, and without this cache each message would re-run the
    # (slow, paid) cleaning model on the same data.
    if st.session_state.get("cleaned_for") != uploaded.name:
        with st.spinner("🧼 Cleaning your dataset..."):
            cleaned_df, cleaning_status = ai_clean_dataset(df, hf_cleaner_client)
        st.session_state.cleaned_df = cleaned_df
        st.session_state.cleaning_status = cleaning_status
        st.session_state.cleaned_for = uploaded.name
    cleaned_df = st.session_state.cleaned_df
    cleaning_status = st.session_state.cleaning_status

    # Display cleaning info
    st.subheader("✅ Cleaning Status")
    st.info(cleaning_status)

    st.subheader("📊 Dataset Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)

    # 💬 Chat interface: replay the saved history, then handle the new turn
    st.subheader("💬 Chat with Your Dataset")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if user_query := st.chat_input("Ask something about your dataset..."):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)

        with st.chat_message("assistant"):
            with st.spinner("🤖 Analyzing..."):
                result = query_analysis_model(
                    cleaned_df,
                    user_query,
                    uploaded.name,
                    ANALYST_MODEL,
                    hf_client=hf_analyst_client,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    gemini_api_key=GEMINI_API_KEY,
                )
            st.markdown(result)
        st.session_state.messages.append({"role": "assistant", "content": result})
else:
    st.info("📥 Upload a dataset to begin chatting with your AI analyst.")
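
# ======================================================
# 📝 Appendix: the utils.py contract this app assumes
# ======================================================
# The frontend above depends on exactly two backend functions, called as
#   ai_clean_dataset(df, hf_client) -> (cleaned_df, status_message)
#   query_analysis_model(df, query, filename, model_name, ...) -> str
# The sketch below mirrors those signatures as used in this file; the bodies
# are illustrative placeholders (duplicate/NA dropping, a plain chat-completion
# call), NOT the real utils.py logic. The underscore names mark them as
# hypothetical stand-ins so they never shadow the real imports, and the
# "gemini-2.5-flash" model id is likewise an assumption.


def _sketch_ai_clean_dataset(df: pd.DataFrame, hf_client: InferenceClient):
    """Stand-in for utils.ai_clean_dataset: returns (cleaned_df, status)."""
    # A real implementation would ask hf_client to propose cleaning steps;
    # this placeholder just drops exact duplicates and all-empty rows.
    cleaned = df.drop_duplicates().dropna(how="all")
    status = f"Removed {len(df) - len(cleaned)} duplicate/empty rows."
    return cleaned, status


def _sketch_query_analysis_model(df, query, filename, model_name, *,
                                 hf_client=None, temperature=0.3,
                                 max_tokens=1024, gemini_api_key=None):
    """Stand-in for utils.query_analysis_model: returns a markdown answer."""
    prompt = (
        f"You are a data analyst. Dataset '{filename}', first rows:\n"
        f"{df.head().to_string()}\n\nQuestion: {query}"
    )
    if model_name == "Gemini 2.5 Flash (Google)":
        if gemini_api_key:
            genai.configure(api_key=gemini_api_key)
        model = genai.GenerativeModel("gemini-2.5-flash")  # assumed model id
        response = model.generate_content(
            prompt,
            generation_config={"temperature": temperature,
                               "max_output_tokens": max_tokens},
        )
        return response.text
    completion = hf_client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content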