# NOTE(review): removed "Spaces: Sleeping" banner — Hugging Face Spaces page
# scrape residue, not part of the source file.
# ======================================================
# 📊 Smart Data Analyst Pro (Chat Mode)
# Frontend & Orchestration — uses utils.py for backend logic
# ======================================================
import os

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
import google.generativeai as genai

# 🧠 Backend logic: AI-based dataset cleaning + analysis-model querying.
from utils import (
    ai_clean_dataset,
    query_analysis_model,
)
# ======================================================
# ⚙️ APP CONFIGURATION
# ======================================================
st.set_page_config(page_title="📊 Smart Data Analyst Pro", layout="wide")
st.title("📊 Smart Data Analyst Pro (Chat Mode)")
st.caption("Chat with your dataset — AI cleans, analyzes, and visualizes data. Hugging Face + Gemini compatible.")

# ======================================================
# 🔐 Load Environment Variables
# ======================================================
load_dotenv()

# HF token may come under either of two env-var names.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not HF_TOKEN:
    st.error("❌ Missing HF_TOKEN. Please set it in your .env file.")
    # Without a token every InferenceClient call below would fail anyway,
    # so halt the script here instead of limping on with token=None.
    st.stop()
else:
    login(token=HF_TOKEN)

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Gemini is optional — Hugging Face analysis models still work, so only warn.
    st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")
# ======================================================
# 🧠 MODEL SETTINGS (SIDEBAR)
# ======================================================
with st.sidebar:
    st.header("⚙️ Model Settings")

    # Model used by ai_clean_dataset() to clean the uploaded dataframe.
    CLEANER_MODEL = st.selectbox(
        "Select Cleaner Model:",
        [
            "Qwen/Qwen2.5-Coder-14B",
            "mistralai/Mistral-7B-Instruct-v0.3",
        ],
        index=0,
    )

    # Model used by query_analysis_model() to answer chat questions.
    # The Gemini entry is routed to Google's API instead of Hugging Face.
    ANALYST_MODEL = st.selectbox(
        "Select Analysis Model:",
        [
            "Gemini 2.5 Flash (Google)",
            "Qwen/Qwen2.5-14B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "HuggingFaceH4/zephyr-7b-beta",
        ],
        index=0,
    )

    # Generation parameters forwarded to the analysis model.
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
    max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
# ======================================================
# 🧩 MODEL CLIENTS
# ======================================================
# Cleaner always runs on Hugging Face.
hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)

# The Gemini path is handled inside utils.query_analysis_model via the
# google.generativeai SDK, so only build an HF client for HF analysis models.
hf_analyst_client = None
if ANALYST_MODEL != "Gemini 2.5 Flash (Google)":
    hf_analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
# ======================================================
# 🚀 MAIN CHATBOT LOGIC
# ======================================================
uploaded = st.file_uploader("📁 Upload CSV or Excel file", type=["csv", "xlsx"])

if "messages" not in st.session_state:
    st.session_state.messages = []

if uploaded:
    # Load dataset; extension check is case-insensitive so "DATA.CSV" works too.
    if uploaded.name.lower().endswith(".csv"):
        df = pd.read_csv(uploaded)
    else:
        df = pd.read_excel(uploaded)

    # 🧼 AI-BASED CLEANING — cached in session_state. Every chat message
    # triggers a full Streamlit rerun, so without this cache the (slow,
    # billable) cleaning-model call would repeat on each message.
    cache_key = (uploaded.name, CLEANER_MODEL)
    if st.session_state.get("clean_cache_key") != cache_key:
        with st.spinner("🧼 Cleaning your dataset..."):
            cleaned_df, cleaning_status = ai_clean_dataset(df, hf_cleaner_client)
        st.session_state.clean_cache_key = cache_key
        st.session_state.cleaned_df = cleaned_df
        st.session_state.cleaning_status = cleaning_status
    cleaned_df = st.session_state.cleaned_df
    cleaning_status = st.session_state.cleaning_status

    # Display cleaning info
    st.subheader("✅ Cleaning Status")
    st.info(cleaning_status)

    st.subheader("📊 Dataset Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)

    # 💬 Chat interface: replay prior turns, then handle a new prompt.
    st.subheader("💬 Chat with Your Dataset")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if user_query := st.chat_input("Ask something about your dataset..."):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)

        with st.chat_message("assistant"):
            with st.spinner("🤖 Analyzing..."):
                result = query_analysis_model(
                    cleaned_df,
                    user_query,
                    uploaded.name,
                    ANALYST_MODEL,
                    hf_client=hf_analyst_client,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    gemini_api_key=GEMINI_API_KEY,
                )
            st.markdown(result)
        st.session_state.messages.append({"role": "assistant", "content": result})
else:
    st.info("📥 Upload a dataset to begin chatting with your AI analyst.")