Spaces:
Sleeping
Sleeping
File size: 4,505 Bytes
793855f 44669ca 97d5e2d ce96f36 44669ca 97d5e2d 793855f 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 793855f 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d 44669ca 97d5e2d ce96f36 793855f 97d5e2d ce96f36 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d 793855f 97d5e2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# ======================================================
# π Smart Data Analyst Pro (Chat Mode)
# Frontend & Orchestration β Uses utils.py for backend logic
# ======================================================
import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
import google.generativeai as genai
# π§ Import backend logic
from utils import (
ai_clean_dataset,
query_analysis_model,
)
# ======================================================
# APP CONFIGURATION
# ======================================================
st.set_page_config(page_title="π Smart Data Analyst Pro", layout="wide")
st.title("π Smart Data Analyst Pro (Chat Mode)")
st.caption("Chat with your dataset β AI cleans, analyzes, and visualizes data. Hugging Face + Gemini compatible.")
# ======================================================
# Load Environment Variables
# ======================================================
load_dotenv()
# The HF token may be stored under either env-var name; first match wins.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not HF_TOKEN:
    # Hard requirement: the cleaner client below always needs an HF token.
    st.error("β Missing HF_TOKEN. Please set it in your .env file.")
else:
    login(token=HF_TOKEN)
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Gemini is optional — only the Gemini analyst choice breaks without it.
    st.warning("β οΈ Gemini API key missing. Gemini 2.5 Flash will not work.")
# ======================================================
# MODEL SETTINGS (SIDEBAR)
# ======================================================
with st.sidebar:
    st.header("βοΈ Model Settings")
    # Model used by ai_clean_dataset() for AI-based cleaning.
    CLEANER_MODEL = st.selectbox(
        "Select Cleaner Model:",
        [
            "Qwen/Qwen2.5-Coder-14B",
            "mistralai/Mistral-7B-Instruct-v0.3",
        ],
        index=0,
    )
    # Model used by query_analysis_model() to answer chat questions.
    # The first entry routes to Gemini; the rest are Hugging Face models.
    ANALYST_MODEL = st.selectbox(
        "Select Analysis Model:",
        [
            "Gemini 2.5 Flash (Google)",
            "Qwen/Qwen2.5-14B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "HuggingFaceH4/zephyr-7b-beta",
        ],
        index=0,
    )
    # Generation knobs forwarded to the analysis model.
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
    max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
# ======================================================
# MODEL CLIENTS
# ======================================================
hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
# Gemini calls are handled inside utils.query_analysis_model via the API key,
# so an HF client is only built when a Hugging Face analysis model is chosen.
hf_analyst_client = None
if ANALYST_MODEL != "Gemini 2.5 Flash (Google)":
    hf_analyst_client = InferenceClient(model=ANALYST_MODEL, token=HF_TOKEN)
# ======================================================
# MAIN CHATBOT LOGIC
# ======================================================
uploaded = st.file_uploader("π Upload CSV or Excel file", type=["csv", "xlsx"])
# Chat history persists across Streamlit reruns via session_state.
if "messages" not in st.session_state:
    st.session_state.messages = []
if uploaded:
    # Load dataset — reader chosen by file extension (.csv vs .xlsx).
    df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
    # AI-based cleaning (backend logic lives in utils.ai_clean_dataset).
    with st.spinner("π§Ό Cleaning your dataset..."):
        cleaned_df, cleaning_status = ai_clean_dataset(df, hf_cleaner_client)
    # Display cleaning info.
    # NOTE(review): this string literal was split across two lines in the
    # original (a syntax error); rejoined here. The leading character is
    # mojibake for an emoji — original glyph unrecoverable, left as-is.
    st.subheader("β Cleaning Status")
    st.info(cleaning_status)
    st.subheader("π Dataset Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    # Chat interface: replay stored history, then handle the new prompt.
    st.subheader("π¬ Chat with Your Dataset")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
    if user_query := st.chat_input("Ask something about your dataset..."):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)
        with st.chat_message("assistant"):
            with st.spinner("π€ Analyzing..."):
                result = query_analysis_model(
                    cleaned_df,
                    user_query,
                    uploaded.name,
                    ANALYST_MODEL,
                    hf_client=hf_analyst_client,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    gemini_api_key=GEMINI_API_KEY,
                )
            st.markdown(result)
        st.session_state.messages.append({"role": "assistant", "content": result})
else:
    st.info("π₯ Upload a dataset to begin chatting with your AI analyst.")