Spaces:

iisadia
/

transformer-visualizer

Sleeping

App Files Files Community

iisadia committed on Apr 7

Commit

db0eecf

verified ·

1 Parent(s): cf92986

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -14

app.py CHANGED Viewed

@@ -2,6 +2,9 @@ import streamlit as st
 import matplotlib.pyplot as plt
 import pandas as pd
 import torch
 from transformers import AutoConfig, AutoTokenizer
 # Page configuration
@@ -46,18 +49,38 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
-# Model database
 MODELS = {
-    "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12, "params": 109.48},
-    "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12, "params": 117},
-    "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8, "params": 60},
-    "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 125},
-    "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6, "heads": 12, "params": 66},
-    "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12, "params": 11.8},
-    "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder", "layers": 12, "heads": 12, "params": 13.5},
-    "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12, "heads": 12, "params": 110},
-    "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16, "params": 139},
-    "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 139}
 }
 def get_model_config(model_name):
@@ -155,6 +178,87 @@ def visualize_attention_patterns():
     fig.patch.set_facecolor('#2c2c2c')
     st.pyplot(fig)
 def main():
     st.title("🧠 Transformer Model Visualizer")
@@ -173,7 +277,11 @@ def main():
     with col4:
         st.metric("Parameters", f"{model_info['params']}M")
-    tab1, tab2, tab3, tab4 = st.tabs(["Model Structure", "Comparison", "Model Attention", "Tokenization"])
     with tab1:
         st.subheader("Architecture Diagram")
@@ -202,11 +310,9 @@ def main():
     with tab4:
         st.subheader("📝 Tokenization Visualization")
         input_text = st.text_input("Enter Text:", "Hello, how are you?")
         col1, col2 = st.columns(2)
         with col1:
             st.markdown("**Tokenized Output**")
             tokens = tokenizer.tokenize(input_text)
@@ -239,6 +345,16 @@ def main():
         - Padding token: `{tokenizer.pad_token}`
         - Max length: `{tokenizer.model_max_length}`
         """)
 if __name__ == "__main__":
     main()

 import matplotlib.pyplot as plt
 import pandas as pd
 import torch
+import plotly.express as px
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
 from transformers import AutoConfig, AutoTokenizer
 # Page configuration
 </style>
 """, unsafe_allow_html=True)
+# Enhanced Model database
+# Static catalogue keyed by display name. Each entry holds the Hugging Face
+# checkpoint id ("model_name"), the architecture family ("type"), layer and
+# attention-head counts, the parameter count in millions ("params" is shown
+# with an "M" suffix / "Params (M)" label elsewhere in this file), plus
+# display-only strings for downloads, release year and hardware needs.
+# All values are hard-coded here, not read from the checkpoints.
 MODELS = {
+    "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12,
+            "params": 109.48, "downloads": "10M+", "release_year": 2018, "gpu_req": "4GB+",
+            "cpu_req": "4 cores+", "ram_req": "8GB+"},
+    "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12,
+             "params": 117, "downloads": "8M+", "release_year": 2019, "gpu_req": "6GB+",
+             "cpu_req": "4 cores+", "ram_req": "12GB+"},
+    "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8,
+                "params": 60, "downloads": "5M+", "release_year": 2019, "gpu_req": "3GB+",
+                "cpu_req": "2 cores+", "ram_req": "6GB+"},
+    "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12,
+               "params": 125, "downloads": "7M+", "release_year": 2019, "gpu_req": "5GB+",
+               "cpu_req": "4 cores+", "ram_req": "10GB+"},
+    "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6,
+                  "heads": 12, "params": 66, "downloads": "9M+", "release_year": 2019,
+                  "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
+    "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12,
+              "params": 11.8, "downloads": "3M+", "release_year": 2019, "gpu_req": "1GB+",
+              "cpu_req": "1 core+", "ram_req": "2GB+"},
+    # NOTE(review): the published config for electra-small-discriminator
+    # appears to list 4 attention heads, not 12 — verify against config.json.
+    "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder",
+               "layers": 12, "heads": 12, "params": 13.5, "downloads": "2M+",
+               "release_year": 2020, "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
+    "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12,
+             "heads": 12, "params": 110, "downloads": "4M+", "release_year": 2019,
+             "gpu_req": "5GB+", "cpu_req": "4 cores+", "ram_req": "8GB+"},
+    # NOTE(review): bart-base appears to use 12 attention heads (16 is the
+    # bart-large figure) — verify against config.json.
+    "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16,
+            "params": 139, "downloads": "6M+", "release_year": 2020, "gpu_req": "6GB+",
+            "cpu_req": "4 cores+", "ram_req": "12GB+"},
+    "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12,
+               "heads": 12, "params": 139, "downloads": "3M+", "release_year": 2021,
+               "gpu_req": "8GB+", "cpu_req": "6 cores+", "ram_req": "16GB+"}
 }
 def get_model_config(model_name):
     fig.patch.set_facecolor('#2c2c2c')
     st.pyplot(fig)
+def embedding_projector():
+    """Render an interactive 3D scatter plot of placeholder word embeddings.
+
+    Random 256-dimensional vectors stand in for real embeddings of a fixed
+    word list. They are reduced to three components with the method picked
+    in the select box (PCA or t-SNE) and drawn as a labelled Plotly scatter.
+    """
+    st.subheader("🔍 Embedding Projector")
+    # Sample words for visualization
+    words = ["king", "queen", "man", "woman", "computer", "algorithm",
+            "neural", "network", "language", "processing"]
+    # Dummy 256-dimensional embeddings (random, not taken from any model).
+    # Converted to NumPy once so both reducers receive the same array type.
+    embeddings = torch.randn(len(words), 256).numpy()
+    # Dimensionality reduction down to 3 components for plotting
+    method = st.selectbox("Reduction Method", ["PCA", "t-SNE"])
+    if method == "PCA":
+        reduced = PCA(n_components=3).fit_transform(embeddings)
+    else:
+        # scikit-learn requires perplexity < n_samples; the default of 30 is
+        # rejected for this 10-word sample, so cap it just below the count.
+        perplexity = min(30, len(words) - 1)
+        reduced = TSNE(n_components=3, perplexity=perplexity).fit_transform(embeddings)
+    # Create interactive 3D plot
+    fig = px.scatter_3d(
+        x=reduced[:,0], y=reduced[:,1], z=reduced[:,2],
+        text=words,
+        title=f"Word Embeddings ({method})"
+    )
+    fig.update_traces(marker=dict(size=5), textposition='top center')
+    st.plotly_chart(fig, use_container_width=True)
+def hardware_recommendations(model_info):
+    """Show GPU, CPU and RAM requirements for a model, plus cloud options.
+
+    Args:
+        model_info: Mapping for the selected model; the "gpu_req",
+            "cpu_req" and "ram_req" keys are read, each with a fallback
+            used when the key is missing.
+    """
+    st.subheader("💻 Hardware Recommendations")
+    # (metric label, model_info key, fallback when the key is absent)
+    requirement_specs = [
+        ("Minimum GPU", "gpu_req", "4GB+"),
+        ("CPU Recommendation", "cpu_req", "4 cores+"),
+        ("RAM Requirement", "ram_req", "8GB+"),
+    ]
+    columns = st.columns(len(requirement_specs))
+    for column, (label, key, fallback) in zip(columns, requirement_specs):
+        with column:
+            st.metric(label, model_info.get(key, fallback))
+    st.markdown("""
+    **Cloud Recommendations:**
+    - AWS: g4dn.xlarge instance
+    - GCP: n1-standard-4 with T4 GPU
+    - Azure: Standard_NC4as_T4_v3
+    """)
+def model_zoo_statistics():
+    """Show a table and a bar chart summarising every entry in MODELS.
+
+    The table lists release year, downloads and parameter count per model;
+    the bar chart compares parameter counts across models.
+    """
+    st.subheader("📊 Model Zoo Statistics")
+    catalogue = pd.DataFrame.from_dict(MODELS, orient='index')
+    # One mapping drives both the displayed columns and their header labels.
+    column_labels = {
+        "release_year": "Release Year",
+        "downloads": "Downloads",
+        "params": "Params (M)",
+    }
+    st.dataframe(
+        catalogue[list(column_labels)],
+        column_config=column_labels,
+        use_container_width=True,
+        height=400,
+    )
+    params_chart = px.bar(
+        catalogue, x=catalogue.index, y="params", title="Model Parameters Comparison"
+    )
+    st.plotly_chart(params_chart, use_container_width=True)
+def memory_usage_estimator(model_info):
+    """Estimate VRAM for a model and suggest the smallest GPU that fits.
+
+    The estimate is parameter count x bytes per parameter x batch size,
+    shown in GiB together with a GPU suggestion and a utilisation bar
+    scaled against a 40 GB card.
+
+    Args:
+        model_info: Mapping for the selected model; "params" (parameter
+            count in millions) is read.
+    """
+    st.subheader("🧮 Memory Usage Estimator")
+    precision = st.selectbox("Precision", ["FP32", "FP16", "INT8"])
+    batch_size = st.slider("Batch size", 1, 128, 8)
+    # Memory calculation
+    bytes_map = {"FP32": 4, "FP16": 2, "INT8": 1}
+    # NOTE(review): this scales the weight footprint by batch size, although
+    # weight memory itself does not grow with batch size; treat the figure as
+    # a coarse stand-in for weights + activations — confirm intent.
+    estimated_memory = (model_info["params"] * 1e6 * bytes_map[precision] * batch_size) / (1024**3)
+    # Pick the smallest card whose memory covers the estimate. The previous
+    # rule suggested a 24 GB RTX 3090 only when the estimate exceeded 24 GB
+    # and a 12 GB RTX 3060 for anything up to 24 GB.
+    gpu_tiers = [
+        (12, "RTX 3060"),
+        (24, "RTX 3090"),
+        (40, "A100 40GB"),
+        (80, "A100 80GB"),
+    ]
+    recommended_gpu = next(
+        (name for capacity_gb, name in gpu_tiers if estimated_memory <= capacity_gb),
+        "Multi-GPU",
+    )
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("Estimated VRAM", f"{estimated_memory:.1f} GB")
+    with col2:
+        st.metric("Recommended GPU", recommended_gpu)
+    st.progress(min(estimated_memory/40, 1.0), text="GPU Memory Utilization (of 40GB GPU)")
 def main():
     st.title("🧠 Transformer Model Visualizer")
     with col4:
         st.metric("Parameters", f"{model_info['params']}M")
+    # Updated tabs with all 7 sections
+    tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs([
+        "Model Structure", "Comparison", "Model Attention",
+        "Tokenization", "Embeddings", "Hardware", "Stats & Memory"
+    ])
     with tab1:
         st.subheader("Architecture Diagram")
     with tab4:
         st.subheader("📝 Tokenization Visualization")
         input_text = st.text_input("Enter Text:", "Hello, how are you?")
         col1, col2 = st.columns(2)
         with col1:
             st.markdown("**Tokenized Output**")
             tokens = tokenizer.tokenize(input_text)
         - Padding token: `{tokenizer.pad_token}`
         - Max length: `{tokenizer.model_max_length}`
         """)
+    with tab5:
+        embedding_projector()
+    with tab6:
+        hardware_recommendations(model_info)
+    with tab7:
+        model_zoo_statistics()
+        memory_usage_estimator(model_info)
 if __name__ == "__main__":
     main()