import gradio as gr
from typing import Dict, Union
import tiktoken
from transformers import AutoTokenizer
import logging
import time

# Set up logging for better error tracking
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# HUGGING FACE SPACES OPTIMIZATION: Smart caching with memory management
# Note: For production deployment, a dedicated server with more RAM would allow
# full caching of all tokenizers simultaneously for optimal performance.
_tokenizer_cache = {}
_tiktoken_cache = {}
_cache_access_times = {}
_max_cached_tokenizers = 3  # Conservative limit for HF Spaces free tier


def _cleanup_old_tokenizers():
    """Remove the least recently used tokenizer to manage memory on HF Spaces."""
    # Called before inserting a new tokenizer, so use < to keep the cache at the limit
    if len(_tokenizer_cache) < _max_cached_tokenizers:
        return

    # Sort by access time, remove oldest
    sorted_cache = sorted(_cache_access_times.items(), key=lambda x: x[1])
    oldest_key = sorted_cache[0][0]
    if oldest_key in _tokenizer_cache:
        del _tokenizer_cache[oldest_key]
        del _cache_access_times[oldest_key]
        logger.info(f"🧹 Removed cached tokenizer: {oldest_key} (memory management)")


def count_tokens_openai_gpt4(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using OpenAI GPT-4/GPT-3.5-turbo tokenizer.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with GPT-4 style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "tiktoken-cl100k")


def count_tokens_openai_davinci(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using OpenAI text-davinci tokenizer.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with text-davinci style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "tiktoken-p50k")


def count_tokens_openai_gpt3(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using OpenAI GPT-3 tokenizer.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with GPT-3 style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "tiktoken-r50k")


def count_tokens_bert_family(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using BERT tokenizer for classification and understanding tasks.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with BERT style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "hf-bert")


def count_tokens_roberta_family(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using RoBERTa tokenizer for modern classification models.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with RoBERTa style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "hf-roberta")
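
# Illustrative example of the dict the count_tokens_* wrappers above and below return
# (values are approximate; exact token counts depend on the installed tokenizer
# versions, and the timing/caching suffix in "analysis" varies per call):
#
#   count_tokens_openai_gpt4("Hello world")
#   # => {"word_count": 2, "character_count": 11, "token_count": 2,
#   #     "analysis": "short text (⏱️ 0.00s ⚡ cached)",
#   #     "tokenizer_used": "tiktoken-cl100k", "tokens_per_word": 1.0}
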

def count_tokens_gpt2_family(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using GPT-2 tokenizer for open source LLMs like Llama and Mistral.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with GPT-2 style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "hf-gpt2")


def count_tokens_t5_family(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using T5 tokenizer for sequence-to-sequence models.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with T5 style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "hf-t5")


def count_tokens_distilbert(text: str) -> Dict[str, Union[int, float, str]]:
    """
    Count tokens using DistilBERT tokenizer for lightweight BERT variants.

    Args:
        text (str): The text to analyze for token count

    Returns:
        dict: Token count results with DistilBERT style tokenization including word count,
              character count, token count, analysis, tokenizer used, and tokens per word
    """
    return _count_tokens_internal(text, "hf-distilbert")


def _count_tokens_internal(text: str, tokenizer_type: str) -> Dict[str, Union[int, float, str]]:
    """
    Internal helper function for token counting with HF Spaces optimizations.

    Note: On Hugging Face Spaces, first-time model loading may take 5-30 seconds
    due to downloading from HuggingFace Hub. For production use, consider a
    dedicated server with sufficient RAM to cache all tokenizers permanently.
    """
    # Input validation
    if not isinstance(text, str):
        return {
            "word_count": 0,
            "character_count": 0,
            "token_count": "Error: Invalid input type",
            "analysis": "error",
            "tokenizer_used": tokenizer_type,
            "tokens_per_word": 0
        }

    if not text.strip():
        return {
            "word_count": 0,
            "character_count": 0,
            "token_count": 0,
            "analysis": "empty text",
            "tokenizer_used": tokenizer_type,
            "tokens_per_word": 0
        }

    # Basic word counting
    words = text.split()
    word_count = len(words)
    char_count = len(text)

    token_count = -1
    start_time = time.time()
    is_cached = False

    try:
        if tokenizer_type.startswith("tiktoken"):
            # ⚡ tiktoken is lightweight, always cache (minimal memory impact)
            if tokenizer_type not in _tiktoken_cache:
                encoding_map = {
                    "tiktoken-cl100k": "cl100k_base",
                    "tiktoken-p50k": "p50k_base",
                    "tiktoken-r50k": "r50k_base"
                }
                encoding_name = encoding_map.get(tokenizer_type, "cl100k_base")
                _tiktoken_cache[tokenizer_type] = tiktoken.get_encoding(encoding_name)
                logger.info(f"✅ Cached tiktoken: {tokenizer_type}")
            else:
                is_cached = True
            encoding = _tiktoken_cache[tokenizer_type]
            token_count = len(encoding.encode(text))

        elif tokenizer_type.startswith("hf-"):
            # ⚡ Smart caching for transformers models (memory-aware)
            if tokenizer_type in _tokenizer_cache:
                # Cache hit
                logger.info(f"⚡ Using cached tokenizer: {tokenizer_type}")
                tokenizer = _tokenizer_cache[tokenizer_type]
                is_cached = True
            else:
                # Cache miss - load and manage memory
                _cleanup_old_tokenizers()
                model_map = {
                    "hf-bert": "bert-base-uncased",
                    "hf-roberta": "roberta-base",
                    "hf-gpt2": "gpt2",
                    "hf-distilbert": "distilbert-base-uncased",
                    "hf-t5": "t5-small"
                }
                model = model_map.get(tokenizer_type, "bert-base-uncased")
                logger.info(f"🔄 Loading tokenizer: {tokenizer_type} ({model}) - First time may take 5-30s")
                tokenizer = AutoTokenizer.from_pretrained(model)

                # Cache with memory management
                _tokenizer_cache[tokenizer_type] = tokenizer
                logger.info(f"✅ Cached tokenizer: {tokenizer_type}")

            # Update access time for LRU management
            _cache_access_times[tokenizer_type] = time.time()
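            # Note: tokenizer.tokenize() returns subword tokens without the special
            # tokens a model adds (e.g. BERT's [CLS]/[SEP]), so these counts can run
            # slightly below what the model actually consumes. An illustrative
            # alternative that does include special tokens (assuming default
            # Hugging Face tokenizer behaviour) would be:
            #   tokens = tokenizer(text, add_special_tokens=True)["input_ids"]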
            tokens = tokenizer.tokenize(text)
            token_count = len(tokens)

        else:
            raise ValueError(f"Unsupported tokenizer type: {tokenizer_type}")

    except Exception as e:
        logger.error(f"Error in tokenization for {tokenizer_type}: {str(e)}")
        token_count = -1

    processing_time = time.time() - start_time

    # Enhanced analysis with performance and caching info
    if token_count == -1:
        analysis = "tokenizer error"
    elif token_count <= 50:
        analysis = "short text"
    elif token_count <= 200:
        analysis = "medium text"
    elif token_count <= 1000:
        analysis = "long text"
    else:
        analysis = "very long text"

    # Add performance metadata for user feedback
    performance_info = f" (⏱️ {processing_time:.2f}s"
    if is_cached:
        performance_info += " ⚡ cached)"
    else:
        performance_info += " 🔄 loaded)"

    return {
        "word_count": word_count,
        "character_count": char_count,
        "token_count": token_count if token_count != -1 else "Error during tokenization",
        "analysis": analysis + performance_info,
        "tokenizer_used": tokenizer_type,
        "tokens_per_word": round(token_count / word_count, 2) if word_count > 0 and token_count > 0 else 0
    }


# Create individual interfaces for each tokenizer
demo_gpt4 = gr.Interface(
    fn=count_tokens_openai_gpt4,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for GPT-4/3.5...",
        label="Text",
        value="Count the tokens in this sample text."  # Default value as per guide
    ),
    outputs=gr.JSON(),
    title="GPT-4/3.5 Token Counter ⚡",
    description="Count tokens using OpenAI GPT-4/GPT-3.5-turbo tokenizer. Best for GPT-4, Claude, Gemini. (Fast - tiktoken)"
)

demo_davinci = gr.Interface(
    fn=count_tokens_openai_davinci,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for text-davinci...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="text-davinci Token Counter ⚡",
    description="Count tokens using OpenAI text-davinci tokenizer. (Fast - tiktoken)"
)

demo_gpt3 = gr.Interface(
    fn=count_tokens_openai_gpt3,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for GPT-3...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="GPT-3 Token Counter ⚡",
    description="Count tokens using OpenAI GPT-3 tokenizer. (Fast - tiktoken)"
)

demo_bert = gr.Interface(
    fn=count_tokens_bert_family,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for BERT...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="BERT Token Counter 🔄",
    description="Count tokens using BERT tokenizer for classification tasks. (First use: ~10s, then cached)"
)

demo_roberta = gr.Interface(
    fn=count_tokens_roberta_family,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for RoBERTa...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="RoBERTa Token Counter 🔄",
    description="Count tokens using RoBERTa tokenizer. (First use: ~10s, then cached)"
)

demo_gpt2 = gr.Interface(
    fn=count_tokens_gpt2_family,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for GPT-2/Llama/Mistral...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="GPT-2/Llama/Mistral Token Counter 🔄",
    description="Count tokens using GPT-2 tokenizer. Good approximation for Llama, Mistral, and open source LLMs. (First use: ~15s, then cached)"
)

demo_t5 = gr.Interface(
    fn=count_tokens_t5_family,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for T5...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="T5 Token Counter 🔄",
    description="Count tokens using T5 tokenizer for sequence-to-sequence models. (First use: ~10s, then cached)"
)

demo_distilbert = gr.Interface(
    fn=count_tokens_distilbert,
    inputs=gr.Textbox(
        placeholder="Enter text to count tokens for DistilBERT...",
        label="Text",
        value="Count the tokens in this sample text."
    ),
    outputs=gr.JSON(),
    title="DistilBERT Token Counter 🔄",
    description="Count tokens using DistilBERT tokenizer for lightweight models. (First use: ~20s, then cached)"
)

# Combine all interfaces into a single app with tabs
demo = gr.TabbedInterface(
    [demo_gpt4, demo_davinci, demo_gpt3, demo_bert, demo_roberta, demo_gpt2, demo_t5, demo_distilbert],
    ["GPT-4/3.5 ⚡", "text-davinci ⚡", "GPT-3 ⚡", "BERT 🔄", "RoBERTa 🔄", "GPT-2/Llama 🔄", "T5 🔄", "DistilBERT 🔄"],
    title="🔢 Multi-Model Token Counter Suite",
    theme=gr.themes.Soft()
)

# Queue requests to stay within HF Spaces resource limits
demo.queue(max_size=10)  # Limit concurrent requests for HF Spaces

# Launch the interface and MCP server
if __name__ == "__main__":
    print("🚀 Launching Token Counter with HF Spaces optimizations")
    print("ℹ️ Note: First-time model loading may take 5-30 seconds")
    print("ℹ️ For production use, consider a dedicated server with more RAM")
    demo.launch(mcp_server=True)
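
# Usage note (assumptions about the deployment, not verified here): launching with
# mcp_server=True requires Gradio's MCP extra (e.g. `pip install "gradio[mcp]"`), and
# Gradio prints the MCP endpoint on startup — typically something like
# http://<host>:7860/gradio_api/mcp/sse — which MCP clients can be pointed at.
# Each tab's counting function is then exposed as a separate MCP tool.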