""" Response Schemas ================ DTOs for API responses """ from pydantic import BaseModel, Field from typing import List, Optional, Dict from enum import Enum class SentimentLabel(str, Enum): """Sentiment labels""" TOXIC = "toxic" CLEAN = "clean" class WordScore(BaseModel): """Word-level score information""" word: str = Field(..., description="The word") score: float = Field(..., ge=0.0, le=1.0, description="Toxicity score (0-1)") position: Dict[str, int] = Field(..., description="Position in text {start, end}") is_toxic: bool = Field(..., description="Whether word is toxic") is_stop_word: bool = Field(..., description="Whether word is a stop word") class SentenceResult(BaseModel): """Sentence-level analysis result""" sentence_number: int = Field(..., description="Sentence index (1-based)") text: str = Field(..., description="Sentence text") label: SentimentLabel = Field(..., description="Toxic or clean") confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score") threshold: float = Field(..., ge=0.0, le=1.0, description="Threshold used") word_count: int = Field(..., description="Number of words") word_scores: Optional[List[WordScore]] = Field(None, description="Word-level scores") class ToxicWordSummary(BaseModel): """Summary of toxic words""" word: str = Field(..., description="Toxic word") score: float = Field(..., ge=0.0, le=1.0, description="Maximum score") occurrences: int = Field(..., description="Number of occurrences") sentences: List[int] = Field(..., description="Sentence numbers containing this word") class Statistics(BaseModel): """Overall statistics""" total_words: int = Field(..., description="Total number of words") toxic_words: int = Field(..., description="Number of toxic words") mean_score: float = Field(..., ge=0.0, le=1.0, description="Mean toxicity score") median_score: float = Field(..., ge=0.0, le=1.0, description="Median toxicity score") max_score: float = Field(..., ge=0.0, le=1.0, description="Maximum toxicity score") min_score: float = Field(..., ge=0.0, le=1.0, description="Minimum toxicity score") class AnalysisResponse(BaseModel): """Complete analysis response""" success: bool = Field(True, description="Whether analysis succeeded") text: str = Field(..., description="Original input text") overall_label: SentimentLabel = Field(..., description="Overall text sentiment") toxic_sentence_count: int = Field(..., description="Number of toxic sentences") clean_sentence_count: int = Field(..., description="Number of clean sentences") total_sentences: int = Field(..., description="Total number of sentences") sentences: List[SentenceResult] = Field(..., description="Sentence-level results") toxic_words_summary: List[ToxicWordSummary] = Field(..., description="Summary of toxic words") statistics: Statistics = Field(..., description="Overall statistics") html_highlighted: Optional[str] = Field(None, description="HTML with highlighting") class Config: json_schema_extra = { "example": { "success": True, "text": "Đồ ngu ngốc!", "overall_label": "toxic", "toxic_sentence_count": 1, "clean_sentence_count": 0, "total_sentences": 1, "sentences": [ { "sentence_number": 1, "text": "Đồ ngu ngốc!", "label": "toxic", "confidence": 0.998, "threshold": 0.62, "word_count": 3, "word_scores": [ { "word": "Đồ", "score": 0.902, "position": {"start": 0, "end": 2}, "is_toxic": True, "is_stop_word": False } ] } ], "toxic_words_summary": [ { "word": "ngu", "score": 0.924, "occurrences": 1, "sentences": [1] } ], "statistics": { "total_words": 3, "toxic_words": 3, "mean_score": 0.856, "median_score": 0.865, "max_score": 0.924, "min_score": 0.756 }, "html_highlighted": "