""" HTML Generator ============== Generate HTML highlighting (Single Responsibility) """ from typing import List, Dict from app.services.text_processor import TextProcessor class HTMLGenerator: """ HTML generation service Responsibilities: - Generate HTML with highlighting - Format toxic/clean sentences differently """ @staticmethod def generate_highlighted_html( text: str, sentence_results: List[Dict[str, any]] ) -> str: """ Generate HTML with highlighting Args: text: Original text sentence_results: List of sentence analysis results Returns: HTML string with highlighting """ html = '

' last_end = 0 for sent_data in sentence_results: sent_start = sent_data['sent_start'] sent_end = sent_data['sent_end'] is_toxic = sent_data['is_toxic'] words = sent_data['words'] scores = sent_data['scores'] threshold = sent_data['threshold'] # Add space between sentences if sent_start > last_end: html += text[last_end:sent_start] sent_text = text[sent_start:sent_end] if is_toxic: # Toxic sentence - highlight words sent_html = HTMLGenerator._generate_toxic_sentence_html( sent_text, sent_start, words, scores, threshold ) html += f'{sent_html}' else: # Clean sentence - plain text html += f'{sent_text}' last_end = sent_end # Add remaining text if last_end < len(text): html += text[last_end:] html += '

' return html @staticmethod def _generate_toxic_sentence_html( sent_text: str, sent_start: int, words: List[Dict[str, any]], scores: List[float], threshold: float ) -> str: """ Generate HTML for toxic sentence Args: sent_text: Sentence text sent_start: Sentence start position in full text words: List of words scores: Word scores threshold: Toxicity threshold Returns: HTML string for sentence """ sent_html = "" char_idx = 0 word_idx = 0 while char_idx < len(sent_text): if word_idx < len(words): word_info = words[word_idx] word_start_rel = word_info['start'] - sent_start word_end_rel = word_info['end'] - sent_start if char_idx == word_start_rel: word = word_info['word'] score = scores[word_idx] if score > threshold and not TextProcessor.is_stop_word(word) and len(word) > 1: # Toxic word - red background color = int(255 * (1 - score)) sent_html += ( f'{word}' ) else: # Non-toxic word if TextProcessor.is_stop_word(word): sent_html += f'{word}' else: sent_html += f'{word}' char_idx = word_end_rel word_idx += 1 continue # Not at word - add character (punctuation, space, etc) sent_html += sent_text[char_idx] char_idx += 1 return sent_html