# engine/narration_system.py (formerly narrator_agent.py)
# LangGraph multi-agent system that generates UNE-153010 audiodescriptions.
from __future__ import annotations

import json
import re
import time
from dataclasses import dataclass
from typing import Any, Dict, List, TypedDict

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
@dataclass
class NarratorInput:
dialogues_srt: str
frame_descriptions: List[Dict[str, Any]] # [{"timestamp": "00:01:23,000", "description": "..."}]
une_guidelines_path: str
max_cycles: int = 3
@dataclass
class NarratorOutput:
narrative_text: str
srt_text: str
critic_feedback: str | None = None
approved: bool = False
class NarrationSystem:
    """LangGraph-based multi-agent audiodescription system.

    Nodes:
      - NarratorNode: generates narration + SRT according to UNE-153010
      - CriticNode: evaluates conformity with UNE and coherence
      - IdentityManagerNode: adjusts character identification if needed
      - BackgroundDescriptorNode: fixes background/scene coherence

    The critic routes the narration back through the two fixer nodes until
    it approves the result or the ``max_cycles`` budget is exhausted.
    """

    def __init__(self, model_url: str, une_guidelines_path: str):
        """Create the LLM clients, load the UNE rules and compile the graph.

        Args:
            model_url: Base URL of an OpenAI-compatible chat endpoint.
            une_guidelines_path: Path to the UNE-153010 guidelines text file.

        Raises:
            OSError: if the guidelines file cannot be read.
        """
        self.model_url = model_url
        self.une_guidelines_path = une_guidelines_path
        # LLM endpoints (each node could use a different deployment if desired)
        self.narrator_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.6)
        self.critic_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.3)
        self.identity_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)
        self.background_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)
        with open(une_guidelines_path, "r", encoding="utf-8") as f:
            self.une_rules = f.read()
        # Build the LangGraph workflow once; the compiled graph is reusable.
        self.graph = self.build_graph()

    # -----------------------------------------------------------
    # LangGraph nodes
    # -----------------------------------------------------------
    def narrator_node(self, state):
        """Generate the initial narration + UNE SRT from dialogues and frames."""
        dialogues = state["dialogues_srt"]
        frames = state["frame_descriptions"]
        prompt = ChatPromptTemplate.from_template("""
Eres un narrador de audiodescripciones según la norma UNE-153010.
Combina coherentemente los diálogos del siguiente SRT con las descripciones de escena dadas.
Sigue estas pautas:
- Genera una narración libre que integre ambos tipos de información.
- Evita redundancias o descripciones triviales.
- Limita la duración de las audiodescripciones para que quepan entre los diálogos.
- Devuelve **dos bloques**:
1️⃣ `NARRATION_TEXT`: narración libre completa en texto continuo.
2️⃣ `UNE_SRT`: subtítulos con los diálogos y las audiodescripciones UNE.
## DIÁLOGOS SRT
{dialogues}
## DESCRIPCIONES DE FRAMES
{frames}
""")
        # format_messages builds proper chat messages instead of one flattened
        # "Human: ..." string (which is what .format() would produce).
        response = self.narrator_llm.invoke(
            prompt.format_messages(dialogues=dialogues, frames=json.dumps(frames, ensure_ascii=False))
        )
        return {"narration": response.content, "critic_feedback": None, "approved": False}

    def critic_node(self, state):
        """Review the narration against UNE-153010; approve or emit feedback.

        Also increments the ``cycles`` counter so the conditional edge can
        enforce the ``max_cycles`` revision budget.
        """
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
Actúa como un revisor experto en audiodescripción conforme a la norma UNE-153010.
Evalúa el siguiente texto y SRT generados, detectando:
- Incoherencias en asignación de personajes.
- Errores en la identificación de escenarios.
- Desviaciones respecto a la norma UNE-153010.
- Incoherencias narrativas generales.
Devuelve:
- "APPROVED" si el resultado es conforme.
- En caso contrario, una lista JSON con observaciones clasificadas en:
- "characters"
- "scenes"
- "norma"
- "coherence"
## NORMA UNE-153010
{une_rules}
## TEXTO Y SRT A EVALUAR
{narration}
""")
        response = self.critic_llm.invoke(
            prompt.format_messages(une_rules=self.une_rules, narration=narration)
        )
        cycles = state.get("cycles", 0) + 1
        text = response.content.strip()
        if "APPROVED" in text.upper():
            return {"critic_feedback": None, "approved": True, "cycles": cycles}
        return {"critic_feedback": text, "approved": False, "cycles": cycles}

    def identity_node(self, state):
        """Fix character/dialogue attribution issues flagged by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en personajes o diálogos.
Corrige únicamente esos aspectos manteniendo el resto igual.
## FEEDBACK
{fb}
## TEXTO ORIGINAL
{narration}
""")
        response = self.identity_llm.invoke(prompt.format_messages(fb=fb, narration=narration))
        return {"narration": response.content}

    def background_node(self, state):
        """Fix scene/background coherence issues flagged by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en escenarios o contexto visual.
Ajusta las descripciones de fondo manteniendo el estilo y duración UNE.
## FEEDBACK
{fb}
## TEXTO ORIGINAL
{narration}
""")
        response = self.background_llm.invoke(prompt.format_messages(fb=fb, narration=narration))
        return {"narration": response.content}

    # -----------------------------------------------------------
    # Graph assembly
    # -----------------------------------------------------------
    def build_graph(self):
        """Assemble and compile the narrator/critic/fixer workflow."""

        class NarrationState(TypedDict, total=False):
            # total=False: nodes update the state incrementally.
            dialogues_srt: str
            frame_descriptions: List[Dict[str, Any]]
            narration: str
            critic_feedback: str | None
            approved: bool
            cycles: int
            max_cycles: int

        # BUG FIX: StateGraph requires an explicit state schema; the original
        # zero-argument StateGraph() call fails at construction time.
        g = StateGraph(NarrationState)
        g.add_node("NarratorNode", self.narrator_node)
        g.add_node("CriticNode", self.critic_node)
        g.add_node("IdentityManagerNode", self.identity_node)
        g.add_node("BackgroundDescriptorNode", self.background_node)
        g.set_entry_point("NarratorNode")
        g.add_edge("NarratorNode", "CriticNode")
        g.add_conditional_edges(
            "CriticNode",
            # Stop when the critic approves OR the revision budget is spent;
            # otherwise route the narration back through the fixer nodes.
            lambda state: "done"
            if state.get("approved") or state.get("cycles", 0) >= state.get("max_cycles", 3)
            else "retry",
            {
                "done": END,
                "retry": "IdentityManagerNode",
            },
        )
        g.add_edge("IdentityManagerNode", "BackgroundDescriptorNode")
        g.add_edge("BackgroundDescriptorNode", "CriticNode")
        return g.compile()

    # -----------------------------------------------------------
    # Output parsing
    # -----------------------------------------------------------
    @staticmethod
    def _split_output(text: str) -> tuple[str, str]:
        """Split model output into ``(narrative_text, srt_text)``.

        The narrator is prompted to emit a ``NARRATION_TEXT`` block followed
        by a ``UNE_SRT`` block.  When both markers are found, the content
        between/after them is returned; otherwise the whole text is returned
        for both fields (the previous, unparsed behaviour).
        """
        m = re.search(r"NARRATION_TEXT\W*(.*?)\n[^\n]*UNE_SRT\W*(.*)", text, re.DOTALL)
        if m:
            return m.group(1).strip(), m.group(2).strip()
        return text, text

    # -----------------------------------------------------------
    # Run loop
    # -----------------------------------------------------------
    def run(self, dialogues_srt: str, frame_descriptions: List[Dict[str, Any]], max_cycles: int = 3) -> NarratorOutput:
        """Execute the full narration workflow.

        Args:
            dialogues_srt: SRT text of the dialogue subtitles.
            frame_descriptions: Frame descriptions with timestamps.
            max_cycles: Maximum critic revision cycles (now actually enforced
                via the graph's conditional edge — it was ignored before).

        Returns:
            NarratorOutput with the narration, the parsed SRT block, the last
            critic feedback (if any) and the approval flag.
        """
        state = {
            "dialogues_srt": dialogues_srt,
            "frame_descriptions": frame_descriptions,
            "cycles": 0,
            "max_cycles": max_cycles,
        }
        # Give LangGraph enough recursion headroom: each revision cycle visits
        # three nodes, plus the initial narrator pass and slack.
        result = self.graph.invoke(state, config={"recursion_limit": 3 * max_cycles + 5})
        narrative_text, srt_text = self._split_output(result.get("narration", ""))
        return NarratorOutput(
            narrative_text=narrative_text,
            srt_text=srt_text,
            critic_feedback=result.get("critic_feedback"),
            approved=result.get("approved", False),
        )