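"""Multi-agent audio description narration pipeline built on LangGraph.

A narrator agent drafts UNE-153010 narration and SRT from dialogue subtitles
and frame descriptions; a critic agent reviews the draft, and two repair
agents fix character and scene issues until the critic approves.
"""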
from __future__ import annotations

import json
import time
from dataclasses import dataclass
from typing import Any, Dict, List

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph


@dataclass
class NarratorInput:
    """Inputs for one narration run."""

    dialogues_srt: str
    frame_descriptions: List[Dict[str, Any]]
    une_guidelines_path: str
    max_cycles: int = 3


@dataclass
class NarratorOutput:
    """Final narration, the UNE SRT, and the critic's last verdict."""

    narrative_text: str
    srt_text: str
    critic_feedback: str | None = None
    approved: bool = False


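# Shared state schema for the StateGraph built below. StateGraph needs an
# annotated schema to create its channels; this field set is an assumption
# inferred from the keys the nodes read and write, so extend it if the node
# contracts change.
from typing import Optional, TypedDict


class GraphState(TypedDict, total=False):
    dialogues_srt: str
    frame_descriptions: List[Dict[str, Any]]
    narration: str
    critic_feedback: Optional[str]
    approved: bool
    cycles: int      # critic passes completed so far
    max_cycles: int  # cap on revision cycles, supplied by run()

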
class NarrationSystem:
    """
    LangGraph-based multi-agent system:
    - NarratorNode: generates narration + SRT according to UNE-153010
    - CriticNode: evaluates conformity with UNE-153010 and coherence
    - IdentityManagerNode: adjusts character identification if needed
    - BackgroundDescriptorNode: fixes background/scene coherence

    Flow: NarratorNode -> CriticNode, then either END (approved or cycle cap
    reached) or IdentityManagerNode -> BackgroundDescriptorNode -> CriticNode.
    """

    def __init__(self, model_url: str, une_guidelines_path: str):
        self.model_url = model_url
        self.une_guidelines_path = une_guidelines_path

        # One client per role so each agent runs at its own temperature.
        self.narrator_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.6)
        self.critic_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.3)
        self.identity_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)
        self.background_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)

        # The UNE-153010 guidelines are injected verbatim into the critic prompt.
        with open(une_guidelines_path, "r", encoding="utf-8") as f:
            self.une_rules = f.read()

        self.graph = self.build_graph()

    def narrator_node(self, state: GraphState) -> dict:
        """Draft the narration and UNE SRT from the dialogues and frame descriptions."""
        dialogues = state["dialogues_srt"]
        frames = state["frame_descriptions"]

        # Spanish prompt: asks for a UNE-153010 narration that weaves the SRT
        # dialogues and frame descriptions together and returns two labelled
        # blocks, NARRATION_TEXT and UNE_SRT.
        prompt = ChatPromptTemplate.from_template("""
Eres un narrador de audiodescripciones según la norma UNE-153010.
Combina coherentemente los diálogos del siguiente SRT con las descripciones de escena dadas.

Sigue estas pautas:
- Genera una narración libre que integre ambos tipos de información.
- Evita redundancias o descripciones triviales.
- Limita la duración de las audiodescripciones para que quepan entre los diálogos.
- Devuelve **dos bloques**:
  1️⃣ `NARRATION_TEXT`: narración libre completa en texto continuo.
  2️⃣ `UNE_SRT`: subtítulos con los diálogos y las audiodescripciones UNE.

## DIÁLOGOS SRT
{dialogues}

## DESCRIPCIONES DE FRAMES
{frames}
""")

        response = self.narrator_llm.invoke(
            prompt.format_messages(dialogues=dialogues, frames=json.dumps(frames, ensure_ascii=False))
        )
        return {"narration": response.content, "critic_feedback": None, "approved": False}

    def critic_node(self, state: GraphState) -> dict:
        """Review the draft against UNE-153010 and flag issues for the repair nodes."""
        narration = state["narration"]

        # Spanish prompt: asks an expert reviewer to check character assignment,
        # scene identification, UNE-153010 compliance and overall coherence, and
        # to answer "APPROVED" or a JSON list of classified observations.
        prompt = ChatPromptTemplate.from_template("""
Actúa como un revisor experto en audiodescripción conforme a la norma UNE-153010.
Evalúa el siguiente texto y SRT generados, detectando:
- Incoherencias en asignación de personajes.
- Errores en la identificación de escenarios.
- Desviaciones respecto a la norma UNE-153010.
- Incoherencias narrativas generales.

Devuelve:
- "APPROVED" si el resultado es conforme.
- En caso contrario, una lista JSON con observaciones clasificadas en:
  - "characters"
  - "scenes"
  - "norma"
  - "coherence"

## NORMA UNE-153010
{une_rules}

## TEXTO Y SRT A EVALUAR
{narration}
""")

        response = self.critic_llm.invoke(
            prompt.format_messages(une_rules=self.une_rules, narration=narration)
        )
        text = response.content.strip()

        # Count critic passes so the conditional edge can stop after max_cycles.
        cycles = state.get("cycles", 0) + 1

        if "APPROVED" in text.upper():
            return {"critic_feedback": None, "approved": True, "cycles": cycles}
        return {"critic_feedback": text, "approved": False, "cycles": cycles}

    def identity_node(self, state: GraphState) -> dict:
        """Fix character/dialogue attribution issues raised by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]

        # Spanish prompt: fix only the character/dialogue issues named in the
        # feedback, leaving everything else untouched.
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en personajes o diálogos.
Corrige únicamente esos aspectos manteniendo el resto igual.

## FEEDBACK
{fb}

## TEXTO ORIGINAL
{narration}
""")
        response = self.identity_llm.invoke(prompt.format_messages(fb=fb, narration=narration))
        return {"narration": response.content}

    def background_node(self, state: GraphState) -> dict:
        """Fix scene/background coherence issues raised by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]

        # Spanish prompt: adjust the background/scene descriptions while keeping
        # the UNE style and timing constraints.
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en escenarios o contexto visual.
Ajusta las descripciones de fondo manteniendo el estilo y duración UNE.

## FEEDBACK
{fb}

## TEXTO ORIGINAL
{narration}
""")
        response = self.background_llm.invoke(prompt.format_messages(fb=fb, narration=narration))
        return {"narration": response.content}

    def build_graph(self):
        # StateGraph requires an annotated state schema to create its channels.
        g = StateGraph(GraphState)
        g.add_node("NarratorNode", self.narrator_node)
        g.add_node("CriticNode", self.critic_node)
        g.add_node("IdentityManagerNode", self.identity_node)
        g.add_node("BackgroundDescriptorNode", self.background_node)

        g.set_entry_point("NarratorNode")
        g.add_edge("NarratorNode", "CriticNode")
        # Finish when the critic approves or the revision-cycle cap is reached;
        # otherwise route the draft through the two repair nodes and re-review.
        g.add_conditional_edges(
            "CriticNode",
            lambda state: "done"
            if state.get("approved") or state.get("cycles", 0) >= state.get("max_cycles", 3)
            else "retry",
            {
                "done": END,
                "retry": "IdentityManagerNode",
            },
        )
        g.add_edge("IdentityManagerNode", "BackgroundDescriptorNode")
        g.add_edge("BackgroundDescriptorNode", "CriticNode")

        return g.compile()

    def run(self, dialogues_srt: str, frame_descriptions: List[Dict[str, Any]], max_cycles: int = 3) -> NarratorOutput:
        state: GraphState = {"dialogues_srt": dialogues_srt, "frame_descriptions": frame_descriptions, "cycles": 0, "max_cycles": max_cycles}
        result = self.graph.invoke(state)

        # The narrator returns both blocks in one string; split on the UNE_SRT
        # marker requested in the prompt (a heuristic), falling back to the
        # full text if the marker is missing.
        narration = result.get("narration", "")
        narrative_text, _, srt_text = narration.partition("UNE_SRT")
        narrative_text = narrative_text.replace("NARRATION_TEXT", "").strip(" :\n#*`") or narration
        srt_text = srt_text.strip(" :\n#*`") or narration

        return NarratorOutput(
            narrative_text=narrative_text,
            srt_text=srt_text,
            critic_feedback=result.get("critic_feedback"),
            approved=result.get("approved", False),
        )
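

# Minimal usage sketch: the endpoint URL, guidelines path and inline SRT/frame
# data below are placeholders, so swap in real values before running.
if __name__ == "__main__":
    system = NarrationSystem(
        model_url="http://localhost:8000/v1",      # placeholder OpenAI-compatible endpoint
        une_guidelines_path="docs/une_153010.md",  # placeholder guidelines file
    )
    demo_srt = "1\n00:00:01,000 --> 00:00:03,000\n- ¿Dónde estabas anoche?\n"
    demo_frames = [{"timestamp": "00:00:02,500", "description": "Interior nocturno: una mujer entra en la cocina."}]
    output = system.run(demo_srt, demo_frames, max_cycles=2)
    print("Approved:", output.approved)
    print(output.srt_text)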