from __future__ import annotations

import json
import shutil
from pathlib import Path

import uvicorn
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from casting_loader import build_faces_index, build_voices_index, ensure_chroma
from llm_router import LLMRouter, load_yaml
from narration_system import NarrationSystem
from video_processing import process_video_pipeline

app = FastAPI(title="Veureu Engine API", version="0.2.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Working directories; everything lives under /tmp so the service also works
# on deployments with a read-only application filesystem.
ROOT = Path("/tmp/veureu")
ROOT.mkdir(parents=True, exist_ok=True)
TEMP_ROOT = Path("/tmp/temp")
TEMP_ROOT.mkdir(parents=True, exist_ok=True)
VIDEOS_ROOT = Path("/tmp/data/videos")
VIDEOS_ROOT.mkdir(parents=True, exist_ok=True)


@app.get("/")
def root():
    """Health check."""
    return {"ok": True, "service": "veureu-engine"}


@app.post("/process_video")
async def process_video(
    video_file: UploadFile = File(...),
    config_path: str = Form("config.yaml"),
    out_root: str = Form("results"),
    db_dir: str = Form("chroma_db"),
):
    """Run the full processing pipeline on an uploaded video."""
    # Keep only the basename so a crafted filename cannot escape ROOT.
    tmp_video = ROOT / Path(video_file.filename).name
    with tmp_video.open("wb") as f:
        shutil.copyfileobj(video_file.file, f)
    result = process_video_pipeline(
        str(tmp_video), config_path=config_path, out_root=out_root, db_dir=db_dir
    )
    return JSONResponse(result)


@app.post("/create_initial_casting")
async def create_initial_casting(
    video: UploadFile = File(...),
    epsilon: float = Form(...),
    min_cluster_size: int = Form(...),
):
    # Save the video into the data folder
    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    # Create the folder structure under temp/<video_name>/...
    base = TEMP_ROOT / video_name
    for sub in ("sources", "faces", "voices", "backgrounds"):
        (base / sub).mkdir(parents=True, exist_ok=True)

    # The real detection logic can be triggered from here in the future
    return {
        "ok": True,
        "video": str(dst_video),
        "epsilon": float(epsilon),
        "min_cluster_size": int(min_cluster_size),
        "temp_dirs": {
            "sources": str(base / "sources"),
            "faces": str(base / "faces"),
            "voices": str(base / "voices"),
            "backgrounds": str(base / "backgrounds"),
        },
    }


@app.post("/load_casting")
async def load_casting(
    faces_dir: str = Form("identities/faces"),
    voices_dir: str = Form("identities/voices"),
    db_dir: str = Form("chroma_db"),
    drop_collections: bool = Form(False),
):
    """Index reference faces and voices into ChromaDB collections."""
    client = ensure_chroma(Path(db_dir))
    n_faces = build_faces_index(
        Path(faces_dir), client, collection_name="index_faces", drop=drop_collections
    )
    n_voices = build_voices_index(
        Path(voices_dir), client, collection_name="index_voices", drop=drop_collections
    )
    return {"ok": True, "faces": n_faces, "voices": n_voices}


@app.post("/refine_narration")
async def refine_narration(
    dialogues_srt: str = Form(...),
    frame_descriptions_json: str = Form("[]"),
    config_path: str = Form("config.yaml"),
):
    """Merge SRT dialogues and frame descriptions into a UNE-153010 narration.

    Routes to the remote model when the config allows it; otherwise, or on any
    remote failure, falls back to the local NarrationSystem.
    """
    cfg = load_yaml(config_path)
    frames = json.loads(frame_descriptions_json)
    model_name = cfg.get("narration", {}).get("model", "salamandra-instruct")
    use_remote = model_name in cfg.get("models", {}).get("routing", {}).get("use_remote_for", [])

    def _local_narration():
        # Shared local fallback, reading the single config key
        # "une_guidelines_path" on both code paths.
        ns = NarrationSystem(
            model_url=None,
            une_guidelines_path=cfg.get("narration", {}).get("une_guidelines_path", "UNE_153010.txt"),
        )
        res = ns.run(dialogues_srt, frames)
        return {
            "narrative_text": res.narrative_text,
            "srt_text": res.srt_text,
            "approved": res.approved,
            "critic_feedback": res.critic_feedback,
        }

    if use_remote:
        router = LLMRouter(cfg)
        # System prompt stays in Spanish: the service produces Spanish audio
        # description following the UNE-153010 standard.
        system_msg = (
            "Eres un sistema de audiodescripción que cumple UNE-153010. "
            "Fusiona diálogos del SRT con descripciones concisas en los huecos, evitando redundancias. "
            "Devuelve JSON con {narrative_text, srt_text}."
        )
        prompt = json.dumps(
            {"dialogues_srt": dialogues_srt, "frames": frames, "rules": cfg.get("narration", {})},
            ensure_ascii=False,
        )
        try:
            txt = router.instruct(prompt=prompt, system=system_msg, model=model_name)
            try:
                out = json.loads(txt)
            except Exception:
                # The model did not return valid JSON; keep the raw text.
                out = {"narrative_text": txt, "srt_text": ""}
            return {
                "narrative_text": out.get("narrative_text", ""),
                "srt_text": out.get("srt_text", ""),
                "approved": True,
                "critic_feedback": "",
            }
        except Exception:
            return _local_narration()

    return _local_narration()


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
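
# Example requests against a local instance. This is an illustrative sketch:
# it assumes the module is saved as app.py and that sample.mp4, config.yaml
# and the identities/ folders exist at the paths shown (all hypothetical
# names; adjust to your setup).
#
#   python app.py
#   curl http://localhost:7860/
#   curl -X POST http://localhost:7860/process_video \
#       -F "video_file=@sample.mp4" -F "config_path=config.yaml"
#   curl -X POST http://localhost:7860/load_casting \
#       -F "faces_dir=identities/faces" -F "voices_dir=identities/voices" \
#       -F "db_dir=chroma_db" -F "drop_collections=false"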