|
|
from __future__ import annotations
|
|
|
from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks, HTTPException
|
|
|
from fastapi.responses import JSONResponse, FileResponse
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
from pathlib import Path
|
|
|
import shutil
|
|
|
import uvicorn
|
|
|
import json
|
|
|
import uuid
|
|
|
from datetime import datetime
|
|
|
from typing import Dict
|
|
|
from enum import Enum
|
|
|
import os
|
|
|
|
|
|
from video_processing import process_video_pipeline
|
|
|
from casting_loader import ensure_chroma, build_faces_index, build_voices_index
|
|
|
from narration_system import NarrationSystem
|
|
|
from llm_router import load_yaml, LLMRouter
|
|
|
from character_detection import detect_characters_from_video
|
|
|
|
|
|
# ASGI application object; every route below is registered on it.
app = FastAPI(version="0.2.0", title="Veureu Engine API")

# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site send credentialed requests to this API - confirm this is intended
# before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
# Working directories used by the endpoints in this module:
#   ROOT        - uploads received by /process_video
#   TEMP_ROOT   - per-video output read by /files and written by background jobs
#   VIDEOS_ROOT - uploads received by /create_initial_casting
ROOT = Path("/tmp/veureu")
TEMP_ROOT = Path("/tmp/temp")
VIDEOS_ROOT = Path("/tmp/data/videos")

# Create all of them at import time so handlers can assume they exist.
for _directory in (ROOT, TEMP_ROOT, VIDEOS_ROOT):
    _directory.mkdir(parents=True, exist_ok=True)
del _directory
|
|
|
|
|
|
|
|
|
class JobStatus(str, Enum):
    """Lifecycle states of a background job.

    Members are also ``str`` instances, so they compare equal to their plain
    string value.
    """

    # Registered, worker not started yet.
    QUEUED = "queued"
    # Worker is currently running.
    PROCESSING = "processing"
    # Worker finished and stored results.
    DONE = "done"
    # Worker aborted and stored an error message.
    FAILED = "failed"
|
|
|
|
|
|
# In-memory job registry keyed by job id. Entries are created by
# /create_initial_casting, mutated by process_video_job and read by
# /jobs/{job_id}/status. Being a module-level dict, it lives only as long as
# this process.
jobs: Dict[str, dict] = dict()
|
|
|
|
|
|
@app.get("/")
def root():
    """Return a small static payload identifying the service."""
    return dict(ok=True, service="veureu-engine")
|
|
|
|
|
|
@app.post("/process_video")
async def process_video(
    video_file: UploadFile = File(...),
    config_path: str = Form("config.yaml"),
    out_root: str = Form("results"),
    db_dir: str = Form("chroma_db"),
):
    """Store an uploaded video under ROOT and run the processing pipeline on it.

    Args:
        video_file: Uploaded video; saved under ROOT using its base filename.
        config_path: Forwarded to ``process_video_pipeline`` as ``config_path``.
        out_root: Forwarded to ``process_video_pipeline`` as ``out_root``.
        db_dir: Forwarded to ``process_video_pipeline`` as ``db_dir``.

    Returns:
        A ``JSONResponse`` wrapping whatever the pipeline returns.

    Raises:
        HTTPException: 400 when the upload carries no usable filename.
    """
    # The filename is client-controlled. Joining it verbatim onto ROOT would let
    # an absolute path or "../" sequence write outside ROOT, so keep only the
    # final path component and reject names that carry no file name at all.
    safe_name = Path(video_file.filename or "").name
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid video filename")

    tmp_video = ROOT / safe_name
    with tmp_video.open("wb") as f:
        shutil.copyfileobj(video_file.file, f)

    # NOTE(review): config_path, out_root and db_dir are client-supplied paths
    # passed through unchanged - confirm the pipeline restricts them.
    # NOTE(review): the pipeline is called synchronously inside an async
    # handler, so the event loop is busy until it returns.
    result = process_video_pipeline(
        str(tmp_video),
        config_path=config_path,
        out_root=out_root,
        db_dir=db_dir,
    )
    return JSONResponse(result)
|
|
|
|
|
|
@app.post("/create_initial_casting")
async def create_initial_casting(
    background_tasks: BackgroundTasks,
    video: UploadFile = File(...),
    epsilon: float = Form(...),
    min_cluster_size: int = Form(...),
):
    """Create a job that processes the uploaded video asynchronously.

    The upload is saved as ``VIDEOS_ROOT/<stem>.mp4``, a job entry is added to
    ``jobs`` with status QUEUED, and ``process_video_job`` is scheduled as a
    background task. The job id is returned immediately.

    Args:
        background_tasks: FastAPI task scheduler used to queue the worker.
        video: Uploaded video file.
        epsilon: Clustering parameter stored on the job for the worker.
        min_cluster_size: Clustering parameter stored on the job for the worker.

    Returns:
        ``{"job_id": <uuid4 string>}``.

    Raises:
        HTTPException: 400 when the upload carries no usable filename.
    """
    # The stem later becomes a directory name under TEMP_ROOT. A missing
    # filename, or names such as "..mp4" / "...mp4" whose stem is "." / "..",
    # would crash or point outside the intended folder, so reject them here.
    video_name = Path(video.filename or "").stem
    if video_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid video filename")

    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    job_id = str(uuid.uuid4())

    jobs[job_id] = {
        "id": job_id,
        "status": JobStatus.QUEUED,
        "video_path": str(dst_video),
        "video_name": video_name,
        "epsilon": float(epsilon),
        "min_cluster_size": int(min_cluster_size),
        "created_at": datetime.now().isoformat(),
        "results": None,
        "error": None,
    }

    print(f"[{job_id}] Job creado para vídeo: {video_name}")

    # The worker runs after the response is sent; clients poll the status route.
    background_tasks.add_task(process_video_job, job_id)

    return {"job_id": job_id}
|
|
|
|
|
|
@app.get("/jobs/{job_id}/status")
def get_job_status(job_id: str):
    """Return the current state of a job.

    The UI polls this endpoint every 5 seconds.

    Args:
        job_id: Identifier returned by ``/create_initial_casting``.

    Returns:
        A dict with ``status`` plus ``results`` when the job is DONE or
        ``error`` when it is FAILED.

    Raises:
        HTTPException: 404 when ``job_id`` is not in the registry.
    """
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    # Read the status once: the background worker mutates the job dict while
    # clients poll, and re-reading it for every comparison could report one
    # status while attaching the payload that belongs to another.
    status = job["status"]
    response = {"status": status}

    if status == JobStatus.DONE:
        response["results"] = job["results"]
    elif status == JobStatus.FAILED:
        response["error"] = job["error"]

    return response
|
|
|
|
|
|
@app.get("/files/{video_name}/{char_id}/{filename}")
def serve_character_file(video_name: str, char_id: str, filename: str):
    """Serve a static character file (images) stored under TEMP_ROOT.

    Example: /files/dif_catala_1/char1/representative.jpg

    Args:
        video_name: First directory level under TEMP_ROOT.
        char_id: Second directory level under TEMP_ROOT.
        filename: Name of the file to send.

    Returns:
        A ``FileResponse`` for ``TEMP_ROOT/video_name/char_id/filename``.

    Raises:
        HTTPException: 404 when the path escapes TEMP_ROOT or is not an
            existing regular file.
    """
    root_dir = TEMP_ROOT.resolve()

    # Path segments come from the URL and may be ".." (for example when
    # percent-encoded), so resolve the final path and require it to stay
    # inside TEMP_ROOT before touching the filesystem any further.
    try:
        file_path = (root_dir / video_name / char_id / filename).resolve()
        file_path.relative_to(root_dir)
    except (ValueError, OSError, RuntimeError):
        raise HTTPException(status_code=404, detail="File not found") from None

    # is_file() rather than exists(): a directory cannot be sent as a file.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(file_path)
|
|
|
|
|
|
def process_video_job(job_id: str):
    """Process the video of a queued job; meant to run as a background task.

    Looks the job up in ``jobs``, marks it PROCESSING, runs
    ``detect_characters_from_video`` and stores the outcome on the job:

    * detection succeeded: status DONE with the detected characters;
    * detection raised: status DONE with an empty "fallback" result that
      lists freshly created empty folders and carries a ``warning``;
    * anything else raised: status FAILED with ``error`` set.

    Args:
        job_id: Key of the job in the ``jobs`` registry.

    Returns:
        None. All output is written to the job dict and to stdout.
    """
    job = jobs.get(job_id)
    if job is None:
        # Nothing to update: indexing jobs[job_id] again in an error handler
        # would only raise a second, unhandled KeyError.
        print(f"[{job_id}] ✗ Error en el procesamiento: job no encontrado")
        return

    try:
        print(f"[{job_id}] Iniciando procesamiento...")
        job["status"] = JobStatus.PROCESSING

        video_path = job["video_path"]
        video_name = job["video_name"]
        epsilon = job["epsilon"]
        min_cluster_size = job["min_cluster_size"]

        base = TEMP_ROOT / video_name
        base.mkdir(parents=True, exist_ok=True)
        print(f"[{job_id}] Directorio base: {base}")

        try:
            print(f"[{job_id}] Iniciando detección de personajes...")
            result = detect_characters_from_video(
                video_path=video_path,
                output_base=str(base),
                epsilon=epsilon,
                min_cluster_size=min_cluster_size,
                video_name=video_name,
            )

            characters = result.get("characters", [])
            analysis_path = result.get("analysis_path", "")

            print(f"[{job_id}] Personajes detectados: {len(characters)}")
            for char in characters:
                # .get(): a missing key in a log line must not discard an
                # otherwise successful detection.
                print(f"[{job_id}] - {char.get('name')}: {char.get('num_faces')} caras")

            results = {
                "characters": characters,
                "num_characters": len(characters),
                "analysis_path": analysis_path,
                "base_dir": str(base),
            }

        except Exception as e_detect:
            # Deliberate best effort: a failed detection still completes the
            # job, with empty folders the caller can fill in later.
            print(f"[{job_id}] Error en detección: {e_detect}")
            print(f"[{job_id}] Usando modo fallback (carpetas vacías)")

            fallback_dirs = ("sources", "faces", "voices", "backgrounds")
            for sub in fallback_dirs:
                (base / sub).mkdir(parents=True, exist_ok=True)

            results = {
                "characters": [],
                "num_characters": 0,
                "temp_dirs": {sub: str(base / sub) for sub in fallback_dirs},
                "warning": f"Detección falló, usando modo fallback: {str(e_detect)}",
            }

        # Publish the results before flipping the status so a concurrent
        # poller never observes "done" with results still unset.
        job["results"] = results
        job["status"] = JobStatus.DONE

        print(f"[{job_id}] ✓ Job completado exitosamente")

    except Exception as e:
        print(f"[{job_id}] ✗ Error en el procesamiento: {e}")
        # Same ordering rule as above: payload first, status last.
        job["error"] = str(e)
        job["status"] = JobStatus.FAILED
|
|
|
|
|
|
@app.post("/load_casting")
async def load_casting(
    faces_dir: str = Form("identities/faces"),
    voices_dir: str = Form("identities/voices"),
    db_dir: str = Form("chroma_db"),
    drop_collections: bool = Form(False),
):
    """Build the face and voice indexes from the given identity folders.

    Obtains a client via ``ensure_chroma(db_dir)``, then indexes ``faces_dir``
    into the ``index_faces`` collection and ``voices_dir`` into
    ``index_voices``. ``drop_collections`` is forwarded as ``drop`` to both
    builders.

    Returns:
        ``{"ok": True, "faces": ..., "voices": ...}`` carrying the two
        builders' return values (presumably item counts - verify against
        ``casting_loader``).
    """
    chroma_client = ensure_chroma(Path(db_dir))

    # Faces first, then voices, both against the same client.
    faces_indexed = build_faces_index(
        Path(faces_dir),
        chroma_client,
        collection_name="index_faces",
        drop=drop_collections,
    )
    voices_indexed = build_voices_index(
        Path(voices_dir),
        chroma_client,
        collection_name="index_voices",
        drop=drop_collections,
    )

    summary = {"ok": True, "faces": faces_indexed, "voices": voices_indexed}
    return summary
|
|
|
|
|
|
def _run_local_narration(cfg: dict, dialogues_srt: str, frames) -> dict:
    """Run the local NarrationSystem and shape its result as the endpoint payload."""
    narration_cfg = cfg.get("narration", {})
    # Both key spellings were in use (one per code path); accept either so
    # existing configs keep working, preferring the shorter one.
    guidelines_path = narration_cfg.get(
        "une_guidelines_path",
        narration_cfg.get("narration_une_guidelines_path", "UNE_153010.txt"),
    )
    ns = NarrationSystem(model_url=None, une_guidelines_path=guidelines_path)
    res = ns.run(dialogues_srt, frames)
    return {
        "narrative_text": res.narrative_text,
        "srt_text": res.srt_text,
        "approved": res.approved,
        "critic_feedback": res.critic_feedback,
    }


@app.post("/refine_narration")
async def refine_narration(
    dialogues_srt: str = Form(...),
    frame_descriptions_json: str = Form("[]"),
    config_path: str = Form("config.yaml"),
):
    """Merge dialogue subtitles and frame descriptions into a narration.

    When the configured narration model is listed under
    ``models.routing.use_remote_for`` the request goes through ``LLMRouter``;
    if that call fails, or the model is not listed, the local
    ``NarrationSystem`` is used instead.

    Args:
        dialogues_srt: Dialogue subtitles in SRT form.
        frame_descriptions_json: JSON-encoded frame descriptions.
        config_path: YAML configuration file read with ``load_yaml``.

    Returns:
        A dict with ``narrative_text``, ``srt_text``, ``approved`` and
        ``critic_feedback``.

    Raises:
        HTTPException: 400 when ``frame_descriptions_json`` is not valid JSON.
    """
    cfg = load_yaml(config_path)

    # Malformed client input is a client error, not an internal failure.
    try:
        frames = json.loads(frame_descriptions_json)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"frame_descriptions_json is not valid JSON: {exc}",
        ) from exc

    model_name = cfg.get("narration", {}).get("model", "salamandra-instruct")
    remote_models = cfg.get("models", {}).get("routing", {}).get("use_remote_for", [])

    if model_name not in remote_models:
        return _run_local_narration(cfg, dialogues_srt, frames)

    router = LLMRouter(cfg)
    system_msg = (
        "Eres un sistema de audiodescripción que cumple UNE-153010. "
        "Fusiona diálogos del SRT con descripciones concisas en los huecos, evitando redundancias. "
        "Devuelve JSON con {narrative_text, srt_text}."
    )
    prompt = json.dumps(
        {"dialogues_srt": dialogues_srt, "frames": frames, "rules": cfg.get("narration", {})},
        ensure_ascii=False,
    )

    try:
        txt = router.instruct(prompt=prompt, system=system_msg, model=model_name)

        # The model is asked for a JSON object; anything else (plain text, or
        # JSON that is not an object) is treated as the narrative itself.
        try:
            out = json.loads(txt)
        except (TypeError, ValueError):
            out = None
        if not isinstance(out, dict):
            out = {"narrative_text": txt, "srt_text": ""}

        return {
            "narrative_text": out.get("narrative_text", ""),
            "srt_text": out.get("srt_text", ""),
            "approved": True,
            "critic_feedback": "",
        }
    except Exception as exc:
        # Best effort: fall back to local narration, but leave a trace so the
        # remote failure is not silently hidden.
        print(f"[refine_narration] Error con el modelo remoto, usando narración local: {exc}")
        return _run_local_narration(cfg, dialogues_srt, frames)
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution: serve the app on every interface, port 7860.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )
|
|
|
|