Delete worker.py
worker.py
DELETED
@@ -1,133 +0,0 @@
# worker.py
import os
import time
import uuid
import requests
from typing import Dict, Any, Optional
from queue_manager import job_store, UPLOAD_DIR, RESULTS_DIR
from models_job import JobStatus, JobResult, CharacterItem, Metrics

HF_TOKEN = os.environ.get("HF_TOKEN")  # optional
TGI_BASE_URL = os.environ.get("TGI_BASE_URL")  # e.g. https://org-tgi--space.hf.space
INFERENCE_ENDPOINT_URL = os.environ.get("INFERENCE_ENDPOINT_URL")
INFERENCE_MODEL_ID = os.environ.get("INFERENCE_MODEL_ID")  # e.g. "meta-llama/Llama-3.1-8B-Instruct"

def _auth_headers_json() -> Dict[str, str]:
    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"
    return headers

def _call_tgi(prompt: str) -> str:
    """
    Example for TGI's /v1/chat/completions route (adjust to your TGI's format).
    """
    if not TGI_BASE_URL:
        # if no TGI is configured, return demo text
        return f"[DEMO] Generated response for: {prompt[:60]}..."
    url = f"{TGI_BASE_URL.rstrip('/')}/v1/chat/completions"
    payload = {
        "model": "tgi",  # not always required
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256
    }
    r = requests.post(url, headers=_auth_headers_json(), json=payload, timeout=120)
    r.raise_for_status()
    data = r.json()
    # Adjust to match your TGI's response format
    return data["choices"][0]["message"]["content"]

def _call_inference_api(prompt: str) -> str:
    """
    Example for the serverless Inference API.
    """
    if not INFERENCE_MODEL_ID:
        return f"[DEMO] Inference API not configured; prompt: {prompt[:60]}..."
    url = f"https://api-inference.huggingface.co/models/{INFERENCE_MODEL_ID}"
    r = requests.post(url, headers=_auth_headers_json(),
                      json={"inputs": prompt, "parameters": {"max_new_tokens": 128}},
                      timeout=120)
    r.raise_for_status()
    out = r.json()
    if isinstance(out, list) and out and "generated_text" in out[0]:
        return out[0]["generated_text"]
    return str(out)

def _call_inference_endpoint(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Example for a dedicated Inference Endpoint.
    """
    if not INFERENCE_ENDPOINT_URL:
        return {"text": "[DEMO] Endpoint not configured"}
    r = requests.post(INFERENCE_ENDPOINT_URL, headers=_auth_headers_json(), json=payload, timeout=120)
    r.raise_for_status()
    return r.json()

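# Usage sketch for the helper above. The payload shape assumes a TGI-style
# dedicated endpoint and is only illustrative; adjust to your endpoint's
# actual contract:
#   out = _call_inference_endpoint({"inputs": "Describe the scene.",
#                                   "parameters": {"max_new_tokens": 128}})
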
def _fake_extract_characters() -> list[CharacterItem]:
    return [
        CharacterItem(name="Alice", screen_time_sec=312.5),
        CharacterItem(name="Bob", screen_time_sec=288.0),
    ]

def process_job(job: Dict[str, Any]):
    """
    job = {
        "job_id": str,
        "mode": "book" | "une" | "both",
        "local_path": "/app/data/uploads/xxx.mp4" (if the file was uploaded),
        "video_url": "https://..." (if the video comes via URL)
    }
    """
    job_id = job["job_id"]
    mode = job.get("mode", "both")
    src_filename = os.path.basename(job.get("local_path") or job.get("video_url") or f"{job_id}.mp4")

    # Mark the job as processing
    st = JobStatus(job_id=job_id, status="processing", progress=5, message="Starting processing…")
    job_store.set_status(job_id, st)

    # (1) Download if the video comes via URL, streaming to disk so large
    # files never have to fit in memory
    local_path = job.get("local_path")
    if not local_path and job.get("video_url"):
        local_path = os.path.join(UPLOAD_DIR, f"{job_id}_{src_filename}")
        with requests.get(job["video_url"], stream=True, timeout=300) as resp:
            resp.raise_for_status()
            with open(local_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

    # (2) ASR / diarization / preprocessing, etc. (simulated)
    time.sleep(1)
    st.progress = 20; st.message = "Extracting transcript/dialogue…"; job_store.set_status(job_id, st)
    # Here you would call your real pipelines (Whisper, diarization, etc.)

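    # A possible shape for the real ASR step, assuming the openai-whisper
    # package (illustrative only; any ASR/diarization stack slots in here):
    #   import whisper
    #   asr = whisper.load_model("base")
    #   transcript = asr.transcribe(local_path)["text"]
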
    # (3) "Book"-style generation with an LLM (demo)
    book_text = None; book_mp3_url = None
    if mode in ("book", "both"):
        prompt = "Generate a book-style audio description with condensed dialogue from the video."
        book_text = _call_tgi(prompt) if TGI_BASE_URL else _call_inference_api(prompt)
        # If you synthesize audio, save the mp3 and expose its URL (omitted here for simplicity)
        book_mp3_url = None
        st.progress = 60; st.message = "Generating Book text…"; job_store.set_status(job_id, st)

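    # One way the mp3 synthesis could look, assuming the gTTS package and a
    # route that serves files out of RESULTS_DIR (both are assumptions):
    #   from gtts import gTTS
    #   mp3_path = os.path.join(RESULTS_DIR, f"{job_id}_book.mp3")
    #   gTTS(book_text, lang="en").save(mp3_path)
    #   book_mp3_url = f"/results/{job_id}_book.mp3"
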
    # (4) UNE generation (SRT + audio) (demo)
    une_srt = None; une_mp3_url = None
    if mode in ("une", "both"):
        # Generate a minimal example SRT
        une_srt = "1\n00:00:00,000 --> 00:00:03,000\n[Example UNE audio description]\n"
        une_mp3_url = None
        st.progress = 80; st.message = "Generating UNE SRT…"; job_store.set_status(job_id, st)

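    # Building a real SRT from timed segments is plain string formatting; a
    # sketch, assuming (start_sec, end_sec, text) tuples from the pipeline:
    #   def _ts(s):  # seconds -> "HH:MM:SS,mmm"
    #       h, rem = divmod(int(s), 3600); m, sec = divmod(rem, 60)
    #       return f"{h:02d}:{m:02d}:{sec:02d},{int(s % 1 * 1000):03d}"
    #   une_srt = "\n".join(f"{i}\n{_ts(a)} --> {_ts(b)}\n{t}\n"
    #                       for i, (a, b, t) in enumerate(segments, 1))
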
    # (5) Characters, metrics (demo)
    chars = _fake_extract_characters()
    metrics = Metrics(wer=0.07, der=0.12, ux=4.3)

    time.sleep(1)
    st.progress = 100; st.message = "Completed"; st.status = "completed"; job_store.set_status(job_id, st)

    result = JobResult(
        job_id=job_id,
        source_filename=src_filename,
        duration_sec=None,
        characters=chars,
        book={"text": book_text, "mp3_url": book_mp3_url} if book_text or book_mp3_url else None,
        une={"srt": une_srt, "mp3_url": une_mp3_url} if une_srt or une_mp3_url else None,
        metrics=metrics
    )
    job_store.set_result(job_id, result)
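
# For reference, a minimal sketch of the loop that could drive process_job.
# pop_next_job is a hypothetical queue_manager API, assumed for illustration
# (only set_status/set_result are actually used above); it also shows where
# a failure status would be reported:
#   def run_worker_forever():
#       while True:
#           job = job_store.pop_next_job()  # assumed blocking pop
#           try:
#               process_job(job)
#           except Exception as exc:
#               st = JobStatus(job_id=job["job_id"], status="failed",
#                              progress=100, message=f"Error: {exc}")
#               job_store.set_status(job["job_id"], st)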