VeuReu committed
Commit 7e13059 · verified · 1 Parent(s): 95d08d0

Delete worker.py

Files changed (1)
  1. worker.py +0 -133
worker.py DELETED
@@ -1,133 +0,0 @@
- # worker.py
- import os
- import time
- import uuid
- import requests
- from typing import Dict, Any, Optional
- from queue_manager import job_store, UPLOAD_DIR, RESULTS_DIR
- from models_job import JobStatus, JobResult, CharacterItem, Metrics
-
- HF_TOKEN = os.environ.get("HF_TOKEN")  # optional
- TGI_BASE_URL = os.environ.get("TGI_BASE_URL")  # e.g.: https://org-tgi--space.hf.space
- INFERENCE_ENDPOINT_URL = os.environ.get("INFERENCE_ENDPOINT_URL")
- INFERENCE_MODEL_ID = os.environ.get("INFERENCE_MODEL_ID")  # e.g. "meta-llama/Llama-3.1-8B-Instruct"
-
- def _auth_headers_json() -> Dict[str, str]:
-     headers = {"Content-Type": "application/json"}
-     if HF_TOKEN:
-         headers["Authorization"] = f"Bearer {HF_TOKEN}"
-     return headers
-
- def _call_tgi(prompt: str) -> str:
-     """
-     Example for TGI /v1/chat/completions (adjust to your TGI's format).
-     """
-     if not TGI_BASE_URL:
-         # if no TGI is configured, return demo text
-         return f"[DEMO] Generated response for: {prompt[:60]}..."
-     url = f"{TGI_BASE_URL.rstrip('/')}/v1/chat/completions"
-     payload = {
-         "model": "tgi",  # not always required
-         "messages": [{"role": "user", "content": prompt}],
-         "max_tokens": 256
-     }
-     r = requests.post(url, headers=_auth_headers_json(), json=payload, timeout=120)
-     r.raise_for_status()
-     data = r.json()
-     # Adjust to your TGI's response format
-     return data["choices"][0]["message"]["content"]
-
- def _call_inference_api(prompt: str) -> str:
-     """
-     Example for the serverless Inference API.
-     """
-     if not INFERENCE_MODEL_ID:
-         return f"[DEMO] Inference API not configured; prompt: {prompt[:60]}..."
-     url = f"https://api-inference.huggingface.co/models/{INFERENCE_MODEL_ID}"
-     r = requests.post(url, headers=_auth_headers_json(), json={"inputs": prompt, "parameters": {"max_new_tokens": 128}}, timeout=120)
-     r.raise_for_status()
-     out = r.json()
-     if isinstance(out, list) and out and "generated_text" in out[0]:
-         return out[0]["generated_text"]
-     return str(out)
-
- def _call_inference_endpoint(payload: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Example for a dedicated Inference Endpoint.
-     """
-     if not INFERENCE_ENDPOINT_URL:
-         return {"text": "[DEMO] Endpoint not configured"}
-     r = requests.post(INFERENCE_ENDPOINT_URL, headers=_auth_headers_json(), json=payload, timeout=120)
-     r.raise_for_status()
-     return r.json()
-
- def _fake_extract_characters() -> list[CharacterItem]:
-     return [
-         CharacterItem(name="Alice", screen_time_sec=312.5),
-         CharacterItem(name="Bob", screen_time_sec=288.0),
-     ]
-
- def process_job(job: Dict[str, Any]):
-     """
-     job = {
-         "job_id": str,
-         "mode": "book"|"une"|"both",
-         "local_path": "/app/data/uploads/xxx.mp4" (if it is an upload),
-         "video_url": "https://..." (if it comes by URL)
-     }
-     """
-     job_id = job["job_id"]
-     mode = job.get("mode", "both")
-     src_filename = os.path.basename(job.get("local_path") or job.get("video_url") or f"{job_id}.mp4")
-
-     # Mark as processing
-     st = JobStatus(job_id=job_id, status="processing", progress=5, message="Starting processing…")
-     job_store.set_status(job_id, st)
-
-     # (1) Download if it comes by URL (omitted in this demo; implement if you need it)
-     local_path = job.get("local_path")
-     if not local_path and job.get("video_url"):
-         # Here you would download the video to local_path
-         # local_path = os.path.join(UPLOAD_DIR, f"{job_id}_{src_filename}")
-         # requests.get(... stream ...) -> write file
-         pass
-
-     # (2) ASR / diarization / preparation etc. (simulated)
-     time.sleep(1)
-     st.progress = 20; st.message = "Extracting transcript/dialogue…"; job_store.set_status(job_id, st)
-     # Here you would call your real pipelines (Whisper, diarization, etc.)
-
-     # (3) "Book"-style generation with an LLM (demo)
-     book_text = None; book_mp3_url = None
-     if mode in ("book", "both"):
-         prompt = "Generate a book-style audio description with condensed dialogue from the video."
-         book_text = _call_tgi(prompt) if TGI_BASE_URL else _call_inference_api(prompt)
-         # If you synthesize audio, save the mp3 and expose an accessible URL (omitted for simplicity)
-         book_mp3_url = None
-         st.progress = 60; st.message = "Generating Book text…"; job_store.set_status(job_id, st)
-
-     # (4) UNE generation (SRT + audio) (demo)
-     une_srt = None; une_mp3_url = None
-     if mode in ("une", "both"):
-         # Generate a minimal example SRT
-         une_srt = "1\n00:00:00,000 --> 00:00:03,000\n[Example UNE audio description]\n"
-         une_mp3_url = None
-         st.progress = 80; st.message = "Generating UNE SRT…"; job_store.set_status(job_id, st)
-
-     # (5) Characters, metrics (demo)
-     chars = _fake_extract_characters()
-     metrics = Metrics(wer=0.07, der=0.12, ux=4.3)
-
-     time.sleep(1)
-     st.progress = 100; st.message = "Completed"; st.status = "completed"; job_store.set_status(job_id, st)
-
-     result = JobResult(
-         job_id=job_id,
-         source_filename=src_filename,
-         duration_sec=None,
-         characters=chars,
-         book={"text": book_text, "mp3_url": book_mp3_url} if book_text or book_mp3_url else None,
-         une={"srt": une_srt, "mp3_url": une_mp3_url} if une_srt or une_mp3_url else None,
-         metrics=metrics
-     )
-     job_store.set_result(job_id, result)
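
Note that step (1) of the deleted worker only stubbed out the URL download ("requests.get(... stream ...) -> write file"). A minimal sketch of what that comment gestures at, streaming the response to disk in chunks; the helper name _download_video is hypothetical and was not part of the file:

import os
import requests

def _download_video(video_url: str, dest_path: str) -> str:
    # Stream the remote video to dest_path without loading it all into memory.
    with requests.get(video_url, stream=True, timeout=300) as r:
        r.raise_for_status()
        with open(dest_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
    return dest_path

Inside process_job this would slot into the stubbed branch, e.g. local_path = _download_video(job["video_url"], os.path.join(UPLOAD_DIR, f"{job_id}_{src_filename}")).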
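
The diff also does not show how process_job was driven. Assuming queue_manager exposed some way to pull pending jobs, a driver loop might have looked like the sketch below (get_next_job is a purely hypothetical callable, not an API shown in this repo):

import time
from worker import process_job

def run_worker_loop(get_next_job):
    # get_next_job: hypothetical callable returning the next job dict, or None when idle
    while True:
        job = get_next_job()
        if job is None:
            time.sleep(1)  # nothing queued; back off briefly
            continue
        process_job(job)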