caarleexx committed on
Commit d3cb1fe · verified · 1 parent: af88c68

Upload 2 files

Files changed (2)
  1. api/ltx_server.py +953 -0
  2. api/seedvr_server.py +111 -0
api/ltx_server.py ADDED
@@ -0,0 +1,953 @@
+ # ltx_server.py — VideoService (beta 1.1)
+ # Always output_type="latent"; at the end: VAE (whole block) → pixels → MP4.
+ # Ignores UserWarning/FutureWarning and injects the VAE into the manager with the correct dtype/device.
+
+ # --- 0. WARNINGS AND ENVIRONMENT ---
+ import warnings
+ warnings.filterwarnings("ignore", category=UserWarning)
+ warnings.filterwarnings("ignore", category=FutureWarning)
+ warnings.filterwarnings("ignore", message=".*")
+
+ from huggingface_hub import logging
+
+ # Only the last verbosity call would take effect, so a single one is enough: keep the hub quiet.
+ logging.set_verbosity_error()
+
+ import os
+
+ # Debug defaults, read back below via os.getenv(); plain module-level variables would never be seen.
+ os.environ.setdefault("LTXV_DEBUG", "1")
+ os.environ.setdefault("LTXV_FRAME_LOG_EVERY", "8")
+
+
+ # --- 1. IMPORTS ---
+ import subprocess, shlex, tempfile
+ import json
+ import random
+ import sys
+ import gc
+ import shutil
+ import contextlib
+ import time
+ import traceback
+ from typing import List, Dict
+ from pathlib import Path
+
+ import torch
+ import torch.nn.functional as F
+ import numpy as np
+ import yaml
+ import imageio
+ import gradio as gr   # used by handle_media_upload_for_dims (gr.update)
+ from PIL import Image  # used to probe image dimensions
+ from einops import rearrange
+ from huggingface_hub import hf_hub_download
+
+ # Singletons (simple versions)
+ from managers.vae_manager import vae_manager_singleton
+ from tools.video_encode_tool import video_encode_tool_singleton
+
+ # --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
+ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
+     try:
+         import psutil
+         import pynvml as nvml
+         nvml.nvmlInit()
+         handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
+         try:
+             procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
+         except Exception:
+             procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
+         results = []
+         for p in procs:
+             pid = int(p.pid)
+             used_mb = None
+             try:
+                 if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
+                     used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
+             except Exception:
+                 used_mb = None
+             name = "unknown"
+             user = "unknown"
+             try:
+                 pr = psutil.Process(pid)
+                 name = pr.name()
+                 user = pr.username()
+             except Exception:
+                 pass
+             results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
+         nvml.nvmlShutdown()
+         return results
+     except Exception:
+         return []
+
+ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
+     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
+     try:
+         out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
+     except Exception:
+         return []
+     results = []
+     for line in out.strip().splitlines():
+         parts = [p.strip() for p in line.split(",")]
+         if len(parts) >= 3:
+             try:
+                 pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
+                 user = "unknown"
+                 try:
+                     import psutil
+                     pr = psutil.Process(pid)
+                     user = pr.username()
+                 except Exception:
+                     pass
+                 results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
+             except Exception:
+                 continue
+     return results
+
+
+ def calculate_new_dimensions(orig_w, orig_h, divisor=8):
+     """
+     Computes new dimensions that keep the aspect ratio while making both
+     sides divisible by the given divisor (default 8).
+     """
+     if orig_w == 0 or orig_h == 0:
+         # Return a safe default
+         return 512, 512
+
+     # Preserve orientation (landscape vs. portrait)
+     if orig_w >= orig_h:
+         # Landscape or square
+         aspect_ratio = orig_w / orig_h
+         # Start from a base height and derive the width
+         new_h = 512  # base height for landscape
+         new_w = new_h * aspect_ratio
+     else:
+         # Portrait
+         aspect_ratio = orig_h / orig_w
+         # Start from a base width and derive the height
+         new_w = 512  # base width for portrait
+         new_h = new_w * aspect_ratio
+
+     # Round BOTH values to the nearest multiple of the divisor
+     final_w = int(round(new_w / divisor)) * divisor
+     final_h = int(round(new_h / divisor)) * divisor
+
+     # Make sure neither dimension collapses to zero after rounding
+     final_w = max(divisor, final_w)
+     final_h = max(divisor, final_h)
+
+     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Computed: {new_w:.0f}x{new_h:.0f} -> Final (divisible by {divisor}): {final_w}x{final_h}")
+     return final_h, final_w  # returns (height, width)
+
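Two hand-checked examples of the rounding above (not part of the original commit):

```python
# 1920x1080 landscape: base height 512, width 512*(16/9) = 910.2 -> rounded to 912
h, w = calculate_new_dimensions(1920, 1080)   # returns (512, 912)

# 720x1280 portrait: base width 512, height 512*(1280/720) = 910.2 -> rounded to 912
h, w = calculate_new_dimensions(720, 1280)    # returns (912, 512)
```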
+
+ def handle_media_upload_for_dims(filepath, current_h, current_w):
+     """
+     Reads the uploaded media's dimensions and feeds them through
+     calculate_new_dimensions() above.
+     """
+     if not filepath or not os.path.exists(str(filepath)):
+         return gr.update(value=current_h), gr.update(value=current_w)
+     try:
+         if str(filepath).lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
+             with Image.open(filepath) as img:
+                 orig_w, orig_h = img.size
+         else:  # assume it is a video
+             with imageio.get_reader(filepath) as reader:
+                 meta = reader.get_meta_data()
+                 orig_w, orig_h = meta.get('size', (current_w, current_h))
+
+         new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
+
+         return gr.update(value=new_h), gr.update(value=new_w)
+     except Exception as e:
+         print(f"Error while probing media dimensions: {e}")
+         return gr.update(value=current_h), gr.update(value=current_w)
+
+
+ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
+     if not processes:
+         return " - Active processes: (none)\n"
+     processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
+     lines = [" - Active processes (PID | USER | NAME | VRAM MB):"]
+     for p in processes:
+         star = "*" if p["pid"] == current_pid else " "
+         used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
+         lines.append(f"   {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
+     return "\n".join(lines) + "\n"
+
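For reference, the two probes and the formatter compose like this; a minimal sketch (the sample output line is invented, real output depends on the host):

```python
import os

idx = 0  # GPU index to inspect
procs = _query_gpu_processes_via_nvml(idx) or _query_gpu_processes_via_nvidiasmi(idx)
print(_gpu_process_table(procs, os.getpid()), end="")
#  - Active processes (PID | USER | NAME | VRAM MB):
#    * 12345 | root | python | 8123    (the star marks the current process)
```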
+ def run_setup():
+     setup_script_path = "setup.py"
+     if not os.path.exists(setup_script_path):
+         print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
+         return
+     try:
+         print("[DEBUG] Running setup.py for dependencies...")
+         subprocess.run([sys.executable, setup_script_path], check=True)
+         print("[DEBUG] Setup finished successfully.")
+     except subprocess.CalledProcessError as e:
+         print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
+         sys.exit(1)
+
+ from api.ltx.inference import (
+     create_ltx_video_pipeline,
+     create_latent_upsampler,
+     load_image_to_tensor_with_resize_and_crop,
+     seed_everething,
+     calculate_padding,
+     load_media_file,
+ )
+
+ DEPS_DIR = Path("/data")
+ LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+ if not LTX_VIDEO_REPO_DIR.exists():
+     print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
+     run_setup()
+
+ def add_deps_to_path():
+     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+     if repo_path not in sys.path:
+         sys.path.insert(0, repo_path)
+     print(f"[DEBUG] Repo added to sys.path: {repo_path}")
+
+ add_deps_to_path()
+
+ # --- 3. MODEL-SPECIFIC IMPORTS ---
+
+ from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
+ from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+ from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
+ from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
+
+
+ # --- 4. LOGGING HELPERS ---
+ def log_tensor_info(tensor, name="Tensor"):
+     if not isinstance(tensor, torch.Tensor):
+         print(f"\n[INFO] '{name}' is not a tensor.")
+         return
+     print(f"\n--- Tensor: {name} ---")
+     print(f" - Shape: {tuple(tensor.shape)}")
+     print(f" - Dtype: {tensor.dtype}")
+     print(f" - Device: {tensor.device}")
+     if tensor.numel() > 0:
+         try:
+             print(f" - Min: {tensor.min().item():.4f} Max: {tensor.max().item():.4f} Mean: {tensor.mean().item():.4f}")
+         except Exception:
+             pass
+     print("------------------------------------------\n")
+
+
+ # --- 5. MAIN SERVICE CLASS ---
+ class VideoService:
+     def __init__(self):
+         t0 = time.perf_counter()
+         print("[DEBUG] Initializing VideoService...")
+         self.debug = os.getenv("LTXV_DEBUG", "1") == "1"
+         self.frame_log_every = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
+         self.config = self._load_config()
+         print(f"[DEBUG] Config loaded (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         print(f"[DEBUG] Selected device: {self.device}")
+         self.last_memory_reserved_mb = 0.0
+         self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []
+
+         self.pipeline, self.latent_upsampler = self._load_models()
+         print(f"[DEBUG] Pipeline and upsampler loaded. Upsampler active? {bool(self.latent_upsampler)}")
+
+         print(f"[DEBUG] Moving models to {self.device}...")
+         self.pipeline.to(self.device)
+         if self.latent_upsampler:
+             self.latent_upsampler.to(self.device)
+
+         self._apply_precision_policy()
+         print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
+
+         # Inject pipeline/VAE into the manager (prevents vae=None)
+         vae_manager_singleton.attach_pipeline(
+             self.pipeline,
+             device=self.device,
+             autocast_dtype=self.runtime_autocast_dtype
+         )
+         print(f"[DEBUG] VAE manager attached: has_vae={hasattr(self.pipeline, 'vae')} device={self.device}")
+
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+             self._log_gpu_memory("After loading models")
+
+         print(f"[DEBUG] VideoService ready. boot_time={time.perf_counter()-t0:.3f}s")
+
+     def _log_gpu_memory(self, stage_name: str):
+         if self.device != "cuda":
+             return
+         device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
+         current_reserved_b = torch.cuda.memory_reserved(device_index)
+         current_reserved_mb = current_reserved_b / (1024 ** 2)
+         total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
+         total_memory_mb = total_memory_b / (1024 ** 2)
+         peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
+         delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
+         processes = _query_gpu_processes_via_nvml(device_index) or _query_gpu_processes_via_nvidiasmi(device_index)
+         print(f"\n--- [GPU LOG] {stage_name} (cuda:{device_index}) ---")
+         print(f" - Reserved: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB (Δ={delta_mb:+.2f} MB)")
+         if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
+             print(f" - Peak reserved (this stage): {peak_reserved_mb:.2f} MB")
+         print(_gpu_process_table(processes, os.getpid()), end="")
+         print("--------------------------------------------------\n")
+         self.last_memory_reserved_mb = current_reserved_mb
+
+     def _register_tmp_dir(self, d: str):
+         if d and os.path.isdir(d):
+             self._tmp_dirs.add(d); print(f"[DEBUG] Registered tmp dir: {d}")
+
+     def _register_tmp_file(self, f: str):
+         if f and os.path.exists(f):
+             self._tmp_files.add(f); print(f"[DEBUG] Registered tmp file: {f}")
+
+     def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
+         print("[DEBUG] Finalize: starting cleanup...")
+         keep = set(keep_paths or []); extras = set(extra_paths or [])
+         removed_files = 0
+         for f in list(self._tmp_files | extras):
+             try:
+                 if f not in keep and os.path.isfile(f):
+                     os.remove(f); removed_files += 1; print(f"[DEBUG] Removed tmp file: {f}")
+             except Exception as e:
+                 print(f"[DEBUG] Failed to remove file {f}: {e}")
+             finally:
+                 self._tmp_files.discard(f)
+         removed_dirs = 0
+         for d in list(self._tmp_dirs):
+             try:
+                 if d not in keep and os.path.isdir(d):
+                     shutil.rmtree(d, ignore_errors=True); removed_dirs += 1; print(f"[DEBUG] Removed tmp dir: {d}")
+             except Exception as e:
+                 print(f"[DEBUG] Failed to remove dir {d}: {e}")
+             finally:
+                 self._tmp_dirs.discard(d)
+         print(f"[DEBUG] Finalize: files removed={removed_files}, dirs removed={removed_dirs}")
+         gc.collect()
+         try:
+             if clear_gpu and torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+                 try:
+                     torch.cuda.ipc_collect()
+                 except Exception:
+                     pass
+         except Exception as e:
+             print(f"[DEBUG] Finalize: GPU cleanup failed: {e}")
+         try:
+             self._log_gpu_memory("After finalize")
+         except Exception as e:
+             print(f"[DEBUG] Post-finalize GPU log failed: {e}")
+
+     def _load_config(self):
+         base = LTX_VIDEO_REPO_DIR / "configs"
+         candidates = [
+             base / "ltxv-13b-0.9.8-dev-fp8.yaml",
+             base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
+             base / "ltxv-13b-0.9.8-distilled.yaml",
+         ]
+         for cfg in candidates:
+             if cfg.exists():
+                 print(f"[DEBUG] Selected config: {cfg}")
+                 with open(cfg, "r") as file:
+                     return yaml.safe_load(file)
+         cfg = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+         print(f"[DEBUG] Fallback config: {cfg}")
+         with open(cfg, "r") as file:
+             return yaml.safe_load(file)
+
+     def _load_models(self):
+         t0 = time.perf_counter()
+         LTX_REPO = "Lightricks/LTX-Video"
+         print("[DEBUG] Downloading main checkpoint...")
+         distilled_model_path = hf_hub_download(
+             repo_id=LTX_REPO,
+             filename=self.config["checkpoint_path"],
+             local_dir=os.getenv("HF_HOME"),
+             cache_dir=os.getenv("HF_HOME_CACHE"),
+             token=os.getenv("HF_TOKEN"),
+         )
+         self.config["checkpoint_path"] = distilled_model_path
+         print(f"[DEBUG] Checkpoint at: {distilled_model_path}")
+
+         print("[DEBUG] Downloading spatial upscaler...")
+         spatial_upscaler_path = hf_hub_download(
+             repo_id=LTX_REPO,
+             filename=self.config["spatial_upscaler_model_path"],
+             local_dir=os.getenv("HF_HOME"),
+             cache_dir=os.getenv("HF_HOME_CACHE"),
+             token=os.getenv("HF_TOKEN")
+         )
+         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
+         print(f"[DEBUG] Upscaler at: {spatial_upscaler_path}")
+
+         print("[DEBUG] Building pipeline...")
+         pipeline = create_ltx_video_pipeline(
+             ckpt_path=self.config["checkpoint_path"],
+             precision=self.config["precision"],
+             text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
+             sampler=self.config["sampler"],
+             device="cpu",
+             enhance_prompt=False,
+             prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
+             prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
+         )
+         print("[DEBUG] Pipeline ready.")
+
+         latent_upsampler = None
+         if self.config.get("spatial_upscaler_model_path"):
+             print("[DEBUG] Building latent_upsampler...")
+             latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
+             print("[DEBUG] Upsampler ready.")
+         print(f"[DEBUG] _load_models() total time={time.perf_counter()-t0:.3f}s")
+         return pipeline, latent_upsampler
+
+     def _promote_fp8_weights_to_bf16(self, module):
+         if not isinstance(module, torch.nn.Module):
+             print("[DEBUG] FP8→BF16 promotion skipped: target is not an nn.Module.")
+             return
+         f8 = getattr(torch, "float8_e4m3fn", None)
+         if f8 is None:
+             print("[DEBUG] torch.float8_e4m3fn unavailable.")
+             return
+         p_cnt = b_cnt = 0
+         for _, p in module.named_parameters(recurse=True):
+             try:
+                 if p.dtype == f8:
+                     with torch.no_grad():
+                         p.data = p.data.to(torch.bfloat16); p_cnt += 1
+             except Exception:
+                 pass
+         for _, b in module.named_buffers(recurse=True):
+             try:
+                 if hasattr(b, "dtype") and b.dtype == f8:
+                     b.data = b.data.to(torch.bfloat16); b_cnt += 1
+             except Exception:
+                 pass
+         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
+
+     @torch.no_grad()
+     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
+         """
+         Latent-upscaling logic lifted straight from LTXMultiScalePipeline.
+         """
+         if not self.latent_upsampler:
+             raise ValueError("Latent upsampler is not loaded.")
+
+         # Make sure the models sit on the right device
+         self.latent_upsampler.to(self.device)
+         self.pipeline.vae.to(self.device)
+         print(f"[DEBUG-UPSAMPLE] Input shape: {tuple(latents.shape)}")
+         latents = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
+         upsampled_latents = self.latent_upsampler(latents)
+         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
+         print(f"[DEBUG-UPSAMPLE] Output shape: {tuple(upsampled_latents.shape)}")
+
+         return upsampled_latents
+
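Assuming the spatial upsampler doubles H and W, which is what the second pass below relies on when it doubles the first-pass dimensions, the shapes move like this (a sketch, not output captured from this commit):

```python
# latents arrive normalized: [B, C, T, H, W]
#   [1, 128, 13, 16, 24] -> un_normalize -> upsample -> normalize -> [1, 128, 13, 32, 48]
# T (the latent frame count) is untouched; only the spatial grid grows.
```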
+
+     def _apply_precision_policy(self):
+         prec = str(self.config.get("precision", "")).lower()
+         self.runtime_autocast_dtype = torch.float32
+         print(f"[DEBUG] Applying precision policy: {prec}")
+         if prec == "float8_e4m3fn":
+             self.runtime_autocast_dtype = torch.bfloat16
+             force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
+             print(f"[DEBUG] FP8 detected. force_promote={force_promote}")
+             if force_promote and hasattr(torch, "float8_e4m3fn"):
+                 try:
+                     self._promote_fp8_weights_to_bf16(self.pipeline)
+                 except Exception as e:
+                     print(f"[DEBUG] FP8→BF16 promotion on the pipeline failed: {e}")
+                 try:
+                     if self.latent_upsampler:
+                         self._promote_fp8_weights_to_bf16(self.latent_upsampler)
+                 except Exception as e:
+                     print(f"[DEBUG] FP8→BF16 promotion on the upsampler failed: {e}")
+         elif prec == "bfloat16":
+             self.runtime_autocast_dtype = torch.bfloat16
+         elif prec == "mixed_precision":
+             self.runtime_autocast_dtype = torch.float16
+         else:
+             self.runtime_autocast_dtype = torch.float32
+
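The selected dtype is only consumed as an autocast target; generate() below wraps the denoising calls in exactly this kind of context. A minimal sketch:

```python
import contextlib
import torch

dtype = torch.bfloat16  # e.g. what the fp8 and bfloat16 branches select
ctx = torch.autocast(device_type="cuda", dtype=dtype) if torch.cuda.is_available() else contextlib.nullcontext()
with ctx:
    # autocast-eligible ops run in bf16 here; the weights themselves are not rewritten
    pass
```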
+     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
+         print(f"[DEBUG] Loading conditioning media: {filepath}")
+         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
+         tensor = torch.nn.functional.pad(tensor, padding_values)
+         out = tensor.to(self.device, dtype=self.runtime_autocast_dtype) if self.device == "cuda" else tensor.to(self.device)
+         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
+         return out
+
+
+     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
+         """
+         Splits the latent tensor into chunks of a fixed number of latent frames.
+
+         Args:
+             latents_brutos: tensor [B, C, T, H, W]
+             num_latente_por_chunk: number of latent frames per chunk
+             overlap: number of latent frames shared between consecutive chunks
+
+         Returns:
+             List[tensor]: list of cloned chunks
+         """
+         sum_latent = latents_brutos.shape[2]
+         chunks = []
+
+         if num_latente_por_chunk >= sum_latent:
+             return [latents_brutos]
+
+         n_chunks = sum_latent // num_latente_por_chunk
+         print("================CAUSAL PRUNING=================")
+         print(f"[DEBUG] TOTAL LATENT FRAMES = {sum_latent}")
+         print(f"[DEBUG] Min latent frames per chunk = {num_latente_por_chunk}")
+         print(f"[DEBUG] Number of chunks = {n_chunks}")
+         if n_chunks > 1:
+             i = 0
+             while i < n_chunks:
+                 start = num_latente_por_chunk * i
+                 end = start + num_latente_por_chunk + overlap
+                 if i + 1 < n_chunks:
+                     chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
+                     print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
+                 else:
+                     # the last chunk takes everything that is left
+                     chunk = latents_brutos[:, :, start:, :, :].clone().detach()
+                     print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
+                 chunks.append(chunk)
+                 i += 1
+         else:
+             print("[DEBUG] Minimum number of chunks; keeping the tensor whole.")
+             print(f"[DEBUG] latents_brutos[:, :, :, :, :] = {latents_brutos.shape[2]}")
+             chunks.append(latents_brutos)
+         print("================CAUSAL PRUNING=================")
+         return chunks
+
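A worked example of the split, assuming a 13-frame latent sequence, 4 frames per chunk and overlap 1, using the module-level instance created at the bottom of the file (boundaries computed by hand):

```python
import torch

latents = torch.randn(1, 128, 13, 32, 48)  # [B, C, T, H, W]
chunks = video_generation_service._dividir_latentes_por_tamanho(latents, 4, overlap=1)
# 13 // 4 = 3 chunks, each extended by the 1-frame overlap:
#   chunks[0] = latents[:, :, 0:5]  (frames 0-4)
#   chunks[1] = latents[:, :, 4:9]  (frames 4-8, shares frame 4)
#   chunks[2] = latents[:, :, 8:]   (frames 8-12, shares frame 8)
```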
+     def _get_total_frames(self, video_path: str) -> int:
+         cmd = [
+             "ffprobe",
+             "-v", "error",
+             "-select_streams", "v:0",
+             "-count_frames",
+             "-show_entries", "stream=nb_read_frames",
+             "-of", "default=nokey=1:noprint_wrappers=1",
+             video_path
+         ]
+         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+         return int(result.stdout.strip())
+
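Usage is straightforward; the path and the returned count below are illustrative only:

```python
n = video_generation_service._get_total_frames("/app/output/output_par_1.mp4")
print(n)  # e.g. 49; -count_frames makes ffprobe decode the stream, so this is exact but not free
```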
+     def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
+         """
+         Builds a new list of videos with smooth transitions (frame-by-frame blends),
+         following exactly Carlos's linear logic.
+         """
+         poda = crossfade_frames
+         total_partes = len(video_paths)
+         video_fade_fim = None
+         video_fade_ini = None
+         nova_lista = []
+
+         print("===========CAUSAL CONCATENATION=============")
+         print(f"[DEBUG] Starting pipeline with {total_partes} videos and {poda} crossfade frames")
+
+         for i in range(total_partes):
+             base = video_paths[i]
+             stem = Path(base).stem  # file name only; joining pasta with the full path would escape it
+
+             # --- TRIM: drop the crossfade frames from the tail of every part except the
+             # last, and from the head of every part except the first ---
+             video_podado = os.path.join(pasta, f"{stem}_podado_{i}.mp4")
+
+             if i < total_partes - 1:
+                 end_frame = self._get_total_frames(base) - poda
+             else:
+                 end_frame = self._get_total_frames(base)
+             start_frame = poda if i > 0 else 0
+
+             cmd_poda = (
+                 f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
+                 f'-vf "trim=start_frame={start_frame}:end_frame={end_frame},setpts=PTS-STARTPTS" '
+                 f'-an "{video_podado}"'
+             )
+             subprocess.run(cmd_poda, shell=True, check=True)
+
+             # --- FADE-IN CLIP (dropped head of the current part) ---
+             if i > 0:
+                 video_fade_ini = os.path.join(pasta, f"{stem}_fade_ini_{i}.mp4")
+                 cmd_ini = (
+                     f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
+                     f'-vf "trim=end_frame={poda},setpts=PTS-STARTPTS" -an "{video_fade_ini}"'
+                 )
+                 subprocess.run(cmd_ini, shell=True, check=True)
+
+             # --- TRANSITION ---
+             if video_fade_fim and video_fade_ini:
+                 video_fade = os.path.join(pasta, f"transicao_{i}_{i+1}.mp4")
+                 # Ramp on the frame counter N; T is the timestamp in seconds and would
+                 # barely move across {poda} frames at 24 fps.
+                 cmd_blend = (
+                     f'ffmpeg -y -hide_banner -loglevel error '
+                     f'-i "{video_fade_fim}" -i "{video_fade_ini}" '
+                     f'-filter_complex "[0:v][1:v]blend=all_expr=\'A*(1-N/{poda})+B*(N/{poda})\',format=yuv420p" '
+                     f'-frames:v {poda} "{video_fade}"'
+                 )
+                 subprocess.run(cmd_blend, shell=True, check=True)
+                 print(f"[DEBUG] transition {i}/{i+1} added, {self._get_total_frames(video_fade)} frames ✅")
+                 nova_lista.append(video_fade)
+
+             # --- FADE-OUT CLIP (dropped tail of the current part, used on the next iteration) ---
+             if i < total_partes - 1:
+                 video_fade_fim = os.path.join(pasta, f"{stem}_fade_fim_{i}.mp4")
+                 # the dropped tail is [end_frame, end): exactly poda frames
+                 cmd_fim = (
+                     f'ffmpeg -y -hide_banner -loglevel error -i "{base}" '
+                     f'-vf "trim=start_frame={end_frame},setpts=PTS-STARTPTS" -an "{video_fade_fim}"'
+                 )
+                 subprocess.run(cmd_fim, shell=True, check=True)
+
+             nova_lista.append(video_podado)
+             print(f"[DEBUG] Trimmed video {i+1} added, {self._get_total_frames(video_podado)} frames ✅")
+
+         print("===========CAUSAL CONCATENATION=============")
+         print(f"[DEBUG] {nova_lista}")
+         return nova_lista
+
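Frame accounting for the trim/crossfade pass, assuming two 49-frame parts and the default 8 crossfade frames (numbers worked out by hand, not taken from a run):

```python
# part 1 (first): keeps frames [0, 41)  -> 41 frames; its tail [41, 49) feeds the blend
# part 2 (last):  keeps frames [8, 49)  -> 41 frames; its head [0, 8)  feeds the blend
# transition:     8 blended frames between the two
# total: 41 + 8 + 41 = 90 = 49 + 49 - 8 (the parts share one crossfade's worth of frames)
```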
+     def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
+         """
+         Concatenates several MP4s without re-encoding, via ffmpeg's concat demuxer.
+         WARNING: every file must share the same codec, fps, resolution, etc.
+         """
+         if not mp4_list or len(mp4_list) < 2:
+             raise ValueError("Provide at least two MP4 files to concatenate.")
+
+         # Write the temporary list file for ffmpeg
+         with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
+             for mp4 in mp4_list:
+                 f.write(f"file '{os.path.abspath(mp4)}'\n")
+             list_path = f.name
+
+         cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
+         print(f"[DEBUG] Concat: {cmd}")
+
+         try:
+             subprocess.check_call(shlex.split(cmd))
+         finally:
+             try:
+                 os.remove(list_path)
+             except Exception:
+                 pass
+
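The concat demuxer reads a plain-text list; for two hypothetical parts plus their transition, the temporary file and the resulting command look like this (names are illustrative):

```python
# /tmp/tmpXXXXXX.txt, as written by the method above:
#   file '/app/output/output_par_1_podado_0.mp4'
#   file '/app/output/transicao_1_2.mp4'
#   file '/app/output/output_par_2_podado_1.mp4'
# then: ffmpeg -y -f concat -safe 0 -i /tmp/tmpXXXXXX.txt -c copy /app/output/concat_fim_42.mp4
```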
+
+     # ==============================================================================
+     # --- COMPLETE, UPDATED GENERATE FUNCTION ---
+     # ==============================================================================
+     def generate(
+         self,
+         prompt,
+         negative_prompt,
+         mode="text-to-video",
+         start_image_filepath=None,
+         middle_image_filepath=None,
+         middle_frame_number=None,
+         middle_image_weight=1.0,
+         end_image_filepath=None,
+         end_image_weight=1.0,
+         input_video_filepath=None,
+         height=512,
+         width=704,
+         duration=2.0,
+         frames_to_use=9,
+         seed=42,
+         randomize_seed=True,
+         guidance_scale=3.0,
+         improve_texture=True,
+         progress_callback=None,
+         external_decode=True,
+     ):
+         t_all = time.perf_counter()
+         print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
+         if self.device == "cuda":
+             torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
+         self._log_gpu_memory("Start of generation")
+
+         # --- Initial setup ---
+         if mode == "image-to-video" and not start_image_filepath:
+             raise ValueError("A start image is required for image-to-video mode")
+         used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
+         seed_everething(used_seed); print(f"[DEBUG] Seed used: {used_seed}")
+         FPS = 24.0; MAX_NUM_FRAMES = 2570
+         target_frames_rounded = round(duration * FPS)
+         n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
+         actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
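+         # Worked example of the 8*n+1 snapping above: duration=2.0 at 24 fps gives 48
+         # target frames; n_val = round(47/8) = 6, so actual_num_frames = 6*8 + 1 = 49.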
+         height_padded = ((height - 1) // 8 + 1) * 8
+         width_padded = ((width - 1) // 8 + 1) * 8
+         padding_values = calculate_padding(height, width, height_padded, width_padded)
+         generator = torch.Generator(device=self.device).manual_seed(used_seed)
+
+         conditioning_items = []
+         if mode == "image-to-video":
+             start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
+             conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
+             if middle_image_filepath and middle_frame_number is not None:
+                 middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
+                 safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
+                 conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
+             if end_image_filepath:
+                 end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
+                 last_frame_index = actual_num_frames - 1
+                 conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
+         print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
+
+         call_kwargs = {
+             "prompt": prompt,
+             "negative_prompt": negative_prompt,
+             "height": height_padded,
+             "width": width_padded,
+             "num_frames": actual_num_frames,
+             "frame_rate": int(FPS),
+             "generator": generator,
+             "output_type": "latent",
+             "conditioning_items": conditioning_items if conditioning_items else None,
+             "media_items": None,
+             "decode_timestep": self.config["decode_timestep"],
+             "decode_noise_scale": self.config["decode_noise_scale"],
+             "stochastic_sampling": self.config["stochastic_sampling"],
+             "image_cond_noise_scale": 0.01,
+             "is_video": True,
+             "vae_per_channel_normalize": True,
+             "mixed_precision": (self.config["precision"] == "mixed_precision"),
+             "offload_to_cpu": False,
+             "enhance_prompt": False,
+             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
+         }
+         print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
+
+         latents = None
+         latents_list = []
+         results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+         try:
+             ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+             with ctx:
+                 if improve_texture:
+                     if not self.latent_upsampler:
+                         raise ValueError("Spatial upscaler not loaded, but 'improve_texture' is enabled.")
+
+                     # --- STAGE 1: BASE GENERATION (FIRST PASS) ---
+                     print("\n--- STARTING STAGE 1: BASE GENERATION (FIRST PASS) ---")
+                     t_pass1 = time.perf_counter()
+
+                     first_pass_config = self.config.get("first_pass", {}).copy()
+                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
+                     vae_scale_factor = self.pipeline.vae_scale_factor  # usually 8
+
+                     # Replicates the exact sizing formula from LTXMultiScalePipeline
+                     x_width = int(width_padded * downscale_factor)
+                     downscaled_width = x_width - (x_width % vae_scale_factor)
+                     x_height = int(height_padded * downscale_factor)
+                     downscaled_height = x_height - (x_height % vae_scale_factor)
+                     print(f"[DEBUG] First pass dims: original pad ({width_padded}x{height_padded}) -> downscaled ({downscaled_width}x{downscaled_height})")
+
+                     first_pass_kwargs = call_kwargs.copy()
+                     first_pass_kwargs.update({
+                         "output_type": "latent",
+                         "width": downscaled_width,
+                         "height": downscaled_height,
+                         "guidance_scale": float(guidance_scale),
+                         **first_pass_config
+                     })
+
+                     print(f"[DEBUG] First pass: generating at {downscaled_width}x{downscaled_height}...")
+                     base_latents = self.pipeline(**first_pass_kwargs).images
+                     log_tensor_info(base_latents, "Base latents (first pass)")
+                     print(f"[DEBUG] First pass finished in {time.perf_counter() - t_pass1:.2f}s")
+
+                     # --- STAGE 2: LATENT UPSCALE ---
+                     print("\n--- STARTING STAGE 2: LATENT UPSCALE ---")
+                     t_upscale = time.perf_counter()
+
+                     upsampled_latents = self._upsample_latents_internal(base_latents)
+                     upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
+                     log_tensor_info(upsampled_latents, "Latents after upscale")
+                     print(f"[DEBUG] Latent upscale finished in {time.perf_counter() - t_upscale:.2f}s")
+                     del base_latents; gc.collect(); torch.cuda.empty_cache()
+
+                     # Move the upscaled latents to CPU before chunking, to free VRAM for the second pass
+                     latents_cpu_up = upsampled_latents.detach().to("cpu", non_blocking=True)
+                     torch.cuda.empty_cache()
+                     try:
+                         torch.cuda.ipc_collect()
+                     except Exception:
+                         pass
+
+                     latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up, 4, 1)
+
+                     for i, latents in enumerate(latents_parts_up):
+
+                         # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
+                         print("\n--- STARTING STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---")
+
+                         second_pass_config = self.config.get("second_pass", {}).copy()
+                         # Uses the first-pass dimensions doubled, as in the original pipeline
+                         second_pass_width = downscaled_width * 2
+                         second_pass_height = downscaled_height * 2
+                         print(f"[DEBUG] Second pass dims: target ({second_pass_width}x{second_pass_height})")
+                         t_pass2 = time.perf_counter()
+
+                         vae_temporal_scale = self.pipeline.video_scale_factor  # usually 4 or 8
+                         num_pixel_frames_part = ((latents.shape[2] - 1) * vae_temporal_scale) + 1
+                         print(f"[DEBUG] Part {i+1}: {latents.shape[2]} latent frames -> {num_pixel_frames_part} pixel frames (target)")
+
+                         second_pass_kwargs = call_kwargs.copy()
+                         second_pass_kwargs.update({
+                             "output_type": "latent",
+                             "width": second_pass_width,
+                             "height": second_pass_height,
+                             "num_frames": num_pixel_frames_part,
+                             "latents": latents.to(self.device),  # the current upscaled chunk, not the whole tensor
+                             "guidance_scale": float(guidance_scale),
+                             **second_pass_config
+                         })
+
+                         print(f"[DEBUG] Second pass: refining at {second_pass_width}x{second_pass_height}...")
+                         final_latents = self.pipeline(**second_pass_kwargs).images
+                         log_tensor_info(final_latents, "Final latents (after second pass)")
+                         print(f"[DEBUG] Second pass for this part finished in {time.perf_counter() - t_pass2:.2f}s")
+
+                         latents_list.append(final_latents)
+
+                 else:  # single-stage generation
+                     print("\n--- STARTING SINGLE-STAGE GENERATION ---")
+                     t_single = time.perf_counter()
+                     single_pass_kwargs = call_kwargs.copy()
+                     single_pass_kwargs.update(self.config.get("first_pass", {}))
+                     single_pass_kwargs["guidance_scale"] = float(guidance_scale)
+                     single_pass_kwargs["output_type"] = "latent"
+
+                     latents = self.pipeline(**single_pass_kwargs).images
+                     log_tensor_info(latents, "Final latents (single stage)")
+                     print(f"[DEBUG] Single stage finished in {time.perf_counter() - t_single:.2f}s")
+
+                     latents_list.append(latents)
+
+                 # --- FINAL STAGE: DECODING AND MP4 ASSEMBLY ---
+                 print("\n--- STARTING FINAL STAGE: DECODING AND ASSEMBLY ---")
+
+                 latents_parts = []
+                 for latents in latents_list:
+                     # extend, not append: _dividir_latentes_por_tamanho returns a list of chunks
+                     latents_parts.extend(self._dividir_latentes_por_tamanho(latents, 4, 1))
+
+                 partes_mp4 = []
+                 par = 0
+                 for latents in latents_parts:
+
+                     par = par + 1
+                     output_video_path = os.path.join(results_dir, f"output_{used_seed}_{par}.mp4")
+                     final_output_path = None
+
+                     print(f"[DEBUG] Decoding latent block {par} with the VAE → pixel tensor...")
+                     # Use the manager with a per-item timestep; avoids target_shape issues and the NoneType.decode path
+                     pixel_tensor = vae_manager_singleton.decode(
+                         latents.to(self.device, non_blocking=True),
+                         decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                     )
+                     log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")
+
+                     print("[DEBUG] Encoding MP4 from the pixel tensor (whole block)...")
+                     video_encode_tool_singleton.save_video_from_tensor(
+                         pixel_tensor,
+                         output_video_path,
+                         fps=call_kwargs["frame_rate"],
+                         progress_callback=progress_callback
+                     )
+
+                     candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                     try:
+                         shutil.move(output_video_path, candidate)
+                         final_output_path = candidate
+                         print(f"[DEBUG] MP4 part {par} moved to {final_output_path}")
+                         partes_mp4.append(final_output_path)
+                     except Exception as e:
+                         final_output_path = output_video_path
+                         partes_mp4.append(final_output_path)  # keep the part in the list even when the move fails
+                         print(f"[DEBUG] Move failed; using the tmp file as final: {e}")
+
+                 total_partes = len(partes_mp4)
+                 if total_partes > 1:
+                     final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
+                     partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
+                     self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
+                 else:
+                     final_vid = partes_mp4[0]
+
+                 self._log_gpu_memory("End of generation")
+                 return final_vid, used_seed
+
+         except Exception as e:
+             print("[DEBUG] EXCEPTION DURING GENERATION:")
+             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
+             raise
+         finally:
+             try:
+                 del latents
+             except Exception:
+                 pass
+
+             gc.collect()
+             try:
+                 if self.device == "cuda":
+                     torch.cuda.empty_cache()
+                     try:
+                         torch.cuda.ipc_collect()
+                     except Exception:
+                         pass
+             except Exception as e:
+                 print(f"[DEBUG] GPU cleanup in finally failed: {e}")
+
+             try:
+                 self.finalize(keep_paths=[])
+             except Exception as e:
+                 print(f"[DEBUG] finalize() in finally failed: {e}")
+
+ print("Creating the VideoService instance. Model loading will start now...")
+ video_generation_service = VideoService()
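For context, a minimal caller sketch; the prompt and values are illustrative, and the module-level singleton above is created on import:

```python
from api.ltx_server import video_generation_service

video_path, seed = video_generation_service.generate(
    prompt="a slow pan across a foggy harbor at dawn",
    negative_prompt="blurry, distorted",
    mode="text-to-video",
    height=512,
    width=704,
    duration=2.0,
    improve_texture=True,
)
print(video_path, seed)  # e.g. "/app/output/output_par_1.mp4", 1234567
```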
api/seedvr_server.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ import shutil
+ import subprocess
+ import sys
+ import time
+ import mimetypes
+ from pathlib import Path
+ from typing import List, Optional, Tuple
+
+ from huggingface_hub import hf_hub_download
+
+ class SeedVRServer:
+     def __init__(self, **kwargs):
+         self.SEEDVR_ROOT = Path(os.getenv("SEEDVR_ROOT", "/data/SeedVR"))
+         # Point at our custom checkpoint directory
+         self.CKPTS_ROOT = Path("/data/seedvr_models_fp16")
+         self.OUTPUT_ROOT = Path(os.getenv("OUTPUT_ROOT", "/app/outputs"))
+         self.INPUT_ROOT = Path(os.getenv("INPUT_ROOT", "/app/inputs"))
+         self.HF_HOME_CACHE = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))
+         self.REPO_URL = os.getenv("SEEDVR_GIT_URL", "https://github.com/numz/ComfyUI-SeedVR2_VideoUpscaler")
+         self.NUM_GPUS_TOTAL = int(os.getenv("NUM_GPUS", "4"))
+
+         print("🚀 SeedVRServer (FP16) initializing and preparing the environment...")
+         for p in [self.SEEDVR_ROOT.parent, self.CKPTS_ROOT, self.OUTPUT_ROOT, self.INPUT_ROOT, self.HF_HOME_CACHE]:
+             p.mkdir(parents=True, exist_ok=True)
+
+         self.setup_dependencies()
+         print("✅ SeedVRServer (FP16) ready.")
+
+     def setup_dependencies(self):
+         self._ensure_repo()
+         # The monkey patch is now applied by start_seedvr.sh, no longer here.
+         self._ensure_model()
+
+     def _ensure_repo(self) -> None:
+         if not (self.SEEDVR_ROOT / ".git").exists():
+             print(f"[SeedVRServer] Cloning repository into {self.SEEDVR_ROOT}...")
+             subprocess.run(["git", "clone", "--depth", "1", self.REPO_URL, str(self.SEEDVR_ROOT)], check=True)
+         else:
+             print("[SeedVRServer] SeedVR repository already exists.")
+
+     def _ensure_model(self) -> None:
+         """Downloads the optimized FP16 model files and their companions."""
+         print(f"[SeedVRServer] Checking (FP16) checkpoints in {self.CKPTS_ROOT}...")
+
+         model_files = {
+             "seedvr2_ema_3b_fp16.safetensors": "MonsterMMORPG/SeedVR2_SECourses",
+             "ema_vae_fp16.safetensors": "MonsterMMORPG/SeedVR2_SECourses",
+             "pos_emb.pt": "ByteDance-Seed/SeedVR2-3B",
+             "neg_emb.pt": "ByteDance-Seed/SeedVR2-3B",
+         }
+
+         for filename, repo_id in model_files.items():
+             if not (self.CKPTS_ROOT / filename).exists():
+                 print(f"Downloading {filename} from {repo_id}...")
+                 hf_hub_download(repo_id=repo_id, filename=filename, local_dir=str(self.CKPTS_ROOT), cache_dir=str(self.HF_HOME_CACHE), token=os.getenv("HF_TOKEN"))
+         print("[SeedVRServer] (FP16) checkpoints are in place.")
+
+     def _prepare_job(self, input_file: str) -> Tuple[Path, Path]:
+         ts = f"{int(time.time())}_{os.urandom(4).hex()}"
+         job_input_dir = self.INPUT_ROOT / f"job_{ts}"
+         out_dir = self.OUTPUT_ROOT / f"run_{ts}"
+         job_input_dir.mkdir(parents=True, exist_ok=True)
+         out_dir.mkdir(parents=True, exist_ok=True)
+         shutil.copy2(input_file, job_input_dir / Path(input_file).name)
+         return job_input_dir, out_dir
+
+     def run_inference(self, filepath: str, *, seed: int, resh: int, resw: int, spsize: int, fps: Optional[float] = None):
+         script = self.SEEDVR_ROOT / "inference_cli.py"
+         job_input_dir, outdir = self._prepare_job(filepath)
+         mediatype, _ = mimetypes.guess_type(filepath)
+         is_image = mediatype and mediatype.startswith("image")
+
+         # Images are processed on a single GPU with batch size 1
+         effective_nproc = 1 if is_image else self.NUM_GPUS_TOTAL
+         effective_spsize = 1 if is_image else spsize
+
+         output_filename = f"result_{Path(filepath).stem}.mp4" if not is_image else f"{Path(filepath).stem}_upscaled"
+         output_filepath = outdir / output_filename
+
+         cmd = [
+             "torchrun", "--standalone", "--nnodes=1",
+             f"--nproc-per-node={effective_nproc}",
+             str(script),
+             "--video_path", str(filepath),
+             "--output", str(output_filepath),
+             "--model_dir", str(self.CKPTS_ROOT),
+             "--seed", str(seed),
+             "--cuda_device", "0",
+             "--resolution", str(resh),
+             "--batch_size", str(effective_spsize),
+             "--model", "seedvr2_ema_3b_fp16.safetensors",
+             "--preserve_vram",
+             "--debug",
+             "--output_format", "video" if not is_image else "png",
+         ]
+
+         print("SeedVRServer command:", " ".join(cmd))
+         try:
+             subprocess.run(cmd, cwd=str(self.SEEDVR_ROOT), check=True, env=os.environ.copy(), stdout=sys.stdout, stderr=sys.stderr)
+             # Build the return tuple deterministically
+             if is_image:
+                 # The CLI writes PNGs into the args.output directory (treated as a directory when output_format=png)
+                 image_dir = output_filepath if output_filepath.suffix == "" else output_filepath.with_suffix("")
+                 return str(image_dir), None, outdir
+             else:
+                 # The CLI writes the video exactly at output_filepath
+                 return None, str(output_filepath), outdir
+         except Exception as e:
+             print(f"[UI ERROR] Inference failed: {e}")
+             return None, None, None
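A minimal caller sketch for the upscaler service (the input path and parameter values are illustrative):

```python
from api.seedvr_server import SeedVRServer

server = SeedVRServer()  # clones the repo and downloads checkpoints on first use
image_dir, video_path, outdir = server.run_inference(
    "/app/inputs/clip.mp4",
    seed=42, resh=1080, resw=1920, spsize=4,
)
print(video_path)  # upscaled MP4 inside the per-job output dir, or None on failure
```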