|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Settings grouped under the `engine` key.
# NOTE(review): in this view every key sits at column 0, so nesting is
# presumed from ordering only — confirm against the loader.
engine:
|
|
|
# A relative name; what it is resolved against is not visible in this file.
output_root: "results"
|
|
|
|
|
|
# Settings grouped under the `api` key: allowed CORS origins and a timeout.
api:
|
|
|
# NOTE(review): the single entry "*" is presumably a wildcard matching every
# origin — confirm this is intended outside development deployments.
cors_allow_origins: ["*"]
|
|
|
# 3600 is one hour if the `_seconds` suffix is taken at face value.
sync_timeout_seconds: 3600
|
|
|
|
|
|
# Settings grouped under the `database` key: an overall switch, a storage
# directory, and one switch plus one collection name each for faces and voices.
database:
|
|
|
enabled: true
|
|
|
# NOTE(review): the name suggests a Chroma store — confirm against the code
# that opens this directory.
persist_directory: "chroma_db"
|
|
|
enable_face_recognition: true
|
|
|
enable_voice_recognition: true
|
|
|
# Collection names for the two modalities; both use the "index_" prefix.
face_collection: "index_faces"
|
|
|
voice_collection: "index_voices"
|
|
|
|
|
|
# Settings grouped under the `jobs` key. The switch is currently off.
# NOTE(review): whether the two values below are still read while
# `enabled` is false cannot be told from this file.
jobs:
|
|
|
enabled: false
|
|
|
max_workers: 1
|
|
|
# 86400 is 24 hours if the `_seconds` suffix is taken at face value.
result_ttl_seconds: 86400
|
|
|
|
|
|
# Video settings. The three headers below presumably nest as
# video_processing -> keyframes -> conditional_extraction (indentation is not
# visible in this view — confirm).
video_processing:
|
|
|
keyframes:
|
|
|
conditional_extraction:
|
|
|
# NOTE(review): the switch is spelled `enable` here, while `database`, `jobs`
# and `diarization` in this file spell it `enabled`. Renaming would break
# whatever reads this key, so it is only flagged.
enable: true
|
|
|
min_scene_length_seconds: 1.5
|
|
|
# NOTE(review): the scale and metric of this threshold are not defined in
# this file.
difference_threshold: 28.0
|
|
|
|
|
|
# Fixed-rate frame sampling settings. The key name "frames_per_second" is also
# the value of `per_second_frames_source` in the `identity` section of this
# file, so the two must stay in sync if this key is ever renamed.
frames_per_second:
|
|
|
enable: true
|
|
|
# Parsed as the float 1.0.
fps: 1.0
|
|
|
|
|
|
# OCR settings: engine name, a language hint and an executable path override.
ocr:
|
|
|
engine: "tesseract"
|
|
|
# NOTE(review): "spa" is presumably the Tesseract language code for Spanish.
# The ASR model named elsewhere in this file is "whisper-catalan"; confirm a
# Spanish-only OCR hint is intended.
language_hint: "spa"
|
|
|
# Explicit empty string (not null).
# NOTE(review): presumably means "use the default tesseract binary" — verify
# how the consumer treats "".
tesseract_cmd: ""
|
|
|
|
|
|
# Face detection / embedding settings.
faces:
|
|
|
detector_model: "mtcnn"
|
|
|
embedding_model: "Facenet512"
|
|
|
# NOTE(review): unit is presumably pixels — confirm.
min_face_size: 32
|
|
|
# NOTE(review): presumably a 0-1 score below which detections are dropped —
# confirm against the detector wrapper.
detection_confidence: 0.85
|
|
|
|
|
|
# Settings for grouping OCR results: a method name, a sentence-embedding model
# name and a similarity cut-off.
ocr_clustering:
|
|
|
method: "sequential_similarity"
|
|
|
sentence_transformer: "all-MiniLM-L6-v2"
|
|
|
# Loads as the float 0.6; the trailing zero is cosmetic.
similarity_threshold: 0.60
|
|
|
|
|
|
# Audio settings: sample rate and container format.
audio_processing:
|
|
|
# NOTE(review): presumably in Hz (16 kHz) — the unit is not stated here.
sample_rate: 16000
|
|
|
format: "wav"
|
|
|
|
|
|
# Diarization settings: two switches, segment duration bounds and silence
# detection parameters.
diarization:
|
|
|
enabled: true
|
|
|
# NOTE(review): the name suggests segmentation falls back to silence detection
# only — confirm what is bypassed when this is true.
force_silence_only: true
|
|
|
# NOTE(review): both durations are presumably in seconds — confirm.
min_segment_duration: 0.5
|
|
|
max_segment_duration: 10.0
|
|
|
# NOTE(review): units of the next two values are not stated; they look like
# dBFS and milliseconds respectively — confirm against the consumer.
silence_thresh: -40
|
|
|
min_silence_len: 500
|
|
|
|
|
|
# NOTE(review): two switches with apparently overlapping meaning follow
# (`enable_voice_embeddings` and `speaker_embedding` -> `enabled`). Which one
# the code honours, and which section each belongs to, cannot be told from
# this view — confirm before changing either.
enable_voice_embeddings: true
|
|
|
speaker_embedding:
|
|
|
enabled: true
|
|
|
|
|
|
|
|
|
# Voice settings; `speaker_identification` is presumably nested under
# `voice_processing` (indentation is not visible in this view).
voice_processing:
|
|
|
speaker_identification:
|
|
|
enabled: true
|
|
|
find_optimal_clusters: true
|
|
|
# Bounds on the speaker count: 1 to 5.
min_speakers: 1
|
|
|
max_speakers: 5
|
|
|
# Loads as the float 0.4; the trailing zero is cosmetic.
# NOTE(review): the distance metric this applies to is not stated here.
distance_threshold: 0.40
|
|
|
|
|
|
# Speech-recognition settings. A top-level-looking `models` section later in
# this file also contains a key named `asr`; the two are distinct only if this
# one is nested — NOTE(review): confirm the parent of this key.
asr:
|
|
|
|
|
|
enable_full_transcription: true
|
|
|
|
|
|
# Background description settings; `montage` is presumably nested under
# `background_descriptor`.
background_descriptor:
|
|
|
montage:
|
|
|
# Spelled `enable` here, unlike the `enabled` switches in `database`, `jobs`
# and `diarization` in this file.
enable: true
|
|
|
max_frames: 12
|
|
|
# NOTE(review): "auto" presumably lets the code pick the grid layout; other
# accepted values are not visible here.
grid: "auto"
|
|
|
|
|
|
# Generation settings for the description step. The model name equals the
# value of `vision` in the `models` section of this file.
description:
|
|
|
model: "salamandra-vision"
|
|
|
max_tokens: 512
|
|
|
temperature: 0.2
|
|
|
|
|
|
# Identity settings; `timeline_mapping` is presumably nested under `identity`.
identity:
|
|
|
timeline_mapping:
|
|
|
# Matches the `frames_per_second` key that appears in the video settings of
# this file.
per_second_frames_source: "frames_per_second"
|
|
|
# Two-item list: "keyframes" and "audio_segments".
attach_faces_to:
|
|
|
- "keyframes"
|
|
|
- "audio_segments"
|
|
|
# NOTE(review): whether `out_key` belongs to `timeline_mapping` or directly to
# `identity` cannot be told from this view — confirm.
out_key: "persona"
|
|
|
|
|
|
# Narration settings: model name, a guidelines file, timing limits (`timing`)
# and generation parameters (`llm`).
narration:
|
|
|
# Equals the value of `instruct` in the `models` section of this file.
model: "salamandra-instruct"
|
|
|
# NOTE(review): UNE 153010 is, as far as I know, the Spanish subtitling
# standard, while audio description is covered by UNE 153020 — confirm this is
# the intended guidelines file for narration.
une_guidelines_path: "UNE_153010.txt"
|
|
|
timing:
|
|
|
# The three values below load as 0.6, 1.2 and 0.8; trailing zeros are cosmetic.
# NOTE(review): "ad" presumably stands for audio description — confirm.
max_ad_duration_ratio: 0.60
|
|
|
min_gap_seconds: 1.20
|
|
|
min_ad_seconds: 0.80
|
|
|
llm:
|
|
|
max_tokens: 1024
|
|
|
temperature: 0.2
|
|
|
|
|
|
# Subtitle layout settings.
subtitles:
|
|
|
max_chars_per_line: 42
|
|
|
# NOTE(review): 10 lines per cue is far above common subtitling practice
# (typically 2-3 lines) — confirm this is intentional and not a typo.
max_lines_per_cue: 10
|
|
|
# NOTE(review): presumably renders speaker names in brackets; other accepted
# values are not visible here.
speaker_display: "brackets"
|
|
|
|
|
|
# Maps four role names (instruct, vision, tools, asr) to model identifiers.
# The same four identifiers appear in the `use_remote_for` list and as entry
# names under `endpoints` elsewhere in this file; keep them in sync.
models:
|
|
|
|
|
|
instruct: "salamandra-instruct"
|
|
|
vision: "salamandra-vision"
|
|
|
tools: "salamandra-tools"
|
|
|
asr: "whisper-catalan"
|
|
|
|
|
|
# Routing settings. `use_remote_for` lists all four model identifiers that
# appear as values in the `models` section of this file.
# NOTE(review): presumably these are the models served remotely rather than
# locally — confirm against the router.
routing:
|
|
|
use_remote_for:
|
|
|
- "salamandra-instruct"
|
|
|
- "salamandra-vision"
|
|
|
- "salamandra-tools"
|
|
|
- "whisper-catalan"
|
|
|
|
|
|
# Remote hosting settings. The `user` value is also the first component of the
# host name in every `base_url` in this file ("https://veureu-<space>.hf.space").
remote_spaces:
|
|
|
user: "veureu"
|
|
|
|
|
|
# Per-model endpoint entries. Each entry in this file carries the same four
# keys: space, base_url, client, predict_route.
endpoints:
|
|
|
salamandra-instruct:
|
|
|
space: "schat"
|
|
|
# Host name contains the space name "schat" after the "veureu-" prefix.
base_url: "https://veureu-schat.hf.space"
|
|
|
client: "gradio"
|
|
|
predict_route: "/predict"
|
|
|
|
|
|
# Endpoint entry for the "salamandra-vision" model identifier.
salamandra-vision:
|
|
|
space: "svision"
|
|
|
# Host name contains the space name "svision" after the "veureu-" prefix.
base_url: "https://veureu-svision.hf.space"
|
|
|
client: "gradio"
|
|
|
predict_route: "/predict"
|
|
|
|
|
|
# Endpoint entry for the "salamandra-tools" model identifier.
salamandra-tools:
|
|
|
space: "stools"
|
|
|
# Host name contains the space name "stools" after the "veureu-" prefix.
base_url: "https://veureu-stools.hf.space"
|
|
|
client: "gradio"
|
|
|
predict_route: "/predict"
|
|
|
|
|
|
# Endpoint entry for the "whisper-catalan" model identifier.
whisper-catalan:
|
|
|
space: "asr"
|
|
|
# Host name contains the space name "asr" after the "veureu-" prefix.
base_url: "https://veureu-asr.hf.space"
|
|
|
client: "gradio"
|
|
|
predict_route: "/predict"
|
|
|
|
|
|
# HTTP client settings: request timeout, retry count and backoff.
# NOTE(review): whether the backoff is fixed or multiplied per attempt is not
# visible in this file.
http:
|
|
|
# 180 is three minutes if the `_seconds` suffix is taken at face value.
timeout_seconds: 180
|
|
|
retries: 3
|
|
|
backoff_seconds: 2.0
|
|
|
|
|
|
# Security settings.
security:
|
|
|
use_hf_token: true
|
|
|
# NOTE(review): presumably the name of the environment variable holding the
# token, so no secret value is stored in this file — confirm.
hf_token_env: "HF_TOKEN"
|
|
|
# Currently false.
# NOTE(review): presumably leaves TLS certificate verification on — confirm.
allow_insecure_tls: false
|
|
|
|
|
|
# Logging settings: a level name and a JSON-output switch (currently off).
logging:
|
|
|
level: "INFO"
|
|
|
json: false
|
|
|
|