""" PromptWizard Qwen Training — Configurable Dataset & Repo Fine-tunes Qwen using a user-selected dataset and uploads the trained model to a user-specified Hugging Face Hub repo asynchronously with detailed logs. """ import gradio as gr import spaces import torch from transformers import ( AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, ) from datasets import load_dataset from peft import LoraConfig, get_peft_model, TaskType from huggingface_hub import HfApi, HfFolder, Repository import os, tempfile, shutil, asyncio, threading, time from datetime import datetime # ==== Async upload wrapper ==== def start_async_upload(local_dir, hf_repo, output_log): """Starts async model upload in a background thread.""" def runner(): output_log.append(f"[INFO] šŸš€ Async upload thread started for repo: {hf_repo}") asyncio.run(async_upload_model(local_dir, hf_repo, output_log)) output_log.append(f"[INFO] šŸ›‘ Async upload thread finished for repo: {hf_repo}") threading.Thread(target=runner, daemon=True).start() from huggingface_hub import upload_folder, HfFolder async def async_upload_model(local_dir, hf_repo, output_log, max_retries=3): """ Uploads a local model directory to HF Hub asynchronously using HTTP API. """ try: token = HfFolder.get_token() output_log.append(f"[INFO] ā˜ļø Preparing to upload to repo: {hf_repo}") attempt = 0 while attempt < max_retries: try: output_log.append(f"[INFO] šŸ”„ Attempt {attempt+1} to upload folder via HTTP API...") upload_folder( folder_path=local_dir, repo_id=hf_repo, repo_type="model", token=token, ignore_patterns=["*.lock", "*.tmp"], # ignore temp files create_pr=False, ) output_log.append("[SUCCESS] āœ… Model successfully uploaded to HF Hub!") break except Exception as e: attempt += 1 output_log.append(f"[ERROR] Upload attempt {attempt} failed: {e}") if attempt >= max_retries: output_log.append("[ERROR] āŒ Max retries reached. Upload failed.") else: output_log.append("[INFO] Retrying upload in 5 seconds...") await asyncio.sleep(5) except Exception as e: output_log.append(f"[ERROR] āŒ Unexpected error during upload: {e}") # ==== GPU check ==== def check_gpu_status(): return "šŸš€ Zero GPU Ready - GPU will be allocated when training starts" # ==== Logging helper ==== def log_message(output_log, msg): line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}" print(line) output_log.append(line) # ==== Main TExt Training ==== # @spaces.GPU(duration=300) # def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo): # output_log = [] # test_split = 0.2 # try: # log_message(output_log, "šŸ” Initializing training sequence...") # # ===== Device ===== # device = "cuda" if torch.cuda.is_available() else "cpu" # log_message(output_log, f"šŸŽ® Using device: {device}") # if device == "cuda": # log_message(output_log, f"āœ… GPU: {torch.cuda.get_device_name(0)}") # # ===== Load dataset ===== # log_message(output_log, f"\nšŸ“š Loading dataset: {dataset_name} ...") # dataset = load_dataset(dataset_name) # dataset = dataset["train"].train_test_split(test_size=test_split) # train_dataset = dataset["train"] # test_dataset = dataset["test"] # log_message(output_log, f" Training samples: {len(train_dataset)}") # log_message(output_log, f" Test samples: {len(test_dataset)}") # log_message(output_log, f" Columns: {train_dataset.column_names}") # # ===== Format examples ===== # # def format_example(item): # # text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values()) # # prompt = f"""<|system|> # # You are a wise teacher interpreting Bhagavad Gita with deep insights. # # <|user|> # # {text} # # <|assistant|> # # """ # # return {"text": prompt} # # ===== Format examples dynamically ===== # def format_example(item): # text_content = item.get("text") or item.get("content") or str(item.get("path", "")) or " ".join(str(v) for v in item.values()) # # Use shorter, clean system prompt + user content for better loss # prompt = ( # f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n" # ) # return {"text": prompt} # train_dataset = train_dataset.map(format_example) # test_dataset = test_dataset.map(format_example) # log_message(output_log, f"āœ… Formatted {len(train_dataset)} train + {len(test_dataset)} test examples") # # ===== Load model & tokenizer ===== # log_message(output_log, f"\nšŸ¤– Loading model: {base_model}") # tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) # if tokenizer.pad_token is None: # tokenizer.pad_token = tokenizer.eos_token # model = AutoModelForCausalLM.from_pretrained( # base_model, # trust_remote_code=True, # torch_dtype=torch.float16 if device == "cuda" else torch.float32, # low_cpu_mem_usage=True, # ) # if device == "cuda": # model = model.to(device) # log_message(output_log, "āœ… Model and tokenizer loaded successfully") # log_message(output_log, f"Tokenizer vocab size: {tokenizer.vocab_size}") # # ===== LoRA configuration ===== # log_message(output_log, "\nāš™ļø Configuring LoRA for efficient fine-tuning...") # lora_config = LoraConfig( # task_type=TaskType.CAUSAL_LM, # r=8, # lora_alpha=16, # lora_dropout=0.1, # target_modules=["q_proj", "v_proj"], # bias="none", # ) # model = get_peft_model(model, lora_config) # trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) # log_message(output_log, f"Trainable params after LoRA: {trainable_params:,}") # # ===== Tokenization + labels ===== # def tokenize_fn(examples): # tokenized = tokenizer( # examples["text"], # padding="max_length", # truncation=True, # max_length=256, # ) # tokenized["labels"] = tokenized["input_ids"].copy() # return tokenized # train_dataset = train_dataset.map(tokenize_fn, batched=True) # test_dataset = test_dataset.map(tokenize_fn, batched=True) # log_message(output_log, "āœ… Tokenization + labels done") # # ===== Training arguments ===== # output_dir = "./qwen-gita-lora" # training_args = TrainingArguments( # output_dir=output_dir, # num_train_epochs=num_epochs, # per_device_train_batch_size=batch_size, # gradient_accumulation_steps=2, # warmup_steps=10, # logging_steps=5, # save_strategy="epoch", # fp16=device == "cuda", # optim="adamw_torch", # learning_rate=learning_rate, # max_steps=500, # Limit for demo is 100 # ) # trainer = Trainer( # model=model, # args=training_args, # train_dataset=train_dataset, # eval_dataset=test_dataset, # tokenizer=tokenizer, # ) # # ===== Train ===== # log_message(output_log, "\nšŸš€ Starting training...") # trainer.train() # log_message(output_log, "\nšŸ’¾ Saving trained model locally...") # trainer.save_model(output_dir) # tokenizer.save_pretrained(output_dir) # # ===== Async upload ===== # log_message(output_log, f"\nā˜ļø Initiating async upload to {hf_repo}") # start_async_upload(output_dir, hf_repo, output_log) # log_message(output_log, "āœ… Training complete & async upload started!") # except Exception as e: # log_message(output_log, f"\nāŒ Error during training: {e}") # return "\n".join(output_log) # ===================================================== # 🧠 Train model to expand short prompts into long ones # ===================================================== @spaces.GPU(duration=300) def train_model( base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo ): output_log = [] try: log_message(output_log, "šŸš€ Starting FAST test training...") # ===== Device ===== device = "cuda" if torch.cuda.is_available() else "cpu" dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16 log_message(output_log, f"šŸŽ® Device: {device}, dtype: {dtype}") if device == "cuda": log_message(output_log, f"āœ… GPU: {torch.cuda.get_device_name(0)}") # ===== Load dataset ===== log_message(output_log, f"\nšŸ“š Loading dataset: {dataset_name}") dataset = load_dataset(dataset_name) dataset = dataset["train"].train_test_split(test_size=0.2, seed=42) train_dataset, test_dataset = dataset["train"], dataset["test"] # ===== ⚔ FAST mode: use small subset ===== train_dataset = train_dataset.select(range(min(1000, len(train_dataset)))) test_dataset = test_dataset.select(range(min(200, len(test_dataset)))) log_message(output_log, f"⚔ Using {len(train_dataset)} train / {len(test_dataset)} test samples") # ===== Format samples ===== def format_example(example): short_prompt = example.get("short", "").strip() long_response = example.get("long", "").strip() return { "text": ( f"<|system|>\nYou are an AI that expands short prompts into detailed, descriptive ones.\n" f"<|user|>\nShort: {short_prompt}\n" f"<|assistant|>\n{long_response}" ) } train_dataset = train_dataset.map(format_example) test_dataset = test_dataset.map(format_example) # ===== Format examples dynamically ===== def format_example(item): text_content = item.get("text") or item.get("content") or str(item.get("path", "")) or " ".join(str(v) for v in item.values()) # Use shorter, clean system prompt + user content for better loss prompt = ( f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n" ) return {"text": prompt} train_dataset = train_dataset.map(format_example) test_dataset = test_dataset.map(format_example) log_message(output_log, f"āœ… Formatted {len(train_dataset)} train + {len(test_dataset)} test examples") # ===== Load model & tokenizer ===== log_message(output_log, f"\nšŸ¤– Loading model: {base_model}") tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( base_model, trust_remote_code=True, torch_dtype=torch.float16 if device == "cuda" else torch.float32, low_cpu_mem_usage=True, ) if device == "cuda": model = model.to(device) log_message(output_log, "āœ… Model and tokenizer loaded successfully") log_message(output_log, f"Tokenizer vocab size: {tokenizer.vocab_size}") # ===== LoRA configuration ===== log_message(output_log, "\nāš™ļø Configuring LoRA for efficient fine-tuning...") lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=16, lora_dropout=0.1, target_modules=["q_proj", "v_proj"], bias="none", ) model = get_peft_model(model, lora_config) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) log_message(output_log, f"Trainable params after LoRA: {trainable_params:,}") # ===== Tokenization + labels ===== def tokenize_fn(examples): tokenized = tokenizer( examples["text"], padding="max_length", truncation=True, max_length=256, ) tokenized["labels"] = tokenized["input_ids"].copy() return tokenized train_dataset = train_dataset.map(tokenize_fn, batched=True) test_dataset = test_dataset.map(tokenize_fn, batched=True) log_message(output_log, "āœ… Tokenization + labels done") # ===== Training arguments ===== output_dir = "./qwen-gita-lora" training_args = TrainingArguments( output_dir=output_dir, num_train_epochs=num_epochs, per_device_train_batch_size=batch_size, gradient_accumulation_steps=2, warmup_steps=10, logging_steps=5, save_strategy="epoch", fp16=device == "cuda", optim="adamw_torch", learning_rate=learning_rate, max_steps=500, # Limit for demo is 100 ) trainer = Trainer( model=model, args=training_args, train_dataset=train_dataset, eval_dataset=test_dataset, tokenizer=tokenizer, ) # ===== Train ===== log_message(output_log, "\nšŸš€ Starting training...") trainer.train() log_message(output_log, "\nšŸ’¾ Saving trained model locally...") trainer.save_model(output_dir) tokenizer.save_pretrained(output_dir) # ===== Async upload ===== log_message(output_log, f"\nā˜ļø Initiating async upload to {hf_repo}") start_async_upload(output_dir, hf_repo, output_log) log_message(output_log, "āœ… Training complete & async upload started!") except Exception as e: log_message(output_log, f"\nāŒ Error during training: {e}") return "\n".join(output_log) # ==== Gradio Interface ==== def create_interface(): with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo: gr.Markdown(""" # 🧘 PromptWizard Qwen Fine-tuning Fine-tune Qwen on any dataset and upload to any Hugging Face repo. """) with gr.Row(): with gr.Column(): gr.Textbox(label="GPU Status", value=check_gpu_status(), interactive=False) base_model = gr.Textbox(label="Base Model", value="Qwen/Qwen2.5-0.5B") dataset_name = gr.Textbox(label="Dataset Name", value="rahul7star/Gita") hf_repo = gr.Textbox(label="HF Repo for Upload", value="rahul7star/Qwen0.5-3B-Gita") num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs") batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size") learning_rate = gr.Number(value=5e-5, label="Learning Rate") train_btn = gr.Button("šŸš€ Start Fine-tuning", variant="primary") with gr.Column(): output = gr.Textbox( label="Training Log", lines=25, max_lines=40, value="Click 'Start Fine-tuning' to train and upload your model.", ) train_btn.click( fn=train_model, inputs=[base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo], outputs=output, ) return demo if __name__ == "__main__": demo = create_interface() demo.launch(server_name="0.0.0.0", server_port=7860)