"""
Helion-V1.5-XL Usage Examples

Demonstrates various use cases and configurations.

Run without arguments to list the available examples, or pass example
numbers (or ``all``) to execute them::

    python example_usage.py            # list examples
    python example_usage.py 1 3        # run examples 1 and 3
    python example_usage.py all        # run every example
"""

import sys
from threading import Thread

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

# Initialize model and tokenizer
MODEL_NAME = "DeepXR/Helion-V1.5-XL"

# (model, tokenizer) pairs already loaded, keyed by quantization mode, so that
# running several examples in one process does not reload the checkpoint.
_MODEL_CACHE = {}


def load_model(quantization="none"):
    """Load the model and tokenizer, with optional 4-bit quantization.

    Args:
        quantization: ``"4bit"`` loads the weights with a bitsandbytes NF4
            configuration; any other value loads them in bfloat16.

    Returns:
        A ``(model, tokenizer)`` tuple. Results are cached per
        ``quantization`` value, so repeated calls return the same objects.
    """
    cached = _MODEL_CACHE.get(quantization)
    if cached is not None:
        return cached

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Decoder-only models continue from the last position of each row, so
    # batched prompts must be padded on the left, and a pad token must exist.
    tokenizer.padding_side = "left"
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # NOTE(security): trust_remote_code=True executes Python code shipped in
    # the model repository. Only keep it for repositories you trust.
    load_kwargs = {"device_map": "auto", "trust_remote_code": True}

    if quantization == "4bit":
        # Imported lazily: only the quantized path needs it.
        from transformers import BitsAndBytesConfig

        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
    else:
        load_kwargs["torch_dtype"] = torch.bfloat16

    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

    _MODEL_CACHE[quantization] = (model, tokenizer)
    return model, tokenizer


def _print_banner(title):
    """Print *title* framed by two 80-character rules."""
    print("\n" + "=" * 80)
    print(title)
    print("=" * 80)


def _decode_completion(tokenizer, output_ids, prompt_length):
    """Decode only the tokens generated after the first *prompt_length* ids.

    Slicing token ids is used instead of slicing the decoded string by
    ``len(prompt)``, because decoding does not always reproduce the prompt
    text character for character.
    """
    return tokenizer.decode(output_ids[prompt_length:], skip_special_tokens=True)


# Example 1: Simple Text Generation
def example_simple_generation():
    """Basic text generation example."""
    _print_banner("EXAMPLE 1: Simple Text Generation")

    model, tokenizer = load_model()

    prompt = "Explain the concept of neural networks in simple terms:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    response = _decode_completion(
        tokenizer, outputs[0], inputs["input_ids"].shape[-1]
    )
    print(f"\nPrompt: {prompt}")
    print(f"\nResponse: {response}")


# Example 2: Chat Conversation
def example_chat_conversation():
    """Multi-turn conversation example using the tokenizer's chat template."""
    _print_banner("EXAMPLE 2: Chat Conversation")

    model, tokenizer = load_model()

    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What are the main benefits of renewable energy?"},
    ]

    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template already carries its special tokens; adding them
    # again while tokenizing would duplicate them.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    # temperature only takes effect when sampling is enabled.
    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nConversation:\n{response}")


# Example 3: Code Generation
def example_code_generation():
    """Code generation example."""
    _print_banner("EXAMPLE 3: Code Generation")

    model, tokenizer = load_model()

    prompt = (
        "Write a Python function that finds the longest palindromic substring:\n"
        "\n"
        "def longest_palindrome(s: str) -> str:"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.2,  # Lower temperature for code
        top_p=0.95,
        do_sample=True,
    )

    # The prompt ends with the function signature, so print prompt + completion.
    code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nGenerated Code:\n{code}")


# Example 4: Structured Output (JSON)
def example_structured_output():
    """Generate structured JSON output."""
    _print_banner("EXAMPLE 4: Structured JSON Output")

    model, tokenizer = load_model()

    prompt = (
        "Generate a JSON object describing a fictional book:\n"
        "{\n"
        '  "title": "The Last Algorithm",\n'
        '  "author": '
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # temperature/top_p only take effect when sampling is enabled.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.4,
        top_p=0.9,
        do_sample=True,
    )

    # The prompt opens the JSON object, so print prompt + completion.
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nGenerated JSON:\n{result}")


# Example 5: Batch Processing
def example_batch_processing():
    """Process multiple prompts in one batch."""
    _print_banner("EXAMPLE 5: Batch Processing")

    model, tokenizer = load_model()

    prompts = [
        "List three benefits of exercise:",
        "What is quantum computing?",
        "Explain photosynthesis briefly:",
    ]

    # load_model() configures left padding and a pad token, which batched
    # generation with a decoder-only model requires.
    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        temperature=0.7,
        do_sample=True,
    )

    # Every row shares the same padded prompt length.
    prompt_length = inputs["input_ids"].shape[-1]
    for number, (prompt, output) in enumerate(zip(prompts, outputs), 1):
        response = _decode_completion(tokenizer, output, prompt_length)
        print(f"\nPrompt {number}: {prompt}")
        print(f"Response: {response}\n")


# Example 6: Creative Writing
def example_creative_writing():
    """Creative writing with higher temperature."""
    _print_banner("EXAMPLE 6: Creative Writing")

    model, tokenizer = load_model()

    prompt = "Write the opening paragraph of a science fiction story:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.9,  # Higher for creativity
        top_p=0.95,
        top_k=100,
        repetition_penalty=1.15,
        do_sample=True,
    )

    story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\n{story}")


# Example 7: Using Pipeline API
def example_pipeline_api():
    """Use the transformers pipeline API."""
    _print_banner("EXAMPLE 7: Pipeline API")

    # NOTE(review): load_model() passes trust_remote_code=True but this
    # pipeline does not; confirm whether the checkpoint needs it here too.
    generator = pipeline(
        "text-generation",
        model=MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # temperature/top_p only take effect when sampling is enabled.
    results = generator(
        "The future of artificial intelligence is",
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )

    print(f"\nGenerated text:\n{results[0]['generated_text']}")


# Example 8: Streaming Generation
def example_streaming_generation():
    """Generate text with streaming (printed as it is produced)."""
    _print_banner("EXAMPLE 8: Streaming Generation")

    model, tokenizer = load_model()

    prompt = "Explain machine learning in three sentences:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True: the prompt is printed separately below, so the
    # streamer should yield only newly generated text.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        streamer=streamer,
    )

    # generate() blocks until finished, so it runs in a worker thread while
    # this thread consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    print(f"\nPrompt: {prompt}\n\nResponse (streaming): ", end="", flush=True)
    for new_text in streamer:
        print(new_text, end="", flush=True)
    print("\n")

    thread.join()


# Example 9: Few-Shot Learning
def example_few_shot():
    """Few-shot learning example."""
    _print_banner("EXAMPLE 9: Few-Shot Learning")

    model, tokenizer = load_model()

    prompt = (
        "Translate English to French:\n"
        "\n"
        "English: Hello, how are you?\n"
        "French: Bonjour, comment allez-vous?\n"
        "\n"
        "English: What is your name?\n"
        "French: Comment vous appelez-vous?\n"
        "\n"
        "English: I love programming.\n"
        "French:"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # temperature only takes effect when sampling is enabled.
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        temperature=0.3,
        do_sample=True,
    )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\n{result}")


# Example 10: Custom Generation Parameters
def example_custom_parameters():
    """Advanced generation parameter tuning."""
    _print_banner("EXAMPLE 10: Custom Generation Parameters")

    model, tokenizer = load_model()

    prompt = "Write a haiku about technology:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Multiple generations with different parameters
    configs = [
        {"name": "Conservative", "temperature": 0.3, "top_p": 0.9, "top_k": 30},
        {"name": "Balanced", "temperature": 0.7, "top_p": 0.9, "top_k": 50},
        {"name": "Creative", "temperature": 1.0, "top_p": 0.95, "top_k": 100},
    ]

    for config in configs:
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=config["temperature"],
            top_p=config["top_p"],
            top_k=config["top_k"],
            do_sample=True,
        )

        result = _decode_completion(tokenizer, outputs[0], prompt_length)
        print(f"\n{config['name']} (temp={config['temperature']}):")
        print(result)


def main(argv=None):
    """List the examples and run the ones selected on the command line.

    Args:
        argv: Selection arguments. Defaults to ``sys.argv[1:]``. An empty
            selection only lists the examples; ``["all"]`` runs every
            example; otherwise each item must be an example number.

    Raises:
        SystemExit: If a selection is neither ``all`` nor a valid number.
    """
    _print_banner("HELION-V1.5-XL USAGE EXAMPLES")

    examples = [
        ("Simple Generation", example_simple_generation),
        ("Chat Conversation", example_chat_conversation),
        ("Code Generation", example_code_generation),
        ("Structured Output", example_structured_output),
        ("Batch Processing", example_batch_processing),
        ("Creative Writing", example_creative_writing),
        ("Pipeline API", example_pipeline_api),
        ("Streaming Generation", example_streaming_generation),
        ("Few-Shot Learning", example_few_shot),
        ("Custom Parameters", example_custom_parameters),
    ]

    print("\nAvailable examples:")
    for number, (name, _) in enumerate(examples, 1):
        print(f"  {number}. {name}")

    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("\nRun individual examples or all examples.")
        print("Example: python example_usage.py 1 3   (or: python example_usage.py all)")
        return

    if len(args) == 1 and args[0].lower() == "all":
        selected = list(range(1, len(examples) + 1))
    else:
        selected = []
        for arg in args:
            try:
                number = int(arg)
            except ValueError:
                number = 0  # falls through to the range check below
            if not 1 <= number <= len(examples):
                raise SystemExit(
                    f"Unknown example {arg!r}: expected 'all' or a number "
                    f"from 1 to {len(examples)}."
                )
            selected.append(number)

    for number in selected:
        _, run_example = examples[number - 1]
        run_example()


if __name__ == "__main__":
    main()