"""
Helion-V1.5-XL Usage Examples
Demonstrates various use cases and configurations
"""
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# Initialize model and tokenizer
MODEL_NAME = "DeepXR/Helion-V1.5-XL"
def load_model(quantization="none"):
"""Load model with optional quantization"""
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if quantization == "4bit":
from transformers import BitsAndBytesConfig
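        # NF4 4-bit quantization with nested (double) quantization; compute is
        # done in bfloat16. Requires the bitsandbytes package to be installed.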
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4"
)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
quantization_config=quantization_config,
device_map="auto",
trust_remote_code=True
)
else:
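        # Full-precision bfloat16 load, sharded across available devices
        # by accelerate via device_map="auto"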
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.bfloat16,
device_map="auto",
trust_remote_code=True
)
return model, tokenizer
# Example 1: Simple Text Generation
def example_simple_generation():
"""Basic text generation example"""
print("\n" + "="*80)
print("EXAMPLE 1: Simple Text Generation")
print("="*80)
model, tokenizer = load_model()
prompt = "Explain the concept of neural networks in simple terms:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
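    # Nucleus sampling: temperature rescales the logits, top_p keeps the
    # smallest token set whose cumulative probability reaches 0.9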
outputs = model.generate(
**inputs,
max_new_tokens=256,
temperature=0.7,
top_p=0.9,
do_sample=True
)
    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    print(f"\nPrompt: {prompt}")
    print(f"\nResponse: {response}")
# Example 2: Chat Conversation
def example_chat_conversation():
"""Multi-turn conversation example"""
print("\n" + "="*80)
print("EXAMPLE 2: Chat Conversation")
print("="*80)
model, tokenizer = load_model()
conversation = [
{"role": "system", "content": "You are a helpful AI assistant."},
{"role": "user", "content": "What are the main benefits of renewable energy?"},
]
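    # apply_chat_template renders the turns in the model's chat format;
    # add_generation_prompt appends the header for the assistant's reply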
prompt = tokenizer.apply_chat_template(
conversation,
tokenize=False,
add_generation_prompt=True
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=300, temperature=0.7, do_sample=True)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\nConversation:\n{response}")
# Example 3: Code Generation
def example_code_generation():
"""Code generation example"""
print("\n" + "="*80)
print("EXAMPLE 3: Code Generation")
print("="*80)
model, tokenizer = load_model()
prompt = """Write a Python function that finds the longest palindromic substring:
def longest_palindrome(s: str) -> str:"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=512,
temperature=0.2, # Lower temperature for code
top_p=0.95,
do_sample=True
)
code = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\nGenerated Code:\n{code}")
# Example 4: Structured Output (JSON)
def example_structured_output():
"""Generate structured JSON output"""
print("\n" + "="*80)
print("EXAMPLE 4: Structured JSON Output")
print("="*80)
model, tokenizer = load_model()
prompt = """Generate a JSON object describing a fictional book:
{
"title": "The Last Algorithm",
"author": """
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.4,
        top_p=0.9,
        do_sample=True  # sampling parameters are ignored under greedy decoding
    )
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\nGenerated JSON:\n{result}")
# Example 5: Batch Processing
def example_batch_processing():
"""Process multiple prompts in batch"""
print("\n" + "="*80)
print("EXAMPLE 5: Batch Processing")
print("="*80)
model, tokenizer = load_model()
prompts = [
"List three benefits of exercise:",
"What is quantum computing?",
"Explain photosynthesis briefly:"
]
    # Decoder-only models need a pad token and left padding for batched generation
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"
    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True
    ).to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=128,
temperature=0.7,
do_sample=True
)
    prompt_len = inputs["input_ids"].shape[1]
    for i, output in enumerate(outputs):
        # With left padding, new tokens start right after the padded prompt
        response = tokenizer.decode(output[prompt_len:], skip_special_tokens=True)
        print(f"\nPrompt {i+1}: {prompts[i]}")
        print(f"Response: {response}\n")
# Example 6: Creative Writing
def example_creative_writing():
"""Creative writing with higher temperature"""
print("\n" + "="*80)
print("EXAMPLE 6: Creative Writing")
print("="*80)
model, tokenizer = load_model()
prompt = "Write the opening paragraph of a science fiction story:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
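    # Higher temperature/top_k widen the sampling distribution;
    # repetition_penalty > 1 discourages the model from looping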
outputs = model.generate(
**inputs,
max_new_tokens=512,
temperature=0.9, # Higher for creativity
top_p=0.95,
top_k=100,
repetition_penalty=1.15,
do_sample=True
)
story = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\n{story}")
# Example 7: Using Pipeline API
def example_pipeline_api():
"""Use the transformers pipeline API"""
print("\n" + "="*80)
print("EXAMPLE 7: Pipeline API")
print("="*80)
generator = pipeline(
"text-generation",
model=MODEL_NAME,
torch_dtype=torch.bfloat16,
device_map="auto"
)
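    # pipeline() bundles tokenization, generation, and decoding into one call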
    results = generator(
        "The future of artificial intelligence is",
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1
    )
print(f"\nGenerated text:\n{results[0]['generated_text']}")
# Example 8: Streaming Generation
def example_streaming_generation():
"""Generate text with streaming (token by token)"""
print("\n" + "="*80)
print("EXAMPLE 8: Streaming Generation")
print("="*80)
from transformers import TextIteratorStreamer
from threading import Thread
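    # TextIteratorStreamer yields decoded text chunks as generate() produces
    # tokens on a background thread, so the main thread can print them live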
model, tokenizer = load_model()
prompt = "Explain machine learning in three sentences:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # skip_prompt=True streams only the newly generated text, not the echoed prompt
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        streamer=streamer
    )
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
print(f"\nPrompt: {prompt}\n\nResponse (streaming): ", end="")
for new_text in streamer:
print(new_text, end="", flush=True)
print("\n")
thread.join()
# Example 9: Few-Shot Learning
def example_few_shot():
"""Few-shot learning example"""
print("\n" + "="*80)
print("EXAMPLE 9: Few-Shot Learning")
print("="*80)
model, tokenizer = load_model()
prompt = """Translate English to French:
English: Hello, how are you?
French: Bonjour, comment allez-vous?
English: What is your name?
French: Comment vous appelez-vous?
English: I love programming.
French:"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.3, do_sample=True)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\n{result}")
# Example 10: Custom Generation Parameters
def example_custom_parameters():
"""Advanced generation parameter tuning"""
print("\n" + "="*80)
print("EXAMPLE 10: Custom Generation Parameters")
print("="*80)
model, tokenizer = load_model()
prompt = "Write a haiku about technology:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Multiple generations with different parameters
configs = [
{"name": "Conservative", "temperature": 0.3, "top_p": 0.9, "top_k": 30},
{"name": "Balanced", "temperature": 0.7, "top_p": 0.9, "top_k": 50},
{"name": "Creative", "temperature": 1.0, "top_p": 0.95, "top_k": 100},
]
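    # Lower temperature/top_k make output more deterministic; higher values
    # trade coherence for diversity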
for config in configs:
outputs = model.generate(
**inputs,
max_new_tokens=128,
temperature=config["temperature"],
top_p=config["top_p"],
top_k=config["top_k"],
do_sample=True
)
        result = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )
        print(f"\n{config['name']} (temp={config['temperature']}):")
        print(result)
def main():
"""Run all examples"""
print("\n" + "="*80)
print("HELION-V1.5-XL USAGE EXAMPLES")
print("="*80)
examples = [
("Simple Generation", example_simple_generation),
("Chat Conversation", example_chat_conversation),
("Code Generation", example_code_generation),
("Structured Output", example_structured_output),
("Batch Processing", example_batch_processing),
("Creative Writing", example_creative_writing),
("Pipeline API", example_pipeline_api),
("Streaming Generation", example_streaming_generation),
("Few-Shot Learning", example_few_shot),
("Custom Parameters", example_custom_parameters),
]
print("\nAvailable examples:")
for i, (name, _) in enumerate(examples, 1):
print(f" {i}. {name}")
print("\nRun individual examples or all examples.")
print("Example: python example_usage.py")
# Uncomment to run specific examples
# example_simple_generation()
# example_chat_conversation()
# example_code_generation()
if __name__ == "__main__":
main()