"""
Helion-V1.5-XL Usage Examples

Demonstrates various use cases and configurations.
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "DeepXR/Helion-V1.5-XL"


def load_model(quantization="none"):
    """Load the model and tokenizer, optionally with 4-bit quantization."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    if quantization == "4bit":
        from transformers import BitsAndBytesConfig

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )

    return model, tokenizer


def example_simple_generation():
    """Basic text generation example."""
    print("\n" + "=" * 80)
    print("EXAMPLE 1: Simple Text Generation")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = "Explain the concept of neural networks in simple terms:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nPrompt: {prompt}")
    print(f"\nResponse: {response[len(prompt):]}")


def example_chat_conversation():
    """Multi-turn conversation example."""
    print("\n" + "=" * 80)
    print("EXAMPLE 2: Chat Conversation")
    print("=" * 80)

    model, tokenizer = load_model()

    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What are the main benefits of renewable energy?"},
    ]

    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # do_sample=True is required for temperature to take effect; without it,
    # generation falls back to greedy decoding and the setting is ignored.
    outputs = model.generate(**inputs, max_new_tokens=300, temperature=0.7, do_sample=True)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nConversation:\n{response}")


def example_code_generation():
    """Code generation example."""
    print("\n" + "=" * 80)
    print("EXAMPLE 3: Code Generation")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = """Write a Python function that finds the longest palindromic substring:

def longest_palindrome(s: str) -> str:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.2,
        top_p=0.95,
        do_sample=True,
    )

    code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nGenerated Code:\n{code}")


def example_structured_output():
    """Generate structured JSON output."""
    print("\n" + "=" * 80)
    print("EXAMPLE 4: Structured JSON Output")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = """Generate a JSON object describing a fictional book:
{
  "title": "The Last Algorithm",
  "author": """

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.4,
        top_p=0.9,
        do_sample=True,  # sampling must be enabled for temperature/top_p to apply
    )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nGenerated JSON:\n{result}")


def example_batch_processing():
    """Process multiple prompts in a single batch."""
    print("\n" + "=" * 80)
    print("EXAMPLE 5: Batch Processing")
    print("=" * 80)

    model, tokenizer = load_model()

    # Causal LM tokenizers often ship without a padding token; reuse EOS and
    # pad on the left so the generated text follows each prompt directly.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"

    prompts = [
        "List three benefits of exercise:",
        "What is quantum computing?",
        "Explain photosynthesis briefly:",
    ]

    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        temperature=0.7,
        do_sample=True,
    )

    for i, output in enumerate(outputs):
        response = tokenizer.decode(output, skip_special_tokens=True)
        print(f"\nPrompt {i+1}: {prompts[i]}")
        print(f"Response: {response[len(prompts[i]):]}\n")


def example_creative_writing():
    """Creative writing with higher temperature."""
    print("\n" + "=" * 80)
    print("EXAMPLE 6: Creative Writing")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = "Write the opening paragraph of a science fiction story:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.9,
        top_p=0.95,
        top_k=100,
        repetition_penalty=1.15,
        do_sample=True,
    )

    story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\n{story}")


def example_pipeline_api():
    """Use the transformers pipeline API."""
    print("\n" + "=" * 80)
    print("EXAMPLE 7: Pipeline API")
    print("=" * 80)

    generator = pipeline(
        "text-generation",
        model=MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    results = generator(
        "The future of artificial intelligence is",
        max_new_tokens=200,
        do_sample=True,  # enable sampling so temperature/top_p are honored
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
    )

    print(f"\nGenerated text:\n{results[0]['generated_text']}")


def example_streaming_generation():
    """Generate text with streaming (token by token)."""
    print("\n" + "=" * 80)
    print("EXAMPLE 8: Streaming Generation")
    print("=" * 80)

    from threading import Thread

    from transformers import TextIteratorStreamer

    model, tokenizer = load_model()

    prompt = "Explain machine learning in three sentences:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt keeps the prompt from being echoed back into the stream.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        streamer=streamer,
    )

    # Run generation in a background thread so the main thread can consume
    # the streamer as tokens arrive.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    print(f"\nPrompt: {prompt}\n\nResponse (streaming): ", end="")
    for new_text in streamer:
        print(new_text, end="", flush=True)

    print("\n")
    thread.join()


def example_few_shot():
    """Few-shot learning example."""
    print("\n" + "=" * 80)
    print("EXAMPLE 9: Few-Shot Learning")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = """Translate English to French:

English: Hello, how are you?
French: Bonjour, comment allez-vous?

English: What is your name?
French: Comment vous appelez-vous?

English: I love programming.
French:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.3, do_sample=True)

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\n{result}")


def example_custom_parameters():
    """Advanced generation parameter tuning."""
    print("\n" + "=" * 80)
    print("EXAMPLE 10: Custom Generation Parameters")
    print("=" * 80)

    model, tokenizer = load_model()

    prompt = "Write a haiku about technology:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    configs = [
        {"name": "Conservative", "temperature": 0.3, "top_p": 0.9, "top_k": 30},
        {"name": "Balanced", "temperature": 0.7, "top_p": 0.9, "top_k": 50},
        {"name": "Creative", "temperature": 1.0, "top_p": 0.95, "top_k": 100},
    ]

    for config in configs:
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=config["temperature"],
            top_p=config["top_p"],
            top_k=config["top_k"],
            do_sample=True,
        )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\n{config['name']} (temp={config['temperature']}):")
        print(result[len(prompt):])


def main():
    """List the available examples and optionally run one by number."""
    import sys

    print("\n" + "=" * 80)
    print("HELION-V1.5-XL USAGE EXAMPLES")
    print("=" * 80)

    examples = [
        ("Simple Generation", example_simple_generation),
        ("Chat Conversation", example_chat_conversation),
        ("Code Generation", example_code_generation),
        ("Structured Output", example_structured_output),
        ("Batch Processing", example_batch_processing),
        ("Creative Writing", example_creative_writing),
        ("Pipeline API", example_pipeline_api),
        ("Streaming Generation", example_streaming_generation),
        ("Few-Shot Learning", example_few_shot),
        ("Custom Parameters", example_custom_parameters),
    ]

    print("\nAvailable examples:")
    for i, (name, _) in enumerate(examples, 1):
        print(f"  {i}. {name}")

    # Run a single example if its number is passed on the command line,
    # e.g. `python example_usage.py 3` runs the code generation example.
    if len(sys.argv) > 1:
        name, run_example = examples[int(sys.argv[1]) - 1]
        print(f"\nRunning example: {name}")
        run_example()
    else:
        print("\nRun an example with: python example_usage.py <number>")


if __name__ == "__main__":
    main()