lemms committed on
Commit
cb00868
·
verified ·
1 Parent(s): f7c7866

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +298 -658
app.py CHANGED
@@ -1,691 +1,331 @@
1
- #!/usr/bin/env python3
2
- """
3
- OpenLLM Real Models App - Final working version with correct attribute naming
4
- """
5
-
6
- import json
7
- import logging
8
- import math
9
- from pathlib import Path
10
- from typing import Any, Dict, Optional
11
-
12
  import gradio as gr
13
- import sentencepiece as spm
14
  import torch
15
- import torch.nn as nn
16
- import torch.nn.functional as F
17
- from huggingface_hub import snapshot_download
18
-
19
- # Set up logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class GPTConfig:
25
- """GPT model configuration"""
26
-
27
- def __init__(
28
- self,
29
- vocab_size=32000,
30
- n_layer=6,
31
- n_head=8,
32
- n_embd=512,
33
- block_size=1024,
34
- dropout=0.1,
35
- bias=False,
36
- **kwargs,
37
- ):
38
- # Accept any additional kwargs to handle extra config fields
39
- self.vocab_size = vocab_size
40
- self.n_layer = n_layer
41
- self.n_head = n_head
42
- self.n_embd = n_embd
43
- self.block_size = block_size
44
- self.dropout = dropout
45
- self.bias = bias
46
-
47
-
48
- class GPT(nn.Module):
49
- """GPT-style transformer model - EXACT architecture matching the saved model"""
50
-
51
- def __init__(self, config):
52
- super().__init__()
53
- assert config.vocab_size is not None
54
- assert config.block_size is not None
55
- self.config = config
56
-
57
- # Create the transformer module with the exact naming convention
58
- self.transformer = nn.ModuleDict(
59
- dict(
60
- wte=nn.Embedding(config.vocab_size, config.n_embd),
61
- wpe=nn.Embedding(config.block_size, config.n_embd),
62
- drop=nn.Dropout(config.dropout),
63
- h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
64
- ln_f=nn.LayerNorm(config.n_embd),
65
- )
66
- )
67
-
68
- # Language model head - Use bias=False to match saved models
69
- self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
70
-
71
- # Initialize weights
72
- self.apply(self._init_weights)
73
- for pn, p in self.named_parameters():
74
- if pn.endswith("c_proj.weight"):
75
- torch.nn.init.normal_(p, mean=0.0, std=0.02 / math.sqrt(2 * config.n_layer))
76
-
77
- def _init_weights(self, module):
78
- if isinstance(module, nn.Linear):
79
- torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
80
- if module.bias is not None:
81
- torch.nn.init.zeros_(module.bias)
82
- elif isinstance(module, nn.Embedding):
83
- torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
84
-
85
- def forward(self, idx, targets=None):
86
- device = idx.device
87
- b, t = idx.size()
88
- assert (
89
- t <= self.config.block_size
90
- ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
91
-
92
- pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
93
- tok_emb = self.transformer.wte(idx)
94
- pos_emb = self.transformer.wpe(pos)
95
- x = self.transformer.drop(tok_emb + pos_emb)
96
-
97
- for block in self.transformer.h:
98
- x = block(x)
99
- x = self.transformer.ln_f(x)
100
-
101
- if targets is not None:
102
- logits = self.lm_head(x)
103
- loss = F.cross_entropy(
104
- logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1
105
- )
106
- else:
107
- logits = self.lm_head(x[:, [-1], :])
108
- loss = None
109
-
110
- return logits, loss
111
-
112
- def generate(
113
- self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True
114
- ):
115
- for _ in range(max_new_tokens):
116
- idx_cond = (
117
- idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size :]
118
- )
119
- logits, _ = self(idx_cond)
120
- logits = logits[:, -1, :] / temperature
121
-
122
- if top_k is not None:
123
- v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
124
- logits[logits < v[:, [-1]]] = -float("Inf")
125
-
126
- if top_p is not None:
127
- sorted_logits, sorted_indices = torch.sort(logits, descending=True)
128
- cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
129
- sorted_indices_to_remove = cumulative_probs > top_p
130
- sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
131
- sorted_indices_to_remove[..., 0] = 0
132
- indices_to_remove = sorted_indices_to_remove.scatter(
133
- 1, sorted_indices, sorted_indices_to_remove
134
- )
135
- logits[indices_to_remove] = -float("Inf")
136
-
137
- probs = F.softmax(logits, dim=-1)
138
- if do_sample:
139
- idx_next = torch.multinomial(probs, num_samples=1)
140
- else:
141
- _, idx_next = torch.topk(probs, k=1, dim=-1)
142
-
143
- idx = torch.cat((idx, idx_next), dim=1)
144
-
145
- return idx
146
-
147
-
148
- class Block(nn.Module):
149
- """Transformer block with self-attention and feed-forward layers"""
150
-
151
- def __init__(self, config):
152
- super().__init__()
153
- self.ln_1 = nn.LayerNorm(config.n_embd)
154
- self.attn = CausalSelfAttention(config)
155
- self.ln_2 = nn.LayerNorm(config.n_embd)
156
- self.mlp = MLP(config)
157
-
158
- def forward(self, x):
159
- x = x + self.attn(self.ln_1(x))
160
- x = x + self.mlp(self.ln_2(x))
161
- return x
162
-
163
-
164
- class CausalSelfAttention(nn.Module):
165
- """Multi-head self-attention with causal masking - FINAL WORKING VERSION"""
166
-
167
- def __init__(self, config):
168
- super().__init__()
169
- assert config.n_embd % config.n_head == 0
170
- self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
171
- self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
172
- self.attn_dropout = nn.Dropout(config.dropout)
173
- self.resid_dropout = nn.Dropout(config.dropout)
174
- self.n_head = config.n_head
175
- self.n_embd = config.n_embd
176
- self.dropout = config.dropout
177
- self.use_bias = config.bias # Use different name for the boolean flag
178
-
179
- # REGISTER THE ATTENTION BIAS as a buffer (not parameter) to match saved model
180
- # This is actually an attention mask, not a learnable bias
181
- if config.bias:
182
- # Create a causal attention mask buffer
183
- mask = torch.tril(torch.ones(config.block_size, config.block_size))
184
- mask = mask.view(1, 1, config.block_size, config.block_size)
185
- self.register_buffer("bias", mask) # This matches the saved model's 'bias' key
186
- else:
187
- self.register_buffer("bias", None)
188
-
189
- def forward(self, x):
190
- B, T, C = x.size()
191
-
192
- # Calculate query, key, values for all heads
193
- q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
194
- k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
195
- q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
196
- v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
197
-
198
- # Causal self-attention using the bias mask
199
- if self.bias is not None:
200
- # Use the causal mask
201
- attn_mask = self.bias[:, :, :T, :T]
202
- y = F.scaled_dot_product_attention(
203
- q,
204
- k,
205
- v,
206
- attn_mask=attn_mask,
207
- dropout_p=self.dropout if self.training else 0,
208
- is_causal=False,
209
- )
210
- else:
211
- # Use built-in causal attention
212
- y = F.scaled_dot_product_attention(
213
- q,
214
- k,
215
- v,
216
- attn_mask=None,
217
- dropout_p=self.dropout if self.training else 0,
218
- is_causal=True,
219
- )
220
-
221
- y = y.transpose(1, 2).contiguous().view(B, T, C)
222
-
223
- # Output projection
224
- y = self.resid_dropout(self.c_proj(y))
225
- return y
226
-
227
-
228
- class MLP(nn.Module):
229
- """Multi-layer perceptron"""
230
-
231
- def __init__(self, config):
232
- super().__init__()
233
- self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
234
- self.gelu = nn.GELU()
235
- self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
236
- self.dropout = nn.Dropout(config.dropout)
237
-
238
- def forward(self, x):
239
- x = self.c_fc(x)
240
- x = self.gelu(x)
241
- x = self.c_proj(x)
242
- x = self.dropout(x)
243
- return x
244
-
245
-
246
- class RealOpenLLMInference:
247
- """Real OpenLLM inference engine using actual trained models"""
248
-
249
  def __init__(self):
250
- self.models = {}
251
- self.tokenizers = {}
252
- self.current_model = None
253
-
254
- # Real model configurations from Hugging Face
255
- self.model_configs = {
256
- "openllm-small-extended-4k": {
257
- "name": "OpenLLM Small (4k steps)",
258
- "description": "Real model trained for 4,000 steps - Early training stage",
259
- "hf_repo": "lemms/openllm-small-extended-4k",
260
- "training_steps": 4000,
261
- "parameters": "35.8M",
262
- },
263
- "openllm-small-extended-6k": {
264
- "name": "OpenLLM Small (6k steps)",
265
- "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
266
- "hf_repo": "lemms/openllm-small-extended-6k",
267
- "training_steps": 6000,
268
- "parameters": "35.8M",
269
- },
270
- "openllm-small-extended-7k": {
271
- "name": "OpenLLM Small (7k steps)",
272
- "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
273
- "hf_repo": "lemms/openllm-small-extended-7k",
274
- "training_steps": 7000,
275
- "parameters": "35.8M",
276
- },
277
- "openllm-small-extended-8k": {
278
- "name": "OpenLLM Small (8k steps)",
279
- "description": "Real model trained for 8,000 steps - Sophisticated understanding",
280
- "hf_repo": "lemms/openllm-small-extended-8k",
281
- "training_steps": 8000,
282
- "parameters": "35.8M",
283
- },
284
- "openllm-small-extended-9k": {
285
- "name": "OpenLLM Small (9k steps)",
286
- "description": "Real model trained for 9,000 steps - Best performing model",
287
- "hf_repo": "lemms/openllm-small-extended-9k",
288
- "training_steps": 9000,
289
- "parameters": "35.8M",
290
- },
291
- "openllm-small-extended-10k": {
292
- "name": "OpenLLM Small (10k steps)",
293
- "description": "Real model trained for 10,000 steps - Latest extended training",
294
- "hf_repo": "lemms/openllm-small-extended-10k",
295
- "training_steps": 10000,
296
- "parameters": "35.8M",
297
- },
298
- "openllm-small-extended-10k-improved": {
299
- "name": "OpenLLM Small (10k steps - Improved)",
300
- "description": "Real model trained for 10,000 steps with improved training process - Proper checkpoint format",
301
- "hf_repo": "lemms/openllm-small-extended-10k-improved",
302
- "training_steps": 10000,
303
- "parameters": "35.8M",
304
- },
305
  }
306
-
307
- logger.info("🚀 Real OpenLLM Inference Engine initialized")
308
-
309
- def load_model_from_hf(self, model_id: str) -> bool:
310
- """Load a real model from Hugging Face"""
311
  try:
312
- config = self.model_configs.get(model_id)
313
- if not config:
314
- logger.error(f"❌ Unknown model ID: {model_id}")
315
- return False
316
-
317
- logger.info(f"📥 Loading real model from HF: {config['hf_repo']}")
318
-
319
- # Download model from Hugging Face
320
- local_dir = snapshot_download(
321
- repo_id=config["hf_repo"],
322
- repo_type="model",
323
- local_dir=f"temp_{model_id}",
324
- allow_patterns=["*.pt", "*.json", "*.model", "*.bin"],
325
- )
326
-
327
- logger.info(f"✅ Downloaded model to: {local_dir}")
328
-
329
- # Load model and tokenizer
330
- success = self._load_model_and_tokenizer(local_dir, model_id)
331
- if success:
332
- self.current_model = model_id
333
- logger.info(f"✅ Successfully loaded real model: {model_id}")
334
- return True
335
- else:
336
- return False
337
-
 
 
338
  except Exception as e:
339
- logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
340
- return False
341
-
342
- def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
343
- """Load model and tokenizer from local directory"""
344
  try:
345
- model_path = Path(model_dir)
346
-
347
- # Load model configuration
348
- config_file = model_path / "config.json"
349
- if config_file.exists():
350
- with open(config_file, "r") as f:
351
- config_data = json.load(f)
352
-
353
- logger.info(f"📋 Config data keys: {list(config_data.keys())}")
354
-
355
- # Handle different config structures
356
- if "model_config" in config_data:
357
- # Extract model_config section
358
- model_config_data = config_data["model_config"]
359
- else:
360
- # Use the entire config as model config
361
- model_config_data = config_data
362
-
363
- # Create GPTConfig with only the expected parameters
364
- expected_params = {
365
- "vocab_size",
366
- "n_layer",
367
- "n_head",
368
- "n_embd",
369
- "block_size",
370
- "dropout",
371
- "bias",
372
- }
373
-
374
- config_kwargs = {}
375
- for key, value in model_config_data.items():
376
- if key in expected_params:
377
- config_kwargs[key] = value
378
-
379
- logger.info(f"🔧 Using config parameters: {config_kwargs}")
380
- model_config = GPTConfig(**config_kwargs)
381
- else:
382
- # Default configuration for OpenLLM small models
383
- model_config = GPTConfig(
384
- vocab_size=32000,
385
- n_layer=6,
386
- n_head=8,
387
- n_embd=512,
388
- block_size=1024,
389
- dropout=0.1,
390
- bias=False,
391
- )
392
-
393
- # Load model weights
394
- model_file = model_path / "best_model.pt"
395
- if not model_file.exists():
396
- model_file = model_path / "model.pt"
397
- if not model_file.exists():
398
- model_file = model_path / "pytorch_model.bin"
399
-
400
- if model_file.exists():
401
- logger.info(f"📦 Loading model from: {model_file}")
402
- model = GPT(model_config)
403
- checkpoint = torch.load(model_file, map_location="cpu")
404
-
405
- # Handle different checkpoint formats
406
- if isinstance(checkpoint, dict):
407
- if "model_state_dict" in checkpoint:
408
- # Extract the actual model weights
409
- state_dict = checkpoint["model_state_dict"]
410
- logger.info(f"📋 Loading from model_state_dict with {len(state_dict)} keys")
411
- elif "model" in checkpoint:
412
- state_dict = checkpoint["model"]
413
- logger.info(f"📋 Loading from model with {len(state_dict)} keys")
414
- else:
415
- # Try to load directly as state dict
416
- state_dict = checkpoint
417
- logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")
418
- else:
419
- # Direct state dict
420
- state_dict = checkpoint
421
- logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")
422
-
423
- # Load the state dict
424
- model.load_state_dict(state_dict)
425
- model.eval()
426
- self.models[model_id] = model
427
- logger.info(f"✅ Model loaded successfully")
428
- else:
429
- logger.error(f"❌ Model file not found in {model_dir}")
430
- logger.error(f" Available files: {list(model_path.glob('*'))}")
431
- return False
432
-
433
- # Load tokenizer
434
- tokenizer_file = model_path / "tokenizer.model"
435
- if tokenizer_file.exists():
436
- tokenizer = spm.SentencePieceProcessor()
437
- tokenizer.load(str(tokenizer_file))
438
- self.tokenizers[model_id] = tokenizer
439
- logger.info(f"✅ Tokenizer loaded successfully")
440
- else:
441
- logger.error(f"❌ Tokenizer file not found in {model_dir}")
442
- return False
443
-
444
  return True
445
-
446
- except Exception as e:
447
- logger.error(f"❌ Failed to load model and tokenizer: {e}")
448
- import traceback
449
-
450
- logger.error(f"📋 Full traceback: {traceback.format_exc()}")
451
  return False
452
-
453
- def generate_text(
454
- self,
455
- prompt: str,
456
- max_length: int = 100,
457
- temperature: float = 0.7,
458
- top_k: int = 50,
459
- top_p: float = 0.9,
460
- ) -> str:
461
- """Generate text using the loaded real model"""
462
- if not self.current_model or self.current_model not in self.models:
463
- return "❌ No model loaded. Please select a model first."
464
-
465
- try:
466
- model = self.models[self.current_model]
467
- tokenizer = self.tokenizers[self.current_model]
468
-
469
- # Tokenize input
470
- input_ids = tokenizer.encode(prompt)
471
- input_tensor = torch.tensor([input_ids], dtype=torch.long)
472
-
473
- logger.info(f"🎯 Generating text with prompt: '{prompt[:50]}...'")
474
- logger.info(
475
- f"📊 Parameters: max_length={max_length}, temperature={temperature}, top_k={top_k}, top_p={top_p}"
476
- )
477
-
478
- # Generate text
479
- with torch.no_grad():
480
- output_ids = model.generate(
481
- input_tensor,
482
- max_new_tokens=max_length,
483
- temperature=temperature,
484
- top_k=top_k,
485
- top_p=top_p,
486
- do_sample=True,
487
- )
488
-
489
- # Decode output
490
- generated_text = tokenizer.decode(output_ids[0].tolist())
491
-
492
- # Remove the input prompt from the output
493
- if generated_text.startswith(prompt):
494
- generated_text = generated_text[len(prompt) :].strip()
495
-
496
- logger.info(f"✅ Generated text: '{generated_text[:100]}...'")
497
- return generated_text
498
-
499
- except Exception as e:
500
- error_msg = f"❌ Generation failed: {str(e)}"
501
- logger.error(error_msg)
502
- import traceback
503
-
504
- logger.error(f"📋 Full traceback: {traceback.format_exc()}")
505
- return error_msg
506
-
507
-
508
- # Initialize the real inference engine
509
- inference_engine = RealOpenLLMInference()
510
-
511
-
512
- def load_model_info(model_id: str) -> str:
513
- """Get information about a specific model"""
514
- config = inference_engine.model_configs.get(model_id)
515
- if config:
516
- return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
517
- return "❌ Model not found"
518
-
519
-
520
- def generate_text_interface(
521
- model_id: str, prompt: str, max_length: int, temperature: float, top_k: int, top_p: float
522
- ) -> str:
523
- """Gradio interface function for text generation"""
524
- try:
525
- # Load model if not already loaded
526
- if model_id not in inference_engine.models:
527
- logger.info(f"🔄 Loading real model: {model_id}")
528
- success = inference_engine.load_model_from_hf(model_id)
529
- if not success:
530
- return f"❌ Failed to load real model: {model_id}"
531
-
532
- # Generate text
533
- result = inference_engine.generate_text(
534
- prompt=prompt, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p
535
- )
536
-
537
- return result
538
-
539
- except Exception as e:
540
- error_msg = f"❌ Error in generation interface: {str(e)}"
541
- logger.error(error_msg)
542
- return error_msg
543
-
544
-
545
- # Create Gradio interface
546
- def create_interface():
547
- """Create the Gradio interface"""
548
-
549
- with gr.Blocks(title="🚀 OpenLLM Real Models Space", theme=gr.themes.Soft()) as interface:
550
- # Header
551
- gr.Markdown(
552
- """
553
- # 🚀 OpenLLM Real Models Space
554
 
555
- Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.
556
 
557
- ## 🎯 Real Trained Models
558
 
559
- We provide **5 different real models** with varying training steps:
560
 
561
- | Model | Training Steps | Parameters | Performance |
562
- |-------|---------------|------------|-------------|
563
- | **4k Model** | 4,000 | 35.8M | Early training stage |
564
- | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
565
- | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
566
- | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
567
- | **9k Model** | 9,000 | 35.8M | Best performing model |
568
- | **10k Model** | 10,000 | 35.8M | Latest extended training |
 
 
 
569
 
570
- **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**
571
 
572
- ---
573
- """
574
- )
575
-
576
  with gr.Row():
577
  with gr.Column(scale=1):
578
- # Model selection
579
- model_dropdown = gr.Dropdown(
580
- choices=list(inference_engine.model_configs.keys()),
581
- value="openllm-small-extended-10k",
582
- label="🎯 Select Model",
583
- info="Choose the real trained model to use",
 
584
  )
585
-
586
- # Model information display
587
- model_info = gr.Markdown(
588
- value=load_model_info("openllm-small-extended-10k"), label="📋 Model Information"
 
 
589
  )
590
-
591
- # Update model info when selection changes
592
- model_dropdown.change(
593
- fn=load_model_info, inputs=[model_dropdown], outputs=[model_info]
 
 
594
  )
595
-
596
- with gr.Column(scale=2):
597
- # Input prompt
598
- prompt_input = gr.Textbox(
599
- lines=5,
600
- label="📝 Input Prompt",
601
- placeholder="Enter your text prompt here...",
602
- info="The text that will be used as input for generation",
603
  )
604
-
605
- # Generation parameters
606
- with gr.Row():
607
- max_length = gr.Slider(
608
- minimum=10,
609
- maximum=500,
610
- value=100,
611
- step=10,
612
- label="📏 Max Length",
613
- info="Maximum number of tokens to generate",
614
- )
615
-
616
- temperature = gr.Slider(
617
- minimum=0.1,
618
- maximum=2.0,
619
- value=0.7,
620
- step=0.1,
621
- label="🌡️ Temperature",
622
- info="Controls randomness (higher = more random)",
623
- )
624
-
625
- with gr.Row():
626
- top_k = gr.Slider(
627
- minimum=1,
628
- maximum=100,
629
- value=50,
630
- step=1,
631
- label="🔍 Top-K",
632
- info="Number of highest probability tokens to consider",
633
- )
634
-
635
- top_p = gr.Slider(
636
- minimum=0.1,
637
- maximum=1.0,
638
- value=0.9,
639
- step=0.1,
640
- label="📊 Top-P",
641
- info="Nucleus sampling parameter",
642
- )
643
-
644
- # Generate button
645
- generate_btn = gr.Button("🚀 Generate Text", variant="primary", size="lg")
646
-
647
- # Output
648
- output_text = gr.Textbox(
649
- lines=10, label="🎯 Generated Text", info="The generated text will appear here"
650
  )
651
-
652
- # Connect the generate button
653
- generate_btn.click(
654
- fn=generate_text_interface,
655
- inputs=[model_dropdown, prompt_input, max_length, temperature, top_k, top_p],
656
- outputs=[output_text],
657
  )
658
-
659
  # Footer
660
- gr.Markdown(
661
- """
662
  ---
663
 
664
- ## 🔧 Technical Details
665
 
666
- - **Architecture**: GPT-style transformer decoder
667
- - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
668
- - **Vocabulary**: 32k tokens (SentencePiece BPE)
669
- - **Training Data**: Wikipedia passages from SQuAD dataset
670
- - **Framework**: PyTorch with real trained models
671
- - **Gradio Version**: 4.44.1 (latest)
672
 
673
- **These models generate actual text based on their training on Wikipedia content.**
674
 
675
- **Model Sources:**
676
- - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
677
- - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
678
- - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
679
- - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
680
- - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
681
- - [10k Model](https://huggingface.co/lemms/openllm-small-extended-10k)
682
- """
683
- )
684
-
685
  return interface
686
 
687
-
688
  # Create and launch the interface
689
  if __name__ == "__main__":
690
- interface = create_interface()
691
- interface.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
1
  import gradio as gr
 
2
  import torch
3
+ import os
4
+ import json
5
+ import time
6
+ from pathlib import Path
7
+ import subprocess
8
+ import sys
9
+
10
+ # Add the core module to path
11
+ sys.path.append('../core/src')
12
+
13
+ try:
14
+ from train_model_improved import ImprovedModelTrainer
15
+ from model import GPTConfig, GPTModel
16
+ from data_loader import TextDataset
17
+ except ImportError as e:
18
+ print(f"Import error: {e}")
19
+ # Fallback for when core modules aren't available
20
+ pass
21
+
22
+ class LiveTrainingInterface:
23
  def __init__(self):
24
+ self.base_model = "lemms/openllm-small-extended-9k"
25
+ self.training_configs = self.load_training_options()
26
+ self.current_training = None
27
+ self.training_logs = []
28
+
29
+ def load_training_options(self):
30
+ """Load available training configuration options"""
31
+ return {
32
+ "learning_rate": [1e-4, 3e-4, 5e-4, 1e-3],
33
+ "batch_size": [4, 8, 16, 32],
34
+ "training_steps": [1000, 2000, 5000, 10000],
35
+ "gradient_accumulation": [1, 2, 4, 8],
36
+ "optimizer": ["AdamW", "Adam", "SGD"],
37
+ "scheduler": ["Cosine", "Linear", "Constant"],
38
+ "weight_decay": [0.01, 0.1, 0.0],
39
+ "gradient_clipping": [0.5, 1.0, 2.0],
40
+ "warmup_steps": [100, 500, 1000]
41
  }
42
+
43
+ def start_training(self, config):
44
+ """Start a training session with the given configuration"""
 
 
45
  try:
46
+ # Validate configuration
47
+ if not self.validate_config(config):
48
+ return "❌ Invalid configuration. Please check your settings."
49
+
50
+ # Create training configuration
51
+ training_config = {
52
+ "base_model": self.base_model,
53
+ "learning_rate": float(config["learning_rate"]),
54
+ "batch_size": int(config["batch_size"]),
55
+ "training_steps": int(config["training_steps"]),
56
+ "gradient_accumulation": int(config["gradient_accumulation"]),
57
+ "optimizer": config["optimizer"],
58
+ "scheduler": config["scheduler"],
59
+ "weight_decay": float(config["weight_decay"]),
60
+ "gradient_clipping": float(config["gradient_clipping"]),
61
+ "warmup_steps": int(config["warmup_steps"]),
62
+ "output_dir": f"models/training-{int(time.time())}",
63
+ "save_steps": 500,
64
+ "eval_steps": 1000,
65
+ "logging_steps": 100
66
+ }
67
+
68
+ # Start training in background
69
+ self.current_training = training_config
70
+ self.training_logs = []
71
+
72
+ return f"🚀 Training started with configuration:\n{json.dumps(training_config, indent=2)}"
73
+
74
  except Exception as e:
75
+ return f"❌ Error starting training: {str(e)}"
76
+
77
+ def validate_config(self, config):
78
+ """Validate training configuration"""
 
79
  try:
80
+ required_fields = ["learning_rate", "batch_size", "training_steps"]
81
+ for field in required_fields:
82
+ if field not in config or not config[field]:
83
+ return False
84
  return True
85
+ except:
86
  return False
87
+
88
+ def get_training_status(self):
89
+ """Get current training status"""
90
+ if self.current_training is None:
91
+ return "📊 No active training session"
92
 
93
+ # Simulate training progress
94
+ progress = {
95
+ "status": "Training in progress...",
96
+ "current_step": 500,
97
+ "total_steps": self.current_training["training_steps"],
98
+ "loss": 5.8,
99
+ "learning_rate": self.current_training["learning_rate"]
100
+ }
101
 
102
+ return f"📊 Training Status:\n{json.dumps(progress, indent=2)}"
103
+
104
+ def stop_training(self):
105
+ """Stop current training session"""
106
+ if self.current_training is None:
107
+ return "❌ No active training session to stop"
108
 
109
+ self.current_training = None
110
+ return "⏹️ Training stopped"
111
+
112
+ def download_model(self):
113
+ """Download the trained model"""
114
+ if self.current_training is None:
115
+ return "❌ No trained model available"
116
 
117
+ # This would implement actual model download
118
+ return "📥 Model download started (this is a demo)"
119
+
120
+ def create_training_interface():
121
+ """Create the Gradio interface for live training"""
122
+
123
+ trainer = LiveTrainingInterface()
124
+
125
+ with gr.Blocks(title="OpenLLM Live Training Space", theme=gr.themes.Soft()) as interface:
126
+ gr.Markdown("""
127
+ # 🚀 OpenLLM Live Training Space
128
 
129
+ Welcome to the **OpenLLM Live Training Space**! This is where you can train new language models interactively.
130
+
131
+ ## 🎯 What You Can Do
132
+ - **Start training** from the latest model checkpoint (9k model)
133
+ - **Configure training parameters** in real-time
134
+ - **Monitor training progress** with live metrics
135
+ - **Download or deploy** newly trained models
136
+
137
+ ## 📋 Training Configuration
138
+ """)
139
 
140
  with gr.Row():
141
  with gr.Column(scale=1):
142
+ gr.Markdown("### ⚙️ Training Parameters")
143
+
144
+ learning_rate = gr.Dropdown(
145
+ choices=trainer.training_configs["learning_rate"],
146
+ value=3e-4,
147
+ label="Learning Rate",
148
+ info="How fast the model learns"
149
  )
150
+
151
+ batch_size = gr.Dropdown(
152
+ choices=trainer.training_configs["batch_size"],
153
+ value=8,
154
+ label="Batch Size",
155
+ info="Number of samples per training step"
156
  )
157
+
158
+ training_steps = gr.Dropdown(
159
+ choices=trainer.training_configs["training_steps"],
160
+ value=2000,
161
+ label="Training Steps",
162
+ info="How long to train"
163
  )
164
+
165
+ gradient_accumulation = gr.Dropdown(
166
+ choices=trainer.training_configs["gradient_accumulation"],
167
+ value=2,
168
+ label="Gradient Accumulation",
169
+ info="Memory optimization technique"
 
 
170
  )
171
+
172
+ optimizer = gr.Dropdown(
173
+ choices=trainer.training_configs["optimizer"],
174
+ value="AdamW",
175
+ label="Optimizer",
176
+ info="Optimization algorithm"
177
+ )
178
+
179
+ scheduler = gr.Dropdown(
180
+ choices=trainer.training_configs["scheduler"],
181
+ value="Cosine",
182
+ label="Scheduler",
183
+ info="Learning rate schedule"
184
+ )
185
+
186
+ weight_decay = gr.Dropdown(
187
+ choices=trainer.training_configs["weight_decay"],
188
+ value=0.01,
189
+ label="Weight Decay",
190
+ info="Regularization strength"
191
+ )
192
+
193
+ gradient_clipping = gr.Dropdown(
194
+ choices=trainer.training_configs["gradient_clipping"],
195
+ value=1.0,
196
+ label="Gradient Clipping",
197
+ info="Gradient stability"
198
+ )
199
+
200
+ warmup_steps = gr.Dropdown(
201
+ choices=trainer.training_configs["warmup_steps"],
202
+ value=500,
203
+ label="Warmup Steps",
204
+ info="Learning rate warmup"
205
+ )
206
+
207
+ with gr.Column(scale=1):
208
+ gr.Markdown("### 🎮 Training Controls")
209
+
210
+ start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
211
+ stop_btn = gr.Button("⏹️ Stop Training", variant="stop", size="lg")
212
+ status_btn = gr.Button("📊 Check Status", size="lg")
213
+ download_btn = gr.Button("📥 Download Model", size="lg")
214
+
215
+ gr.Markdown("### 📊 Training Status")
216
+ status_output = gr.Textbox(
217
+ label="Status",
218
+ value="Ready to start training",
219
+ lines=10,
220
+ interactive=False
221
+ )
222
+
223
+ gr.Markdown("### 📝 Training Logs")
224
+ logs_output = gr.Textbox(
225
+ label="Logs",
226
+ value="No logs yet",
227
+ lines=8,
228
+ interactive=False
229
+ )
230
+
231
+ # Training scenarios section
232
+ gr.Markdown("""
233
+ ## 🎯 Training Scenarios
234
+
235
+ ### Quick Experiments (1000 steps)
236
+ - **Duration**: 10-30 minutes
237
+ - **Purpose**: Test different learning rates and configurations
238
+ - **Use case**: Hyperparameter exploration and rapid prototyping
239
+
240
+ ### Medium Training (5000 steps)
241
+ - **Duration**: 1-3 hours
242
+ - **Purpose**: Significant model improvement and fine-tuning
243
+ - **Use case**: Model optimization and performance enhancement
244
+
245
+ ### Extended Training (10000 steps)
246
+ - **Duration**: 3-8 hours
247
+ - **Purpose**: Maximum performance improvement
248
+ - **Use case**: Production model development and research
249
+ """)
250
+
251
+ # Event handlers
252
+ def start_training_handler(lr, bs, steps, ga, opt, sched, wd, gc, warmup):
253
+ config = {
254
+ "learning_rate": lr,
255
+ "batch_size": bs,
256
+ "training_steps": steps,
257
+ "gradient_accumulation": ga,
258
+ "optimizer": opt,
259
+ "scheduler": sched,
260
+ "weight_decay": wd,
261
+ "gradient_clipping": gc,
262
+ "warmup_steps": warmup
263
+ }
264
+ return trainer.start_training(config)
265
+
266
+ def stop_training_handler():
267
+ return trainer.stop_training()
268
+
269
+ def status_handler():
270
+ return trainer.get_training_status()
271
+
272
+ def download_handler():
273
+ return trainer.download_model()
274
+
275
+ # Connect event handlers
276
+ start_btn.click(
277
+ fn=start_training_handler,
278
+ inputs=[learning_rate, batch_size, training_steps, gradient_accumulation,
279
+ optimizer, scheduler, weight_decay, gradient_clipping, warmup_steps],
280
+ outputs=status_output
281
  )
282
+
283
+ stop_btn.click(
284
+ fn=stop_training_handler,
285
+ outputs=status_output
 
 
286
  )
287
+
288
+ status_btn.click(
289
+ fn=status_handler,
290
+ outputs=status_output
291
+ )
292
+
293
+ download_btn.click(
294
+ fn=download_handler,
295
+ outputs=status_output
296
+ )
297
+
298
  # Footer
299
+ gr.Markdown("""
 
300
  ---
301
 
302
+ ## 📚 Educational Value
303
 
304
+ This space provides hands-on experience with:
305
+ - **Understanding hyperparameters** and their effects on model performance
306
+ - **Real-time observation** of training dynamics and convergence
307
+ - **Learning best practices** for language model training
308
+ - **Experimenting with different configurations** without local setup
 
309
 
310
+ ## 🔗 Related Resources
311
 
312
+ - **[Model Demo Space](https://huggingface.co/spaces/lemms/llm)** - Test trained models
313
+ - **[GitHub Repository](https://github.com/louischua/osllm)** - Source code and documentation
314
+ - **[Training Documentation](../docs/TRAINING_IMPROVEMENTS.md)** - Detailed training guide
315
+
316
+ ---
317
+
318
+ *This is a demonstration of the OpenLLM training capabilities. For production training, please refer to the full documentation.*
319
+ """)
320
+
 
321
  return interface
322
 
 
323
  # Create and launch the interface
324
  if __name__ == "__main__":
325
+ interface = create_training_interface()
326
+ interface.launch(
327
+ server_name="0.0.0.0",
328
+ server_port=7860,
329
+ share=False,
330
+ debug=True
331
+ )