lemms committed on
Commit f7c7866 · verified · 1 Parent(s): 25c8a83

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +658 -298
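The commit message says app.py was uploaded with huggingface_hub. For reference, a minimal sketch of such an upload; the Space repo id below is an assumption for illustration, not taken from this commit:

```python
# Sketch: uploading app.py to a Hugging Face Space with huggingface_hub.
# The repo_id is an assumed placeholder, not confirmed by this commit.
from huggingface_hub import HfApi

api = HfApi()  # uses HF_TOKEN from the environment or a cached login
api.upload_file(
    path_or_fileobj="app.py",           # local file to upload
    path_in_repo="app.py",              # destination path inside the repo
    repo_id="lemms/openllm-space",      # assumed Space id
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)
```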
app.py CHANGED
@@ -1,331 +1,691 @@
- import gradio as gr
- import torch
- import os
  import json
- import time
  from pathlib import Path
- import subprocess
- import sys
-
- # Add the core module to path
- sys.path.append('../core/src')
-
- try:
-     from train_model_improved import ImprovedModelTrainer
-     from model import GPTConfig, GPTModel
-     from data_loader import TextDataset
- except ImportError as e:
-     print(f"Import error: {e}")
-     # Fallback for when core modules aren't available
-     pass
-
- class LiveTrainingInterface:
      def __init__(self):
-         self.base_model = "lemms/openllm-small-extended-9k"
-         self.training_configs = self.load_training_options()
-         self.current_training = None
-         self.training_logs = []
-
-     def load_training_options(self):
-         """Load available training configuration options"""
-         return {
-             "learning_rate": [1e-4, 3e-4, 5e-4, 1e-3],
-             "batch_size": [4, 8, 16, 32],
-             "training_steps": [1000, 2000, 5000, 10000],
-             "gradient_accumulation": [1, 2, 4, 8],
-             "optimizer": ["AdamW", "Adam", "SGD"],
-             "scheduler": ["Cosine", "Linear", "Constant"],
-             "weight_decay": [0.01, 0.1, 0.0],
-             "gradient_clipping": [0.5, 1.0, 2.0],
-             "warmup_steps": [100, 500, 1000]
          }
-
-     def start_training(self, config):
-         """Start a training session with the given configuration"""
          try:
-             # Validate configuration
-             if not self.validate_config(config):
-                 return "❌ Invalid configuration. Please check your settings."
-
-             # Create training configuration
-             training_config = {
-                 "base_model": self.base_model,
-                 "learning_rate": float(config["learning_rate"]),
-                 "batch_size": int(config["batch_size"]),
-                 "training_steps": int(config["training_steps"]),
-                 "gradient_accumulation": int(config["gradient_accumulation"]),
-                 "optimizer": config["optimizer"],
-                 "scheduler": config["scheduler"],
-                 "weight_decay": float(config["weight_decay"]),
-                 "gradient_clipping": float(config["gradient_clipping"]),
-                 "warmup_steps": int(config["warmup_steps"]),
-                 "output_dir": f"models/training-{int(time.time())}",
-                 "save_steps": 500,
-                 "eval_steps": 1000,
-                 "logging_steps": 100
-             }
-
-             # Start training in background
-             self.current_training = training_config
-             self.training_logs = []
-
-             return f"🚀 Training started with configuration:\n{json.dumps(training_config, indent=2)}"
-
          except Exception as e:
-             return f"❌ Error starting training: {str(e)}"
-
-     def validate_config(self, config):
-         """Validate training configuration"""
          try:
-             required_fields = ["learning_rate", "batch_size", "training_steps"]
-             for field in required_fields:
-                 if field not in config or not config[field]:
-                     return False
              return True
-         except:
              return False
-
-     def get_training_status(self):
-         """Get current training status"""
-         if self.current_training is None:
-             return "📊 No active training session"
-
-         # Simulate training progress
-         progress = {
-             "status": "Training in progress...",
-             "current_step": 500,
-             "total_steps": self.current_training["training_steps"],
-             "loss": 5.8,
-             "learning_rate": self.current_training["learning_rate"]
-         }
-
-         return f"📊 Training Status:\n{json.dumps(progress, indent=2)}"
-
-     def stop_training(self):
-         """Stop current training session"""
-         if self.current_training is None:
-             return "❌ No active training session to stop"

-         self.current_training = None
-         return "⏹️ Training stopped"
-
-     def download_model(self):
-         """Download the trained model"""
-         if self.current_training is None:
-             return "❌ No trained model available"

-         # This would implement actual model download
-         return "📥 Model download started (this is a demo)"
-
- def create_training_interface():
-     """Create the Gradio interface for live training"""
-
-     trainer = LiveTrainingInterface()
-
-     with gr.Blocks(title="OpenLLM Live Training Space", theme=gr.themes.Soft()) as interface:
-         gr.Markdown("""
-         # 🚀 OpenLLM Live Training Space

-         Welcome to the **OpenLLM Live Training Space**! This is where you can train new language models interactively.

-         ## 🎯 What You Can Do
-         - **Start training** from the latest model checkpoint (9k model)
-         - **Configure training parameters** in real-time
-         - **Monitor training progress** with live metrics
-         - **Download or deploy** newly trained models

-         ## 📋 Training Configuration
-         """)

          with gr.Row():
              with gr.Column(scale=1):
-                 gr.Markdown("### ⚙️ Training Parameters")
-
-                 learning_rate = gr.Dropdown(
-                     choices=trainer.training_configs["learning_rate"],
-                     value=3e-4,
-                     label="Learning Rate",
-                     info="How fast the model learns"
-                 )
-
-                 batch_size = gr.Dropdown(
-                     choices=trainer.training_configs["batch_size"],
-                     value=8,
-                     label="Batch Size",
-                     info="Number of samples per training step"
-                 )
-
-                 training_steps = gr.Dropdown(
-                     choices=trainer.training_configs["training_steps"],
-                     value=2000,
-                     label="Training Steps",
-                     info="How long to train"
-                 )
-
-                 gradient_accumulation = gr.Dropdown(
-                     choices=trainer.training_configs["gradient_accumulation"],
-                     value=2,
-                     label="Gradient Accumulation",
-                     info="Memory optimization technique"
                  )
-
-                 optimizer = gr.Dropdown(
-                     choices=trainer.training_configs["optimizer"],
-                     value="AdamW",
-                     label="Optimizer",
-                     info="Optimization algorithm"
-                 )
-
-                 scheduler = gr.Dropdown(
-                     choices=trainer.training_configs["scheduler"],
-                     value="Cosine",
-                     label="Scheduler",
-                     info="Learning rate schedule"
-                 )
-
-                 weight_decay = gr.Dropdown(
-                     choices=trainer.training_configs["weight_decay"],
-                     value=0.01,
-                     label="Weight Decay",
-                     info="Regularization strength"
-                 )
-
-                 gradient_clipping = gr.Dropdown(
-                     choices=trainer.training_configs["gradient_clipping"],
-                     value=1.0,
-                     label="Gradient Clipping",
-                     info="Gradient stability"
-                 )
-
-                 warmup_steps = gr.Dropdown(
-                     choices=trainer.training_configs["warmup_steps"],
-                     value=500,
-                     label="Warmup Steps",
-                     info="Learning rate warmup"
                  )
-
-             with gr.Column(scale=1):
-                 gr.Markdown("### 🎮 Training Controls")
-
-                 start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
-                 stop_btn = gr.Button("⏹️ Stop Training", variant="stop", size="lg")
-                 status_btn = gr.Button("📊 Check Status", size="lg")
-                 download_btn = gr.Button("📥 Download Model", size="lg")
-
-                 gr.Markdown("### 📊 Training Status")
-                 status_output = gr.Textbox(
-                     label="Status",
-                     value="Ready to start training",
-                     lines=10,
-                     interactive=False
                  )
-
-                 gr.Markdown("### 📝 Training Logs")
-                 logs_output = gr.Textbox(
-                     label="Logs",
-                     value="No logs yet",
-                     lines=8,
-                     interactive=False
                  )
-
-         # Training scenarios section
-         gr.Markdown("""
-         ## 🎯 Training Scenarios
-
-         ### Quick Experiments (1000 steps)
-         - **Duration**: 10-30 minutes
-         - **Purpose**: Test different learning rates and configurations
-         - **Use case**: Hyperparameter exploration and rapid prototyping
-
-         ### Medium Training (5000 steps)
-         - **Duration**: 1-3 hours
-         - **Purpose**: Significant model improvement and fine-tuning
-         - **Use case**: Model optimization and performance enhancement
-
-         ### Extended Training (10000 steps)
-         - **Duration**: 3-8 hours
-         - **Purpose**: Maximum performance improvement
-         - **Use case**: Production model development and research
-         """)
-
-         # Event handlers
-         def start_training_handler(lr, bs, steps, ga, opt, sched, wd, gc, warmup):
-             config = {
-                 "learning_rate": lr,
-                 "batch_size": bs,
-                 "training_steps": steps,
-                 "gradient_accumulation": ga,
-                 "optimizer": opt,
-                 "scheduler": sched,
-                 "weight_decay": wd,
-                 "gradient_clipping": gc,
-                 "warmup_steps": warmup
-             }
-             return trainer.start_training(config)
-
-         def stop_training_handler():
-             return trainer.stop_training()
-
-         def status_handler():
-             return trainer.get_training_status()
-
-         def download_handler():
-             return trainer.download_model()
-
-         # Connect event handlers
-         start_btn.click(
-             fn=start_training_handler,
-             inputs=[learning_rate, batch_size, training_steps, gradient_accumulation,
-                     optimizer, scheduler, weight_decay, gradient_clipping, warmup_steps],
-             outputs=status_output
-         )
-
-         stop_btn.click(
-             fn=stop_training_handler,
-             outputs=status_output
-         )
-
-         status_btn.click(
-             fn=status_handler,
-             outputs=status_output
          )
-
-         download_btn.click(
-             fn=download_handler,
-             outputs=status_output
          )
-
          # Footer
-         gr.Markdown("""
          ---

-         ## 📚 Educational Value

-         This space provides hands-on experience with:
-         - **Understanding hyperparameters** and their effects on model performance
-         - **Real-time observation** of training dynamics and convergence
-         - **Learning best practices** for language model training
-         - **Experimenting with different configurations** without local setup

-         ## 🔗 Related Resources

-         - **[Model Demo Space](https://huggingface.co/spaces/lemms/llm)** - Test trained models
-         - **[GitHub Repository](https://github.com/louischua/osllm)** - Source code and documentation
-         - **[Training Documentation](../docs/TRAINING_IMPROVEMENTS.md)** - Detailed training guide
-
-         ---
-
-         *This is a demonstration of the OpenLLM training capabilities. For production training, please refer to the full documentation.*
-         """)
-
      return interface

  # Create and launch the interface
  if __name__ == "__main__":
-     interface = create_training_interface()
-     interface.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         share=False,
-         debug=True
-     )
+ #!/usr/bin/env python3
+ """
+ OpenLLM Real Models App - Final working version with correct attribute naming
+ """
+
  import json
+ import logging
+ import math
  from pathlib import Path
+ from typing import Any, Dict, Optional
+
+ import gradio as gr
+ import sentencepiece as spm
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from huggingface_hub import snapshot_download
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ class GPTConfig:
+     """GPT model configuration"""
+
+     def __init__(
+         self,
+         vocab_size=32000,
+         n_layer=6,
+         n_head=8,
+         n_embd=512,
+         block_size=1024,
+         dropout=0.1,
+         bias=False,
+         **kwargs,
+     ):
+         # Accept any additional kwargs to handle extra config fields
+         self.vocab_size = vocab_size
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_embd = n_embd
+         self.block_size = block_size
+         self.dropout = dropout
+         self.bias = bias
+
+
+ class GPT(nn.Module):
+     """GPT-style transformer model - EXACT architecture matching the saved model"""
+
+     def __init__(self, config):
+         super().__init__()
+         assert config.vocab_size is not None
+         assert config.block_size is not None
+         self.config = config
+
+         # Create the transformer module with the exact naming convention
+         self.transformer = nn.ModuleDict(
+             dict(
+                 wte=nn.Embedding(config.vocab_size, config.n_embd),
+                 wpe=nn.Embedding(config.block_size, config.n_embd),
+                 drop=nn.Dropout(config.dropout),
+                 h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+                 ln_f=nn.LayerNorm(config.n_embd),
+             )
+         )
+
+         # Language model head - Use bias=False to match saved models
+         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+
+         # Initialize weights
+         self.apply(self._init_weights)
+         for pn, p in self.named_parameters():
+             if pn.endswith("c_proj.weight"):
+                 torch.nn.init.normal_(p, mean=0.0, std=0.02 / math.sqrt(2 * config.n_layer))
+
+     def _init_weights(self, module):
+         if isinstance(module, nn.Linear):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+     def forward(self, idx, targets=None):
+         device = idx.device
+         b, t = idx.size()
+         assert (
+             t <= self.config.block_size
+         ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+
+         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
+         tok_emb = self.transformer.wte(idx)
+         pos_emb = self.transformer.wpe(pos)
+         x = self.transformer.drop(tok_emb + pos_emb)
+
+         for block in self.transformer.h:
+             x = block(x)
+         x = self.transformer.ln_f(x)
+
+         if targets is not None:
+             logits = self.lm_head(x)
+             loss = F.cross_entropy(
+                 logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1
+             )
+         else:
+             logits = self.lm_head(x[:, [-1], :])
+             loss = None
+
+         return logits, loss
+
+     def generate(
+         self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True
+     ):
+         for _ in range(max_new_tokens):
+             idx_cond = (
+                 idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size :]
+             )
+             logits, _ = self(idx_cond)
+             logits = logits[:, -1, :] / temperature
+
+             if top_k is not None:
+                 v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                 logits[logits < v[:, [-1]]] = -float("Inf")
+
+             if top_p is not None:
+                 sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                 cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+                 sorted_indices_to_remove = cumulative_probs > top_p
+                 sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+                 sorted_indices_to_remove[..., 0] = 0
+                 indices_to_remove = sorted_indices_to_remove.scatter(
+                     1, sorted_indices, sorted_indices_to_remove
+                 )
+                 logits[indices_to_remove] = -float("Inf")
+
+             probs = F.softmax(logits, dim=-1)
+             if do_sample:
+                 idx_next = torch.multinomial(probs, num_samples=1)
+             else:
+                 _, idx_next = torch.topk(probs, k=1, dim=-1)
+
+             idx = torch.cat((idx, idx_next), dim=1)
+
+         return idx
+
+
+ class Block(nn.Module):
+     """Transformer block with self-attention and feed-forward layers"""
+
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = nn.LayerNorm(config.n_embd)
+         self.attn = CausalSelfAttention(config)
+         self.ln_2 = nn.LayerNorm(config.n_embd)
+         self.mlp = MLP(config)
+
+     def forward(self, x):
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+
+ class CausalSelfAttention(nn.Module):
+     """Multi-head self-attention with causal masking - FINAL WORKING VERSION"""
+
+     def __init__(self, config):
+         super().__init__()
+         assert config.n_embd % config.n_head == 0
+         self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
+         self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
+         self.attn_dropout = nn.Dropout(config.dropout)
+         self.resid_dropout = nn.Dropout(config.dropout)
+         self.n_head = config.n_head
+         self.n_embd = config.n_embd
+         self.dropout = config.dropout
+         self.use_bias = config.bias  # Use different name for the boolean flag
+
+         # REGISTER THE ATTENTION BIAS as a buffer (not parameter) to match saved model
+         # This is actually an attention mask, not a learnable bias
+         if config.bias:
+             # Create a causal attention mask buffer
+             mask = torch.tril(torch.ones(config.block_size, config.block_size))
+             mask = mask.view(1, 1, config.block_size, config.block_size)
+             self.register_buffer("bias", mask)  # This matches the saved model's 'bias' key
+         else:
+             self.register_buffer("bias", None)
+
+     def forward(self, x):
+         B, T, C = x.size()
+
+         # Calculate query, key, values for all heads
+         q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+         k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+
+         # Causal self-attention using the bias mask
+         if self.bias is not None:
+             # Use the causal mask
+             attn_mask = self.bias[:, :, :T, :T]
+             y = F.scaled_dot_product_attention(
+                 q,
+                 k,
+                 v,
+                 attn_mask=attn_mask,
+                 dropout_p=self.dropout if self.training else 0,
+                 is_causal=False,
+             )
+         else:
+             # Use built-in causal attention
+             y = F.scaled_dot_product_attention(
+                 q,
+                 k,
+                 v,
+                 attn_mask=None,
+                 dropout_p=self.dropout if self.training else 0,
+                 is_causal=True,
+             )
+
+         y = y.transpose(1, 2).contiguous().view(B, T, C)
+
+         # Output projection
+         y = self.resid_dropout(self.c_proj(y))
+         return y
+
+
+ class MLP(nn.Module):
+     """Multi-layer perceptron"""
+
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
+         self.gelu = nn.GELU()
+         self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
+         self.dropout = nn.Dropout(config.dropout)
+
+     def forward(self, x):
+         x = self.c_fc(x)
+         x = self.gelu(x)
+         x = self.c_proj(x)
+         x = self.dropout(x)
+         return x
+
+
+ class RealOpenLLMInference:
+     """Real OpenLLM inference engine using actual trained models"""
+
      def __init__(self):
+         self.models = {}
+         self.tokenizers = {}
+         self.current_model = None
+
+         # Real model configurations from Hugging Face
+         self.model_configs = {
+             "openllm-small-extended-4k": {
+                 "name": "OpenLLM Small (4k steps)",
+                 "description": "Real model trained for 4,000 steps - Early training stage",
+                 "hf_repo": "lemms/openllm-small-extended-4k",
+                 "training_steps": 4000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-6k": {
+                 "name": "OpenLLM Small (6k steps)",
+                 "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
+                 "hf_repo": "lemms/openllm-small-extended-6k",
+                 "training_steps": 6000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-7k": {
+                 "name": "OpenLLM Small (7k steps)",
+                 "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
+                 "hf_repo": "lemms/openllm-small-extended-7k",
+                 "training_steps": 7000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-8k": {
+                 "name": "OpenLLM Small (8k steps)",
+                 "description": "Real model trained for 8,000 steps - Sophisticated understanding",
+                 "hf_repo": "lemms/openllm-small-extended-8k",
+                 "training_steps": 8000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-9k": {
+                 "name": "OpenLLM Small (9k steps)",
+                 "description": "Real model trained for 9,000 steps - Best performing model",
+                 "hf_repo": "lemms/openllm-small-extended-9k",
+                 "training_steps": 9000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-10k": {
+                 "name": "OpenLLM Small (10k steps)",
+                 "description": "Real model trained for 10,000 steps - Latest extended training",
+                 "hf_repo": "lemms/openllm-small-extended-10k",
+                 "training_steps": 10000,
+                 "parameters": "35.8M",
+             },
+             "openllm-small-extended-10k-improved": {
+                 "name": "OpenLLM Small (10k steps - Improved)",
+                 "description": "Real model trained for 10,000 steps with improved training process - Proper checkpoint format",
+                 "hf_repo": "lemms/openllm-small-extended-10k-improved",
+                 "training_steps": 10000,
+                 "parameters": "35.8M",
+             },
          }
+
+         logger.info("🚀 Real OpenLLM Inference Engine initialized")
+
+     def load_model_from_hf(self, model_id: str) -> bool:
+         """Load a real model from Hugging Face"""
          try:
+             config = self.model_configs.get(model_id)
+             if not config:
+                 logger.error(f"❌ Unknown model ID: {model_id}")
+                 return False
+
+             logger.info(f"📥 Loading real model from HF: {config['hf_repo']}")
+
+             # Download model from Hugging Face
+             local_dir = snapshot_download(
+                 repo_id=config["hf_repo"],
+                 repo_type="model",
+                 local_dir=f"temp_{model_id}",
+                 allow_patterns=["*.pt", "*.json", "*.model", "*.bin"],
+             )
+
+             logger.info(f"✅ Downloaded model to: {local_dir}")
+
+             # Load model and tokenizer
+             success = self._load_model_and_tokenizer(local_dir, model_id)
+             if success:
+                 self.current_model = model_id
+                 logger.info(f"✅ Successfully loaded real model: {model_id}")
+                 return True
+             else:
+                 return False
+
          except Exception as e:
+             logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
+             return False
+
+     def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
+         """Load model and tokenizer from local directory"""
          try:
+             model_path = Path(model_dir)
+
+             # Load model configuration
+             config_file = model_path / "config.json"
+             if config_file.exists():
+                 with open(config_file, "r") as f:
+                     config_data = json.load(f)
+
+                 logger.info(f"📋 Config data keys: {list(config_data.keys())}")
+
+                 # Handle different config structures
+                 if "model_config" in config_data:
+                     # Extract model_config section
+                     model_config_data = config_data["model_config"]
+                 else:
+                     # Use the entire config as model config
+                     model_config_data = config_data
+
+                 # Create GPTConfig with only the expected parameters
+                 expected_params = {
+                     "vocab_size",
+                     "n_layer",
+                     "n_head",
+                     "n_embd",
+                     "block_size",
+                     "dropout",
+                     "bias",
+                 }
+
+                 config_kwargs = {}
+                 for key, value in model_config_data.items():
+                     if key in expected_params:
+                         config_kwargs[key] = value
+
+                 logger.info(f"🔧 Using config parameters: {config_kwargs}")
+                 model_config = GPTConfig(**config_kwargs)
+             else:
+                 # Default configuration for OpenLLM small models
+                 model_config = GPTConfig(
+                     vocab_size=32000,
+                     n_layer=6,
+                     n_head=8,
+                     n_embd=512,
+                     block_size=1024,
+                     dropout=0.1,
+                     bias=False,
+                 )
+
+             # Load model weights
+             model_file = model_path / "best_model.pt"
+             if not model_file.exists():
+                 model_file = model_path / "model.pt"
+             if not model_file.exists():
+                 model_file = model_path / "pytorch_model.bin"
+
+             if model_file.exists():
+                 logger.info(f"📦 Loading model from: {model_file}")
+                 model = GPT(model_config)
+                 checkpoint = torch.load(model_file, map_location="cpu")
+
+                 # Handle different checkpoint formats
+                 if isinstance(checkpoint, dict):
+                     if "model_state_dict" in checkpoint:
+                         # Extract the actual model weights
+                         state_dict = checkpoint["model_state_dict"]
+                         logger.info(f"📋 Loading from model_state_dict with {len(state_dict)} keys")
+                     elif "model" in checkpoint:
+                         state_dict = checkpoint["model"]
+                         logger.info(f"📋 Loading from model with {len(state_dict)} keys")
+                     else:
+                         # Try to load directly as state dict
+                         state_dict = checkpoint
+                         logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")
+                 else:
+                     # Direct state dict
+                     state_dict = checkpoint
+                     logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")
+
+                 # Load the state dict
+                 model.load_state_dict(state_dict)
+                 model.eval()
+                 self.models[model_id] = model
+                 logger.info(f"✅ Model loaded successfully")
+             else:
+                 logger.error(f"❌ Model file not found in {model_dir}")
+                 logger.error(f" Available files: {list(model_path.glob('*'))}")
+                 return False
+
+             # Load tokenizer
+             tokenizer_file = model_path / "tokenizer.model"
+             if tokenizer_file.exists():
+                 tokenizer = spm.SentencePieceProcessor()
+                 tokenizer.load(str(tokenizer_file))
+                 self.tokenizers[model_id] = tokenizer
+                 logger.info(f"✅ Tokenizer loaded successfully")
+             else:
+                 logger.error(f"❌ Tokenizer file not found in {model_dir}")
+                 return False
+
              return True
+
+         except Exception as e:
+             logger.error(f"❌ Failed to load model and tokenizer: {e}")
+             import traceback
+
+             logger.error(f"📋 Full traceback: {traceback.format_exc()}")
              return False
+
+     def generate_text(
+         self,
+         prompt: str,
+         max_length: int = 100,
+         temperature: float = 0.7,
+         top_k: int = 50,
+         top_p: float = 0.9,
+     ) -> str:
+         """Generate text using the loaded real model"""
+         if not self.current_model or self.current_model not in self.models:
+             return " No model loaded. Please select a model first."
+
+         try:
+             model = self.models[self.current_model]
+             tokenizer = self.tokenizers[self.current_model]
+
+             # Tokenize input
+             input_ids = tokenizer.encode(prompt)
+             input_tensor = torch.tensor([input_ids], dtype=torch.long)
+
+             logger.info(f"🎯 Generating text with prompt: '{prompt[:50]}...'")
+             logger.info(
+                 f"📊 Parameters: max_length={max_length}, temperature={temperature}, top_k={top_k}, top_p={top_p}"
+             )
+
+             # Generate text
+             with torch.no_grad():
+                 output_ids = model.generate(
+                     input_tensor,
+                     max_new_tokens=max_length,
+                     temperature=temperature,
+                     top_k=top_k,
+                     top_p=top_p,
+                     do_sample=True,
+                 )
+
+             # Decode output
+             generated_text = tokenizer.decode(output_ids[0].tolist())
+
+             # Remove the input prompt from the output
+             if generated_text.startswith(prompt):
+                 generated_text = generated_text[len(prompt) :].strip()
+
+             logger.info(f"✅ Generated text: '{generated_text[:100]}...'")
+             return generated_text
+
+         except Exception as e:
+             error_msg = f"❌ Generation failed: {str(e)}"
+             logger.error(error_msg)
+             import traceback
+
+             logger.error(f"📋 Full traceback: {traceback.format_exc()}")
+             return error_msg
+
+
+ # Initialize the real inference engine
+ inference_engine = RealOpenLLMInference()
+
+
+ def load_model_info(model_id: str) -> str:
+     """Get information about a specific model"""
+     config = inference_engine.model_configs.get(model_id)
+     if config:
+         return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
+     return "❌ Model not found"
+
+
+ def generate_text_interface(
+     model_id: str, prompt: str, max_length: int, temperature: float, top_k: int, top_p: float
+ ) -> str:
+     """Gradio interface function for text generation"""
+     try:
+         # Load model if not already loaded
+         if model_id not in inference_engine.models:
+             logger.info(f"🔄 Loading real model: {model_id}")
+             success = inference_engine.load_model_from_hf(model_id)
+             if not success:
+                 return f"❌ Failed to load real model: {model_id}"
+
+         # Generate text
+         result = inference_engine.generate_text(
+             prompt=prompt, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p
+         )
+
+         return result
+
+     except Exception as e:
+         error_msg = f"❌ Error in generation interface: {str(e)}"
+         logger.error(error_msg)
+         return error_msg
+
+
+ # Create Gradio interface
+ def create_interface():
+     """Create the Gradio interface"""
+
+     with gr.Blocks(title="🚀 OpenLLM Real Models Space", theme=gr.themes.Soft()) as interface:
+         # Header
+         gr.Markdown(
+             """
+         # 🚀 OpenLLM Real Models Space

+         Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.

+         ## 🎯 Real Trained Models

+         We provide **5 different real models** with varying training steps:

+         | Model | Training Steps | Parameters | Performance |
+         |-------|---------------|------------|-------------|
+         | **4k Model** | 4,000 | 35.8M | Early training stage |
+         | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
+         | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
+         | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
+         | **9k Model** | 9,000 | 35.8M | Best performing model |
+         | **10k Model** | 10,000 | 35.8M | Latest extended training |

+         **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**

+         ---
+         """
+         )
+
          with gr.Row():
              with gr.Column(scale=1):
+                 # Model selection
+                 model_dropdown = gr.Dropdown(
+                     choices=list(inference_engine.model_configs.keys()),
+                     value="openllm-small-extended-10k",
+                     label="🎯 Select Model",
+                     info="Choose the real trained model to use",
                  )
+
+                 # Model information display
+                 model_info = gr.Markdown(
+                     value=load_model_info("openllm-small-extended-10k"), label="📋 Model Information"
                  )
+
+                 # Update model info when selection changes
+                 model_dropdown.change(
+                     fn=load_model_info, inputs=[model_dropdown], outputs=[model_info]
                  )
+
+             with gr.Column(scale=2):
+                 # Input prompt
+                 prompt_input = gr.Textbox(
+                     lines=5,
+                     label="📝 Input Prompt",
+                     placeholder="Enter your text prompt here...",
+                     info="The text that will be used as input for generation",
                  )
+
+                 # Generation parameters
+                 with gr.Row():
+                     max_length = gr.Slider(
+                         minimum=10,
+                         maximum=500,
+                         value=100,
+                         step=10,
+                         label="📏 Max Length",
+                         info="Maximum number of tokens to generate",
+                     )
+
+                     temperature = gr.Slider(
+                         minimum=0.1,
+                         maximum=2.0,
+                         value=0.7,
+                         step=0.1,
+                         label="🌡️ Temperature",
+                         info="Controls randomness (higher = more random)",
+                     )
+
+                 with gr.Row():
+                     top_k = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         value=50,
+                         step=1,
+                         label="🔝 Top-K",
+                         info="Number of highest probability tokens to consider",
+                     )
+
+                     top_p = gr.Slider(
+                         minimum=0.1,
+                         maximum=1.0,
+                         value=0.9,
+                         step=0.1,
+                         label="📊 Top-P",
+                         info="Nucleus sampling parameter",
+                     )
+
+                 # Generate button
+                 generate_btn = gr.Button("🚀 Generate Text", variant="primary", size="lg")
+
+                 # Output
+                 output_text = gr.Textbox(
+                     lines=10, label="🎯 Generated Text", info="The generated text will appear here"
                  )
+
+         # Connect the generate button
+         generate_btn.click(
+             fn=generate_text_interface,
+             inputs=[model_dropdown, prompt_input, max_length, temperature, top_k, top_p],
+             outputs=[output_text],
          )
+
          # Footer
+         gr.Markdown(
+             """
          ---

+         ## 🔧 Technical Details

+         - **Architecture**: GPT-style transformer decoder
+         - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
+         - **Vocabulary**: 32k tokens (SentencePiece BPE)
+         - **Training Data**: Wikipedia passages from SQuAD dataset
+         - **Framework**: PyTorch with real trained models
+         - **Gradio Version**: 4.44.1 (latest)

+         **These models generate actual text based on their training on Wikipedia content.**

+         **Model Sources:**
+         - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
+         - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
+         - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
+         - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
+         - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
+         - [10k Model](https://huggingface.co/lemms/openllm-small-extended-10k)
+         """
+         )
+
      return interface

+
  # Create and launch the interface
  if __name__ == "__main__":
+     interface = create_interface()
+     interface.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
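
Once this app.py is running as a Space, the generation handler wired to the button can also be exercised programmatically with gradio_client. A minimal sketch; the Space id and the endpoint name are assumptions, not confirmed by this commit:

```python
# Sketch: calling the running Gradio Space from Python with gradio_client.
# The Space id and api_name below are assumed placeholders.
from gradio_client import Client

client = Client("lemms/llm")  # assumed Space id
result = client.predict(
    "openllm-small-extended-10k",              # model_id (dropdown value)
    "The history of artificial intelligence",  # prompt
    100,                                       # max_length
    0.7,                                       # temperature
    50,                                        # top_k
    0.9,                                       # top_p
    api_name="/generate_text_interface",       # assumed auto-generated endpoint name
)
print(result)
```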