| import os |
| import torch |
| import numpy as np |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
|
# Maximum number of tokens kept per input; longer texts are truncated.
MAX_LEN = 64
# Class names in logit order — index i of a model's output maps to labels[i].
labels = ["Negative", "Neutral", "Positive"]


# HuggingFace Hub repo IDs of the fine-tuned classifiers, one per architecture,
# read from the environment.
# NOTE(review): os.getenv returns None when a variable is unset, which would
# surface later as an opaque from_pretrained failure — confirm env is configured.
MODEL_REPOS = {
    "roberta": os.getenv("ROBERTA_MODEL"),
    "distilroberta": os.getenv("DISTILROBERTA_MODEL"),
    "bert": os.getenv("BERT_MODEL"),
    "albert": os.getenv("ALBERT_MODEL"),
}


# Base (pre-training) checkpoints whose tokenizers are paired with the
# fine-tuned models above.
BASE_TOKENIZERS = {
    "roberta": "roberta-base",
    "distilroberta": "distilroberta-base",
    "bert": "bert-base-uncased",
    "albert": "albert-base-v2"
}


# Process-wide cache: model_name -> (tokenizer, model, device).
MODEL_CACHE = {}
|
|
|
|
|
|
|
|
def load_model(model_name):
    """Load (or fetch from cache) the tokenizer/model/device for *model_name*.

    Args:
        model_name: One of the keys of ``MODEL_REPOS``
            ("roberta", "distilroberta", "bert", "albert").

    Returns:
        Tuple ``(tokenizer, model, device)``; the model is on *device*
        and in eval mode.

    Raises:
        ValueError: If *model_name* is unknown, or if the corresponding
            environment variable with the HuggingFace repo ID is not set.
    """
    if model_name in MODEL_CACHE:
        return MODEL_CACHE[model_name]

    # Fail fast with a clear message instead of a raw KeyError.
    if model_name not in MODEL_REPOS:
        raise ValueError(
            f"Unknown model '{model_name}'. Expected one of: "
            f"{', '.join(MODEL_REPOS)}"
        )

    repo = MODEL_REPOS[model_name]
    # os.getenv returned None/empty if the env var is unset; from_pretrained
    # would otherwise fail later with a confusing error.
    if not repo:
        raise ValueError(
            f"HuggingFace repo for '{model_name}' is not configured; "
            f"set the corresponding environment variable (see MODEL_REPOS)."
        )

    print(f"🔄 Loading {model_name} from HuggingFace...")

    # Tokenizer comes from the base checkpoint, weights from the fine-tuned repo.
    tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZERS[model_name])
    model = AutoModelForSequenceClassification.from_pretrained(repo)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # disable dropout etc. for inference

    MODEL_CACHE[model_name] = (tokenizer, model, device)
    return tokenizer, model, device
|
|
|
|
def predict(text, model_name="roberta"):
    """Classify *text* with the given model.

    Args:
        text: Input string to classify.
        model_name: Key into ``MODEL_REPOS`` selecting the model.

    Returns:
        Tuple ``(label, probabilities)`` where *label* is one of ``labels``
        and *probabilities* is a list of per-class floats.
    """
    tokenizer, model, device = load_model(model_name)

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_LEN,
    ).to(device)

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    best = int(np.argmax(probs))
    return labels[best], probs.tolist()
|
|
|
|
def compare_all_models(text):
    """Run *text* through every configured model and collect the results.

    Delegates the tokenize/forward/softmax pipeline to ``predict`` instead of
    duplicating it, so the inference logic lives in one place.

    Args:
        text: Input string to classify.

    Returns:
        List of dicts, one per model, each with keys:
        ``model``, ``prediction``, ``confidence``,
        ``negative``, ``neutral``, ``positive``.
    """
    results = []

    for model_name in MODEL_REPOS:
        prediction, probs = predict(text, model_name)

        results.append({
            "model": model_name,
            "prediction": prediction,
            "confidence": float(max(probs)),
            "negative": float(probs[0]),
            "neutral": float(probs[1]),
            "positive": float(probs[2]),
        })

    return results