shashshekh8 committed on
Commit
ef9649f
·
1 Parent(s): 9617a0c
Files changed (7) hide show
  1. .gitattributes +1 -22
  2. .gitignore +1 -3
  3. Dockerfile +1 -2
  4. asl_model.pt +3 -0
  5. label_map.pkl +3 -0
  6. main.py +82 -136
  7. requirements.txt +0 -5
.gitattributes CHANGED
@@ -1,35 +1,14 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
  *.pickle filter=lfs diff=lfs merge=lfs -text
21
  *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
4
  *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
5
  *.model filter=lfs diff=lfs merge=lfs -text
 
6
  *.npy filter=lfs diff=lfs merge=lfs -text
 
7
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
8
  *.pickle filter=lfs diff=lfs merge=lfs -text
9
  *.pkl filter=lfs diff=lfs merge=lfs -text
10
  *.pt filter=lfs diff=lfs merge=lfs -text
11
  *.pth filter=lfs diff=lfs merge=lfs -text
 
12
  *.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
13
  *.tflite filter=lfs diff=lfs merge=lfs -text
14
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
.gitignore CHANGED
@@ -2,6 +2,4 @@ __pycache__
2
  *.pyc
3
  .env
4
  *.wav
5
-
6
- # Ignore the heavy model file so git doesn't crash
7
- model.p
 
2
  *.pyc
3
  .env
4
  *.wav
5
+ # asl_model.pt is deliberately NOT ignored: main.py loads it at startup, and it is stored via Git LFS
 
 
Dockerfile CHANGED
@@ -9,8 +9,7 @@ RUN apt-get update && apt-get install -y ffmpeg libsndfile1 libmagic1 && rm -rf
9
  COPY ./requirements.txt /code/requirements.txt
10
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
 
12
- # Copy all project files (NO model.p)
13
  COPY . .
14
 
15
- # Run the FastAPI app
16
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
9
  COPY ./requirements.txt /code/requirements.txt
10
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
 
12
+ # Copy all project files
13
  COPY . .
14
 
 
15
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
asl_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97274818b6b9f09dde65fb7b14852001a928fd8c48434805bd7c6d9f6847b7d7
3
+ size 2765021
label_map.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:231da80f7dff2b6fc6d4aea2a742d6bbf56cbd1cf2e85ad142d8671976646414
3
+ size 598
main.py CHANGED
@@ -2,17 +2,16 @@ import os
2
  import io
3
  import base64
4
  import uuid
5
- import torch
6
  import pickle
7
  import numpy as np
8
- import requests
 
9
  from fastapi import FastAPI, HTTPException
10
  from pydantic import BaseModel
11
  from typing import List
12
  from transformers import pipeline
13
  from PIL import Image
14
 
15
- # FALLBACK FOR MAGIC (Prevents crash if libmagic1 is missing)
16
  try:
17
  import magic
18
  MAGIC_AVAILABLE = True
@@ -22,116 +21,69 @@ except ImportError:
22
 
23
  app = FastAPI(title="SenseMesh AI Engine")
24
 
25
- # --------------------------------------------------------
26
- # 1. DOWNLOAD MODEL FROM GOOGLE DRIVE IF NOT PRESENT
27
- # --------------------------------------------------------
28
- MODEL_URL = "https://drive.google.com/uc?export=download&id=1cT1idReSFH2xzPxhvURR7E0NsarD6AhK"
29
- MODEL_PATH = "model.p"
30
-
31
- def download_model_if_missing():
32
- MODEL_URL = "https://drive.google.com/uc?export=download&id=13Yxz1nUB2Az-sjJv73SLpd7k0C7sTHF5"
33
- MODEL_PATH = "model.p"
34
-
35
- if os.path.exists(MODEL_PATH):
36
- print("Model exists, skipping download.")
37
- return
38
-
39
- print("Downloading ASL model...")
40
- response = requests.get(MODEL_URL)
41
-
42
- if response.status_code != 200:
43
- raise Exception("Failed to download model from Google Drive")
44
 
45
- with open(MODEL_PATH, "wb") as f:
46
- f.write(response.content)
 
47
 
48
- print("Model downloaded successfully!")
49
-
50
- # Trigger download if missing
51
- download_model_if_missing()
52
-
53
- # --------------------------------------------------------
54
- # 2. LOAD ASL MODEL
55
- # --------------------------------------------------------
56
  asl_model = None
57
- print(f"Loading ASL Model from {MODEL_PATH}...")
58
 
59
- if os.path.exists(MODEL_PATH):
60
  try:
61
- with open(MODEL_PATH, 'rb') as f:
62
- model_dict = pickle.load(f)
63
- asl_model = model_dict['model'] if isinstance(model_dict, dict) and 'model' in model_dict else model_dict
64
- print(" ✅ ASL Model Loaded.")
 
 
 
 
 
 
65
  except Exception as e:
66
- print(f" ❌ ASL Load Error: {e}")
67
  else:
68
- print(" ⚠️ ASL Model not found. Sign language features will fail.")
69
-
70
- # --------------------------------------------------------
71
- # 3. LOAD CORE AI MODELS
72
- # --------------------------------------------------------
73
- print("Loading Core AI Models... (Checking for GPU)")
74
- device = 0 if torch.cuda.is_available() else -1
75
- print(f"Device set to: {'GPU' if device == 0 else 'CPU'}")
76
-
77
- sentiment_pipe = pipeline(
78
- "sentiment-analysis",
79
- model="distilbert-base-uncased-finetuned-sst-2-english",
80
- device=device
81
- )
82
- caption_pipe = pipeline(
83
- "image-to-text",
84
- model="nlpconnect/vit-gpt2-image-captioning",
85
- device=device
86
- )
87
- transcribe_pipe = pipeline(
88
- "automatic-speech-recognition",
89
- model="openai/whisper-tiny",
90
- device=device
91
- )
92
- env_audio_pipe = pipeline(
93
- "audio-classification",
94
- model="mit/ast-finetuned-audioset-10-10-0.4593",
95
- device=device
96
- )
97
-
98
- print(" ✅ All Core Models Loaded.")
99
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  class Payload(BaseModel):
102
  data_base64: str = ""
103
  text: str = ""
104
 
105
-
106
  class LandmarkPayload(BaseModel):
107
  landmarks: List[float]
108
 
109
-
110
- DANGER_LABELS = [
111
- "gunshot", "gunfire", "explosion", "scream", "screaming",
112
- "fire alarm", "siren", "glass breaking", "crying", "police car", "ambulance"
113
- ]
114
-
115
-
116
  def save_audio_smartly(base64_string):
117
- """Decodes base64 and saves with correct extension based on MIME type"""
118
  try:
119
  b64_clean = base64_string.split(",")[1] if "," in base64_string else base64_string
120
  data = base64.b64decode(b64_clean)
121
-
122
- ext = ".wav" # Default
123
  if MAGIC_AVAILABLE:
124
  try:
125
  mime = magic.from_buffer(data, mime=True)
126
- if "webm" in mime:
127
- ext = ".webm"
128
- elif "ogg" in mime:
129
- ext = ".ogg"
130
- elif "mp4" in mime:
131
- ext = ".m4a"
132
- except Exception:
133
- pass
134
-
135
  filename = f"/tmp/{uuid.uuid4()}{ext}"
136
  with open(filename, "wb") as f:
137
  f.write(data)
@@ -139,40 +91,17 @@ def save_audio_smartly(base64_string):
139
  except Exception as e:
140
  raise HTTPException(status_code=400, detail=f"Audio Decode Error: {str(e)}")
141
 
142
-
143
  @app.get("/")
144
  def health_check():
145
- return {
146
- "status": "online",
147
- "gpu": torch.cuda.is_available(),
148
- "asl_active": asl_model is not None
149
- }
150
-
151
 
152
  @app.post("/analyze_text")
153
  def analyze_text(payload: Payload):
154
- results = sentiment_pipe(payload.text)
155
- urgency = "low"
156
- triggers = ["help", "emergency", "fire", "danger", "hurt", "call 911"]
157
- if any(t in payload.text.lower() for t in triggers):
158
- urgency = "high"
159
- return {"emotion": results[0]["label"], "urgency": urgency}
160
-
161
-
162
- @app.post("/describe")
163
- def describe_image(payload: Payload):
164
- try:
165
- b64_str = payload.data_base64.split(",")[1] if "," in payload.data_base64 else payload.data_base64
166
- image_data = base64.b64decode(b64_str)
167
- image = Image.open(io.BytesIO(image_data))
168
- captions = caption_pipe(image)
169
- return {"description": captions[0]["generated_text"]}
170
- except Exception as e:
171
- raise HTTPException(status_code=500, detail=str(e))
172
-
173
 
174
  @app.post("/transcribe")
175
- def transcribe_audio(payload: Payload):
176
  filename = None
177
  try:
178
  filename = save_audio_smartly(payload.data_base64)
@@ -181,38 +110,55 @@ def transcribe_audio(payload: Payload):
181
  except Exception as e:
182
  raise HTTPException(status_code=500, detail=str(e))
183
  finally:
184
- if filename and os.path.exists(filename):
185
- os.remove(filename)
186
-
187
 
188
- @app.post("/detect_sound_event")
189
- def detect_sound_event(payload: Payload):
190
  filename = None
191
  try:
192
  filename = save_audio_smartly(payload.data_base64)
193
- results = env_audio_pipe(filename)
194
- top_event = results[0]["label"]
195
-
196
- is_dangerous = any(danger in top_event.lower() for danger in DANGER_LABELS)
197
  urgency_level = "critical" if is_dangerous else "low"
198
-
199
  return {"event": top_event, "urgency": urgency_level}
200
  except Exception as e:
201
  raise HTTPException(status_code=500, detail=str(e))
202
  finally:
203
- if filename and os.path.exists(filename):
204
- os.remove(filename)
 
 
 
 
 
 
 
 
 
 
205
 
206
  @app.post("/predict_sign")
207
  def predict_sign(payload: LandmarkPayload):
208
- if not asl_model:
209
- return {"gesture": "Error: Model Missing"}
210
-
211
  try:
212
- data = np.asarray(payload.landmarks).reshape(1, -1)
213
- prediction = asl_model.predict(data)
214
- return {"gesture": str(prediction[0])}
215
-
 
 
 
 
 
 
 
 
 
 
 
 
216
  except Exception as e:
217
  print("❌ ASL Prediction Error:", e)
218
  return {"gesture": "Error"}
 
2
  import io
3
  import base64
4
  import uuid
 
5
  import pickle
6
  import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
  from fastapi import FastAPI, HTTPException
10
  from pydantic import BaseModel
11
  from typing import List
12
  from transformers import pipeline
13
  from PIL import Image
14
 
 
15
  try:
16
  import magic
17
  MAGIC_AVAILABLE = True
 
21
 
22
  app = FastAPI(title="SenseMesh AI Engine")
23
 
24
+ # 1. MODEL DEFINITION
25
+ class ASLModel(nn.Module):
26
+ def __init__(self, num_classes):
27
+ super().__init__()
28
+ self.lstm = nn.LSTM(150, 128, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
29
+ self.fc = nn.Linear(256, num_classes)
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ def forward(self, x):
32
+ out, _ = self.lstm(x)
33
+ return self.fc(out[:, -1, :])
34
 
35
+ # 2. LOAD RESOURCES
36
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
37
  asl_model = None
38
+ asl_meta = {}
39
 
40
+ if os.path.exists("label_map.pkl") and os.path.exists("asl_model.pt"):
41
  try:
42
+ with open("label_map.pkl", "rb") as f:
43
+ asl_meta = pickle.load(f)
44
+
45
+ num_classes = len(asl_meta["idx_to_label"])
46
+ print(f"Loading ASL Model for {num_classes} words...")
47
+
48
+ asl_model = ASLModel(num_classes=num_classes)
49
+ asl_model.load_state_dict(torch.load("asl_model.pt", map_location=device))
50
+ asl_model.to(device).eval()
51
+ print(" ✅ ASL LSTM Model Loaded Successfully.")
52
  except Exception as e:
53
+ print(f" ❌ ASL Load Fail: {e}")
54
  else:
55
+ print(" ⚠️ ASL Files Missing (asl_model.pt or label_map.pkl). Sign Language disabled.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ # 3. LOAD CORE AI MODELS
58
+ print("Loading Core AI Models...")
59
+ try:
60
+ sentiment_pipe = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
61
+ transcribe_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
62
+ hazard_pipe = pipeline("audio-classification", model="mit/ast-finetuned-audioset-10-10-0.4593")
63
+ caption_pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
64
+ print(" ✅ All Core Models Loaded.")
65
+ except Exception as e:
66
+ print(f" ⚠️ Core Model Load Warning: {e}")
67
 
68
  class Payload(BaseModel):
69
  data_base64: str = ""
70
  text: str = ""
71
 
 
72
  class LandmarkPayload(BaseModel):
73
  landmarks: List[float]
74
 
 
 
 
 
 
 
 
75
  def save_audio_smartly(base64_string):
 
76
  try:
77
  b64_clean = base64_string.split(",")[1] if "," in base64_string else base64_string
78
  data = base64.b64decode(b64_clean)
79
+ ext = ".wav"
 
80
  if MAGIC_AVAILABLE:
81
  try:
82
  mime = magic.from_buffer(data, mime=True)
83
+ if "webm" in mime: ext = ".webm"
84
+ elif "ogg" in mime: ext = ".ogg"
85
+ elif "mp4" in mime: ext = ".m4a"
86
+ except: pass
 
 
 
 
 
87
  filename = f"/tmp/{uuid.uuid4()}{ext}"
88
  with open(filename, "wb") as f:
89
  f.write(data)
 
91
  except Exception as e:
92
  raise HTTPException(status_code=400, detail=f"Audio Decode Error: {str(e)}")
93
 
 
94
  @app.get("/")
95
  def health_check():
96
+ return {"status": "online", "gpu": torch.cuda.is_available(), "asl_active": asl_model is not None}
 
 
 
 
 
97
 
98
  @app.post("/analyze_text")
99
  def analyze_text(payload: Payload):
100
+ res = sentiment_pipe(payload.text)
101
+ return {"emotion": res[0][0]['label']}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  @app.post("/transcribe")
104
+ def transcribe(payload: Payload):
105
  filename = None
106
  try:
107
  filename = save_audio_smartly(payload.data_base64)
 
110
  except Exception as e:
111
  raise HTTPException(status_code=500, detail=str(e))
112
  finally:
113
+ if filename and os.path.exists(filename): os.remove(filename)
 
 
114
 
115
+ @app.post("/detect_hazard")
116
+ def detect_hazard(payload: Payload):
117
  filename = None
118
  try:
119
  filename = save_audio_smartly(payload.data_base64)
120
+ events = hazard_pipe(filename, top_k=5)
121
+ dangers = ["siren", "alarm", "scream", "explosion", "glass", "gunshot", "fire"]
122
+ top_event = events[0]['label']
123
+ is_dangerous = any(d in e['label'].lower() for e in events for d in dangers)
124
  urgency_level = "critical" if is_dangerous else "low"
 
125
  return {"event": top_event, "urgency": urgency_level}
126
  except Exception as e:
127
  raise HTTPException(status_code=500, detail=str(e))
128
  finally:
129
+ if filename and os.path.exists(filename): os.remove(filename)
130
+
131
+ @app.post("/describe")
132
+ def describe_image(payload: Payload):
133
+ try:
134
+ b64_str = payload.data_base64.split(",")[1] if "," in payload.data_base64 else payload.data_base64
135
+ image_data = base64.b64decode(b64_str)
136
+ image = Image.open(io.BytesIO(image_data))
137
+ captions = caption_pipe(image)
138
+ return {"description": captions[0]["generated_text"]}
139
+ except Exception as e:
140
+ raise HTTPException(status_code=500, detail=str(e))
141
 
142
  @app.post("/predict_sign")
143
  def predict_sign(payload: LandmarkPayload):
144
+ if not asl_model: return {"gesture": "Error: Model Missing"}
 
 
145
  try:
146
+ raw_data = np.array(payload.landmarks, dtype=np.float32)
147
+ if raw_data.size != 30 * 150:
148
+ return {"gesture": "Shape Error"}
149
+
150
+ # Normalize the landmarks with the mean/std stored in label_map.pkl
151
+ norm_data = (raw_data - asl_meta["mean"]) / (asl_meta["std"] + 1e-7)
152
+ input_tensor = torch.tensor(norm_data).reshape(1, 30, 150).to(device)
153
+
154
+ with torch.no_grad():
155
+ logits = asl_model(input_tensor)
156
+ idx = torch.argmax(logits, dim=1).item()
157
+ confidence = torch.softmax(logits, dim=1)[0, idx].item()
158
+
159
+ label = asl_meta["idx_to_label"][idx]
160
+ if confidence < 0.7: return {"gesture": "..."}
161
+ return {"gesture": str(label)}
162
  except Exception as e:
163
  print("❌ ASL Prediction Error:", e)
164
  return {"gesture": "Error"}
requirements.txt CHANGED
@@ -1,19 +1,14 @@
1
  numpy==1.26.4
2
  scikit-learn==1.3.0
3
-
4
  fastapi
5
  uvicorn
6
  python-multipart
7
-
8
  torch
9
  transformers==4.37.2
10
-
11
  pillow
12
  pydantic
13
-
14
  scipy
15
  soundfile
16
  librosa
17
  python-magic
18
-
19
  requests
 
1
  numpy==1.26.4
2
  scikit-learn==1.3.0
 
3
  fastapi
4
  uvicorn
5
  python-multipart
 
6
  torch
7
  transformers==4.37.2
 
8
  pillow
9
  pydantic
 
10
  scipy
11
  soundfile
12
  librosa
13
  python-magic
 
14
  requests