abhinavvvvv committed on
Commit
fb121b9
·
1 Parent(s): 33c27ca

fixed embed dim errors

Browse files
api/predictor.py CHANGED
@@ -7,6 +7,9 @@ from features.log_feature_extraction import run_pipeline
7
  MODEL_PATH = "models/failure_model.pkl"
8
  FEATURE_PATH = "models/feature_columns.pkl"
9
 
 
 
 
10
 
11
  def predict_logs(log_file):
12
 
@@ -14,23 +17,26 @@ def predict_logs(log_file):
14
 
15
  df = pd.read_csv("temp_features.csv")
16
 
17
- model = joblib.load(MODEL_PATH)
18
- feature_cols = joblib.load(FEATURE_PATH)
 
 
19
 
20
- X = df[feature_cols]
 
21
 
22
- probs = model.predict_proba(X)[:, 1]
23
 
24
  df["failure_probability"] = probs
25
 
 
 
26
  module_risk = (
27
  df.groupby("module")["failure_probability"]
28
  .mean()
29
  .sort_values(ascending=False)
30
  )
31
 
32
- module_results = []
33
-
34
  for module, prob in module_risk.items():
35
 
36
  if prob > 0.75:
@@ -40,18 +46,16 @@ def predict_logs(log_file):
40
  else:
41
  risk = "LOW"
42
 
43
- module_results.append({
44
  "module": module,
45
  "failure_probability": float(prob),
46
  "risk": risk
47
  })
48
 
49
- summary = {
50
- "total_logs": int(len(df)),
51
- "modules_analyzed": int(df["module"].nunique())
52
- }
53
-
54
  return {
55
- "summary": summary,
56
- "module_risk": module_results
 
 
 
57
  }
 
7
  MODEL_PATH = "models/failure_model.pkl"
8
  FEATURE_PATH = "models/feature_columns.pkl"
9
 
10
+ model = joblib.load(MODEL_PATH)
11
+ feature_cols = joblib.load(FEATURE_PATH)
12
+
13
 
14
  def predict_logs(log_file):
15
 
 
17
 
18
  df = pd.read_csv("temp_features.csv")
19
 
20
+ # ensure all training columns exist
21
+ for col in feature_cols:
22
+ if col not in df.columns:
23
+ df[col] = 0
24
 
25
+ # remove extra columns not used by model
26
+ df = df[feature_cols]
27
 
28
+ probs = model.predict_proba(df)[:, 1]
29
 
30
  df["failure_probability"] = probs
31
 
32
+ results = []
33
+
34
  module_risk = (
35
  df.groupby("module")["failure_probability"]
36
  .mean()
37
  .sort_values(ascending=False)
38
  )
39
 
 
 
40
  for module, prob in module_risk.items():
41
 
42
  if prob > 0.75:
 
46
  else:
47
  risk = "LOW"
48
 
49
+ results.append({
50
  "module": module,
51
  "failure_probability": float(prob),
52
  "risk": risk
53
  })
54
 
 
 
 
 
 
55
  return {
56
+ "summary": {
57
+ "total_logs": int(len(df)),
58
+ "modules_analyzed": len(results)
59
+ },
60
+ "module_risk": results
61
  }
features/log_feature_extraction.py CHANGED
@@ -117,9 +117,8 @@ def text_features(df):
117
  for k in keywords:
118
  df[f"kw_{k}"] = df["clean_message"].str.contains(k).astype(int)
119
 
120
- vectorizer = TfidfVectorizer(max_features=300)
121
-
122
- X = vectorizer.fit_transform(df["clean_message"])
123
 
124
  tfidf = pd.DataFrame(
125
  X.toarray(),
 
117
  for k in keywords:
118
  df[f"kw_{k}"] = df["clean_message"].str.contains(k).astype(int)
119
 
120
+ vectorizer = joblib.load("models/tfidf_vectorizer.pkl")
121
+ X = vectorizer.transform(df["clean_message"])
 
122
 
123
  tfidf = pd.DataFrame(
124
  X.toarray(),