amitbhatt6075 commited on
Commit
5ac001d
Β·
1 Parent(s): 4f2c694

Added Community AI Module: Toxicity Check and Smart Tagging

Browse files
Files changed (2) hide show
  1. api/main.py +86 -36
  2. core/community_brain.py +114 -0
api/main.py CHANGED
@@ -30,6 +30,7 @@ from core.utils import get_supabase_client, extract_colors_from_url
30
  from core.document_parser import parse_pdf_from_url
31
  from core.creative_chat import CreativeDirector
32
  from core.matcher import load_embedding_model
 
33
 
34
  try:
35
  from core.rag.store import VectorStore
@@ -67,6 +68,7 @@ _likes_predictor = None
67
  _comments_predictor = None
68
  _revenue_forecaster = None
69
  _performance_scorer = None
 
70
 
71
  def to_snake(name: str) -> str:
72
  return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
@@ -150,6 +152,28 @@ class ForecastRequest(BaseModel):
150
  budget: float; category: str; follower_count: int; engagement_rate: float
151
  config: Optional[Dict[str, str]] = None
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  # ==========================================================
154
  # 🎯 FIX 2: NOW DEFINE ForecastResponse
155
  # PerformanceForecast and PayoutForecast are already defined above.
@@ -203,10 +227,10 @@ app = FastAPI(title="Reachify AI Service (Deploy-Ready)", version="11.0.0")
203
  @app.on_event("startup")
204
  def startup_event():
205
  # Make sure we can modify the global variables
206
- global _llm_instance, _creative_director, _support_agent, _ai_strategist, _vector_store, \
207
- _budget_predictor, _influencer_matcher, _performance_predictor, _payout_forecaster, \
208
- _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
209
- _revenue_forecaster, _performance_scorer
210
 
211
  # --- STEP 1: DOWNLOAD AND LOAD THE LLM MODEL ---
212
  print("--- πŸš€ AI Service Starting Up... ---")
@@ -243,10 +267,17 @@ def startup_event():
243
  if VectorStore:
244
  _vector_store = VectorStore()
245
  print(" - RAG Engine Ready.")
 
246
  _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
 
 
 
 
 
 
247
  _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
248
 
249
- print(" - βœ… Core AI components (Director, Strategist, Agent) are online.")
250
 
251
  except Exception as e:
252
  print(f" - ❌ FAILED to initialize core AI components: {e}")
@@ -282,6 +313,7 @@ def startup_event():
282
 
283
  print("\n--- βœ… AI Service startup sequence finished! ---")
284
 
 
285
  @app.get("/")
286
  def health_check():
287
  if _llm_instance:
@@ -723,65 +755,44 @@ def analyze_content_quality(request: ContentQualityRequest):
723
 
724
  caption = request.caption
725
 
726
- # This is a very structured prompt that asks the LLM to act as a specialist
727
- # and return a JSON object, which is easier and more reliable to parse.
728
  prompt = f"""
729
  [SYSTEM]
730
- You are a social media expert. Analyze the following caption based on four criteria: Readability, Engagement, Call to Action (CTA), and Hashtag Strategy.
731
- For each criterion, provide a score from 1 (poor) to 10 (excellent).
732
- Also, provide a final overall score (average of the four scores) and short, actionable feedback.
733
- Respond ONLY with a valid JSON object in the following format:
734
- {{
735
- "overall_score": <float>,
736
- "scores": {{
737
- "readability": <int>,
738
- "engagement": <int>,
739
- "call_to_action": <int>,
740
- "hashtag_strategy": <int>
741
- }},
742
- "feedback": "<string>"
743
- }}
744
 
745
  [CAPTION TO ANALYZE]
746
  "{caption}"
747
 
748
  [YOUR JSON RESPONSE]
749
  """
750
-
751
  try:
752
  print("--- Sending caption to LLM for quality analysis...")
753
  response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)
754
 
755
- # Extract the JSON part of the response
756
  json_text = response['choices'][0]['text'].strip()
757
-
758
- # Find the start and end of the JSON object
759
  start_index = json_text.find('{')
760
  end_index = json_text.rfind('}') + 1
 
761
  if start_index == -1 or end_index == 0:
762
  raise ValueError("LLM did not return a valid JSON object.")
763
 
764
  clean_json_text = json_text[start_index:end_index]
765
-
766
  import json
 
 
767
  analysis_result = json.loads(clean_json_text)
768
 
769
  final_result = {
770
- "overall_score": analysis_result_raw.get("overall_score"),
771
- "feedback": analysis_result_raw.get("feedback"),
772
- "scores": analysis_result_raw.get("scores") or analysis_result_raw.get("score")
773
  }
774
 
775
  print("--- Successfully received and parsed JSON response from LLM.")
776
  return ContentQualityResponse(**final_result)
777
 
778
- except (json.JSONDecodeError, KeyError, ValueError) as e:
779
- print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: {json_text}")
780
- raise HTTPException(status_code=500, detail="Failed to parse the analysis from the AI model. The model may have returned an unexpected format.")
781
  except Exception as e:
782
- print(f"🚨 An unexpected error occurred during content analysis: {e}")
783
- traceback.print_exc()
784
- raise HTTPException(status_code=500, detail=str(e))
785
 
786
  @app.post("/rank/campaigns-for-influencer", response_model=RankCampaignsResponse, summary="Ranks a list of campaigns for one influencer")
787
  async def rank_campaigns_for_influencer_route(request: RankCampaignsRequest):
@@ -1663,4 +1674,43 @@ def create_campaign_from_prompt_endpoint(payload: DirectPromptPayload):
1663
  )
1664
  return {"response": response_text}
1665
  except Exception as e:
1666
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  from core.document_parser import parse_pdf_from_url
31
  from core.creative_chat import CreativeDirector
32
  from core.matcher import load_embedding_model
33
+ from core.community_brain import CommunityBrain
34
 
35
  try:
36
  from core.rag.store import VectorStore
 
68
  _comments_predictor = None
69
  _revenue_forecaster = None
70
  _performance_scorer = None
71
+ _community_brain: Optional[CommunityBrain] = None
72
 
73
  def to_snake(name: str) -> str:
74
  return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
 
152
  budget: float; category: str; follower_count: int; engagement_rate: float
153
  config: Optional[Dict[str, str]] = None
154
 
155
+
156
# --- COMMUNITY LAYER MODELS ---

class ContentCheckRequest(BaseModel):
    """Payload for the moderate-and-tag endpoint: the raw post text."""
    text: str
    # Optional author identifier; not read by the visible endpoints — presumably
    # for audit/rate-limit use by the caller. TODO confirm intended use.
    user_id: Optional[str] = None

class TagGenerationRequest(BaseModel):
    """Payload for tag generation (post content plus an optional niche hint).

    NOTE(review): not referenced by the endpoints visible in this change —
    confirm whether a dedicated tagging route is planned.
    """
    content: str
    niche: Optional[str] = "General"

class ContentCheckResponse(BaseModel):
    """Combined moderation + tagging result returned to the client."""
    toxicity_score: float
    is_safe: bool
    tags: List[str]

class ThreadSummaryRequest(BaseModel):
    """A list of raw comment strings to be summarized."""
    comments: List[str]
class ThreadSummaryResponse(BaseModel):
    """One-sentence summary of a discussion thread."""
    summary: str
174
+
175
+
176
+
177
  # ==========================================================
178
  # 🎯 FIX 2: NOW DEFINE ForecastResponse
179
  # PerformanceForecast and PayoutForecast are already defined above.
 
227
  @app.on_event("startup")
228
  def startup_event():
229
  # Make sure we can modify the global variables
230
+ global _llm_instance, _creative_director, _support_agent, _ai_strategist, _community_brain, \
231
+ _vector_store, _budget_predictor, _influencer_matcher, _performance_predictor, \
232
+ _payout_forecaster, _earnings_optimizer, _earnings_encoder, _likes_predictor, \
233
+ _comments_predictor, _revenue_forecaster, _performance_scorer
234
 
235
  # --- STEP 1: DOWNLOAD AND LOAD THE LLM MODEL ---
236
  print("--- πŸš€ AI Service Starting Up... ---")
 
267
  if VectorStore:
268
  _vector_store = VectorStore()
269
  print(" - RAG Engine Ready.")
270
+
271
  _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
272
+
273
+ # πŸ‘‡ NEW: COMMUNITY MODULE INJECTION
274
+ from core.community_brain import CommunityBrain # Late import prevents circular issues
275
+ _community_brain = CommunityBrain(llm_instance=_llm_instance)
276
+ print(" - βœ… Community Brain (Mod/Tags) initialized.")
277
+
278
  _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
279
 
280
+ print(" - βœ… Core AI components (Director, Strategist, CommunityBrain, Agent) are online.")
281
 
282
  except Exception as e:
283
  print(f" - ❌ FAILED to initialize core AI components: {e}")
 
313
 
314
  print("\n--- βœ… AI Service startup sequence finished! ---")
315
 
316
+
317
  @app.get("/")
318
  def health_check():
319
  if _llm_instance:
 
755
 
756
  caption = request.caption
757
 
 
 
758
  prompt = f"""
759
  [SYSTEM]
760
+ You are a social media expert. Analyze the following caption... Respond ONLY with a valid JSON object.
 
 
 
 
 
 
 
 
 
 
 
 
 
761
 
762
  [CAPTION TO ANALYZE]
763
  "{caption}"
764
 
765
  [YOUR JSON RESPONSE]
766
  """
 
767
  try:
768
  print("--- Sending caption to LLM for quality analysis...")
769
  response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)
770
 
 
771
  json_text = response['choices'][0]['text'].strip()
 
 
772
  start_index = json_text.find('{')
773
  end_index = json_text.rfind('}') + 1
774
+
775
  if start_index == -1 or end_index == 0:
776
  raise ValueError("LLM did not return a valid JSON object.")
777
 
778
  clean_json_text = json_text[start_index:end_index]
 
779
  import json
780
+
781
+ # βœ… Corrected Variable Name
782
  analysis_result = json.loads(clean_json_text)
783
 
784
  final_result = {
785
+ "overall_score": analysis_result.get("overall_score"), # FIXED: Removed _raw
786
+ "feedback": analysis_result.get("feedback"), # FIXED: Removed _raw
787
+ "scores": analysis_result.get("scores") or analysis_result.get("score") # FIXED: Removed _raw
788
  }
789
 
790
  print("--- Successfully received and parsed JSON response from LLM.")
791
  return ContentQualityResponse(**final_result)
792
 
 
 
 
793
  except Exception as e:
794
+ print(f"🚨 Error in Content Quality Analysis: {e}")
795
+ raise HTTPException(status_code=500, detail="Failed to parse analysis.")
 
796
 
797
  @app.post("/rank/campaigns-for-influencer", response_model=RankCampaignsResponse, summary="Ranks a list of campaigns for one influencer")
798
  async def rank_campaigns_for_influencer_route(request: RankCampaignsRequest):
 
1674
  )
1675
  return {"response": response_text}
1676
  except Exception as e:
1677
+ raise HTTPException(status_code=500, detail=str(e))
1678
+
1679
+
1680
+ # ==============================================================
1681
+ # 🧠 COMMUNITY INTELLIGENCE ENDPOINTS
1682
+ # ==============================================================
1683
+
1684
@app.post("/community/moderate-and-tag", response_model=ContentCheckResponse)
def moderate_and_tag(request: ContentCheckRequest):
    """
    Entry point for the 'Post' action: runs the toxicity check on the text
    and, when the content is safe, generates hashtag suggestions in one call.
    """
    print(f"\n🧠 Checking community post content...")

    # Fail open if the community module never came up at startup:
    # report the post as safe with a generic tag rather than erroring.
    if not _community_brain:
        return ContentCheckResponse(toxicity_score=0.0, is_safe=True, tags=["#NewPost"])

    # Step 1: fast moderation pass.
    moderation = _community_brain.moderate_content(request.text)

    # Step 2: hashtag extraction — skipped entirely for flagged content.
    tag_list = _community_brain.generate_smart_tags(request.text) if moderation['is_safe'] else []

    return ContentCheckResponse(
        toxicity_score=moderation['toxicity_score'],
        is_safe=moderation['is_safe'],
        tags=tag_list,
    )
1709
+
1710
@app.post("/community/summarize-discussion", response_model=ThreadSummaryResponse)
def summarize_community_thread(request: ThreadSummaryRequest):
    """Condense a comment thread into a one-line summary via the CommunityBrain."""
    # Module unavailable — return a static placeholder instead of erroring.
    if not _community_brain:
        return ThreadSummaryResponse(summary="Summary unavailable.")
    return ThreadSummaryResponse(summary=_community_brain.summarize_thread(request.comments))
core/community_brain.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import traceback
4
+ from typing import List, Dict, Any
5
+ from llama_cpp import Llama
6
+
7
class CommunityBrain:
    """LLM-backed helpers for the community feed: toxicity moderation,
    hashtag suggestion, and discussion-thread summarization."""

    # First line of defense: static blacklist, checked instantly with no LLM
    # cost. Extend with Hindi/English terms as needed. Hoisted to a class
    # constant so it is not rebuilt on every moderate_content() call.
    TOXIC_KEYWORDS = (
        "abuse", "kill", "suicide", "hate", "idiot", "scam",
        "hack", "betting", "casino", "stupid", "fuck",
    )

    def __init__(self, llm_instance: "Llama"):
        # llm_instance: an already-loaded llama_cpp.Llama callable (annotation
        # quoted so this module can be imported without llama_cpp installed).
        self.llm = llm_instance
        print("--- Community Brain initialized. ---")

    # 🛑 HYBRID MODERATION (Fast + Cheap)
    def moderate_content(self, text: str) -> Dict[str, Any]:
        """
        Check content for toxicity using a local keyword list first,
        then AI for deeper semantic checks if needed.

        Returns:
            dict with keys ``toxicity_score`` (float, 0.0-1.0 — each keyword
            hit adds 0.4), ``is_safe`` (bool, True while score < 0.5) and
            ``flags`` (list[str], one entry per matched keyword).
        """
        score = 0.0
        flags = []
        text_lower = text.lower()

        # Whole-word matching. A plain substring test would flag innocent
        # words: "skills" contains "kill", "hatred" contains "hate", etc.
        for word in self.TOXIC_KEYWORDS:
            if re.search(rf"\b{re.escape(word)}\b", text_lower):
                score += 0.4  # two hits already exceed the 0.5 block threshold
                flags.append(f"Potential inappropriate word: {word}")

        # If highly toxic by keywords alone, fail immediately (don't waste LLM).
        if score > 0.5:
            return {"toxicity_score": min(score, 1.0), "is_safe": False, "flags": flags}

        # A second, contextual LLM pass is deliberately skipped here: running
        # the LLM for every post adds too much latency. Return the keyword
        # score as-is when the post is semi-clean.
        return {
            "toxicity_score": score,
            "is_safe": score < 0.5,
            "flags": flags,
        }

    # 🏷️ SMART TAGGING
    def generate_smart_tags(self, content: str) -> List[str]:
        """
        Extract up to 5 relevant hashtags from *content* using the LLM.

        Falls back to ``["#Community"]`` on any failure so the posting flow
        never breaks because of tagging.
        """
        try:
            # One-shot prompt: the worked example keeps the model's output
            # format stable. Content is truncated to 300 chars to bound cost.
            prompt = f"""[INST] Extract 4 relevant hashtags for this post. Return ONLY hashtags separated by spaces. No explanation.

Post: "I just bought this amazing lipstick shade, the red is so vibrant and it lasts all day! Perfect for summer looks."
Hashtags: #Beauty #Lipstick #Makeup #SummerVibes

Post: "{content[:300]}"
Hashtags: [/INST]"""

            response = self.llm(
                prompt,
                max_tokens=30,
                temperature=0.3,  # low temp for precision
                stop=["[INST]", "\n"],
                echo=False,
            )

            raw_tags = response['choices'][0]['text'].strip()
            tags = [t.strip() for t in raw_tags.split() if t.strip()]

            # Normalization: force a leading '#', strip stray symbols, and
            # drop tokens left empty after cleaning (a pure-punctuation token
            # would otherwise survive as a bare "#").
            clean_tags = []
            for t in tags:
                if not t.startswith('#'):
                    t = f"#{t}"
                t = re.sub(r'[^a-zA-Z0-9#]', '', t)
                if len(t) > 1:
                    clean_tags.append(t)

            return clean_tags[:5]  # Max 5 tags

        except Exception as e:
            print(f"Smart Tag Error: {e}")
            return ["#Community"]

    # 🧠 DISCUSSION SUMMARIZATION
    def summarize_thread(self, comments: List[str]) -> str:
        """
        Summarize a list of comments into a one-sentence key insight.

        Returns a canned string when the thread is empty or the LLM call fails.
        """
        if not comments:
            return "No activity yet."

        # Use the MOST RECENT comments and cap the prompt context at 1500
        # chars. (The previous code sliced comments[:15] — the oldest ones —
        # while its comment claimed "last"; take the tail so recent activity
        # drives the summary.)
        context_text = " | ".join(comments[-15:])[:1500]

        prompt = f"""[INST] Summarize the main sentiment and topic of this discussion in one sentence.

Discussion: {context_text}

Summary: [/INST]"""

        try:
            response = self.llm(
                prompt,
                max_tokens=60,
                temperature=0.5,
                stop=["[INST]", "\n"],
                echo=False,
            )
            return response['choices'][0]['text'].strip()
        except Exception:
            # Best-effort feature: never fail the endpoint over a summary error.
            return "Discussion is active."