Zh1m1ngC commited on
Commit
ebf39f5
·
1 Parent(s): 02683f4

First test for the APP

Browse files
Files changed (2) hide show
  1. app.py +167 -1
  2. requirements.txt +6 -0
app.py CHANGED
@@ -1,5 +1,171 @@
1
  import streamlit as st
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  st.write('LEVEL 1')
5
- st.write('This is my first app')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
 
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from transformers import pipeline
7
+ from googlesearch import search
8
+
9
+ # Optional: Add your SerpAPI key here if you want to use Google Scholar lookup
10
+ SERPAPI_API_KEY = "YOUR_SERPAPI_KEY"
11
+
12
class URLValidator:
    """
    Rates the credibility of a web page for a given user query.

    Combines five signals — Google Scholar citations (via SerpAPI), semantic
    relevance, sentiment-based bias detection, a fact-check lookup, and
    cross-verification through web search — into a weighted 0-100 score,
    a 1-5 star rating, and a human-readable explanation.
    """

    def __init__(self):
        # Load models once at construction so repeated ratings don't reload them.
        self.similarity_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
        self.fake_news_classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-fake-news-detection")
        self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")

    def fetch_page_content(self, url: str) -> str:
        """Return the concatenated <p> text of *url*, or "" on any fetch error."""
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            return " ".join(p.text for p in soup.find_all("p"))
        except requests.RequestException:
            return ""  # caller treats empty content as "unknown"

    def compute_similarity_score(self, user_query: str, content: str) -> int:
        """Semantic similarity (0-100) between the query and the page content."""
        if not content:
            return 0
        query_vec = self.similarity_model.encode(user_query)
        content_vec = self.similarity_model.encode(content)
        return int(util.pytorch_cos_sim(query_vec, content_vec).item() * 100)

    def detect_bias(self, content: str) -> int:
        """
        Neutrality score via sentiment: 100 positive, 50 neutral, 30 negative.

        Returns 50 (uncertain) for empty content.
        """
        if not content:
            return 50
        # Only the first 512 chars: the model's input length is limited.
        label = self.sentiment_analyzer(content[:512])[0]["label"]
        # BUGFIX: the cardiffnlp model emits LABEL_0/1/2 (negative/neutral/
        # positive), not POSITIVE/NEUTRAL, so the original comparison never
        # matched and every page scored 30. Accept both label schemes.
        if label in ("POSITIVE", "LABEL_2"):
            return 100
        if label in ("NEUTRAL", "LABEL_1"):
            return 50
        return 30

    def check_google_scholar(self, url: str) -> int:
        """Citation signal (0-100): 10 points per Google Scholar hit via SerpAPI."""
        # BUGFIX: also skip when the key is still the unedited placeholder —
        # the placeholder string is truthy, so the original always issued a
        # request that was guaranteed to fail.
        if not SERPAPI_API_KEY or SERPAPI_API_KEY == "YOUR_SERPAPI_KEY":
            return 0
        params = {"q": url, "engine": "google_scholar", "api_key": SERPAPI_API_KEY}
        try:
            response = requests.get("https://serpapi.com/search", params=params, timeout=10)
            data = response.json()
            return min(len(data.get("organic_results", [])) * 10, 100)  # cap at 100
        except (requests.RequestException, ValueError):
            return 0  # network error or non-JSON body -> no citation evidence

    def check_facts(self, content: str) -> int:
        """Fact-check signal: 80 if related claims exist, 40 if none, 50 on error."""
        if not content:
            return 50
        try:
            # BUGFIX: pass the query via params= so it is URL-encoded; the
            # original f-string URL broke on '&', '#', etc. in page text.
            # NOTE(review): the official Fact Check Tools API lives at
            # factchecktools.googleapis.com and needs an API key — confirm
            # this toolbox endpoint still responds.
            response = requests.get(
                "https://toolbox.google.com/factcheck/api/v1/claimsearch",
                params={"query": content[:200]},
                timeout=10,
            )
            data = response.json()
            return 80 if data.get("claims") else 40
        except (requests.RequestException, ValueError):
            return 50  # uncertain

    def cross_verify(self, user_query: str) -> int:
        """Breadth signal: 20 points per search hit for the query (max 5 hits)."""
        try:
            similar_articles = list(search(user_query, num_results=5))
            return min(len(similar_articles) * 20, 100)
        except Exception:
            # googlesearch raises assorted errors when rate-limited/blocked;
            # treat any failure as "uncertain" rather than crashing the app.
            return 50

    def get_star_rating(self, score: float) -> tuple:
        """Convert a 0-100 score to (stars, icon string), clamped to 1-5 stars."""
        stars = max(1, min(5, round(score / 20)))
        return stars, "⭐" * stars + "☆" * (5 - stars)

    def generate_explanation(self, scores) -> str:
        """Build a human-readable rationale from the per-signal scores dict."""
        explanation = "Here’s how we evaluated the source:\n\n"

        if scores["citations"] > 80:
            explanation += "✅ This source is widely cited, indicating strong credibility.\n"
        elif scores["citations"] > 40:
            explanation += "ℹ️ This source has some citations but is not a top reference.\n"
        else:
            explanation += "⚠️ This source has few or no citations, so credibility is uncertain.\n"

        if scores["relevance"] > 80:
            explanation += "✅ The content is highly relevant to your query.\n"
        elif scores["relevance"] > 50:
            explanation += "ℹ️ The content is somewhat relevant but may include extra information.\n"
        else:
            explanation += "⚠️ The content has low relevance to your query.\n"

        # Note: scores between 50 and 70 inclusive get no bias remark.
        if scores["bias"] < 50:
            explanation += "⚠️ The article appears biased and opinionated.\n"
        elif scores["bias"] > 70:
            explanation += "✅ The content appears neutral and balanced.\n"

        if scores["cross_verification"] > 80:
            explanation += "✅ Other sources confirm the information, increasing reliability.\n"
        elif scores["cross_verification"] > 50:
            explanation += "ℹ️ Some other sources confirm this, but not many.\n"
        else:
            explanation += "⚠️ Few sources discuss this, so it may be speculative.\n"

        return explanation

    def rate_url_validity(self, user_query: str, url: str) -> dict:
        """
        Evaluate *url* against *user_query*.

        Returns {"url", "validity_score" (0-100), "stars" (icon string),
        "explanation"}.
        """
        content = self.fetch_page_content(url)

        scores = {
            "citations": self.check_google_scholar(url),
            "relevance": self.compute_similarity_score(user_query, content),
            "bias": self.detect_bias(content),
            "fact_check": self.check_facts(content),
            "cross_verification": self.cross_verify(user_query),
        }

        # Weighted aggregate: citations 30%, relevance 25%, fact-check 20%,
        # bias 15%, cross-verification 10%.
        final_score = (
            (0.3 * scores["citations"]) +
            (0.25 * scores["relevance"]) +
            (0.2 * scores["fact_check"]) +
            (0.15 * scores["bias"]) +
            (0.1 * scores["cross_verification"])
        )

        stars, star_icon = self.get_star_rating(final_score)
        explanation = self.generate_explanation(scores)

        return {
            "url": url,
            "validity_score": round(final_score, 2),
            "stars": star_icon,
            "explanation": explanation,
        }
146
 
147
st.write('LEVEL 1')
st.write('This is my first app')

# Input fields for the user query and the URL to evaluate.
user_query = st.text_input("Enter your query", "")
url = st.text_input("Enter the URL", "")

# Run the validation only when the user clicks the button.
if st.button("Check URL Validity"):
    # Require both inputs before doing any work.
    if user_query and url:
        # NOTE(review): constructing URLValidator here reloads all models on
        # every click — consider caching it with st.cache_resource.
        validator = URLValidator()

        result = validator.rate_url_validity(user_query, url)

        # Display the results in the app.
        st.write(f"🔗 **URL**: {result['url']}")
        # BUGFIX: validity_score is on a 0-100 scale, so the original
        # "/5" label misrepresented the rating.
        st.write(f"⭐ **Rating**: {result['stars']} ({result['validity_score']}/100)")
        st.write("### Explanation:")
        st.write(result["explanation"])
    else:
        st.error("Please enter both a query and a URL to proceed.")
requirements.txt CHANGED
@@ -1 +1,7 @@
 
 
 
 
 
 
1
  streamlit
 
1
+ requests
2
+ beautifulsoup4
3
+ sentence-transformers
4
+ transformers
5
+ googlesearch-python
6
+ serpapi
7
  streamlit