Spaces:

zhimingc
/

APP1

Sleeping

App Files Files Community

Zh1m1ngC committed on Mar 1

Commit

ebf39f5

1 Parent(s): 02683f4

First test for the APP

Browse files

Files changed (2) hide show

app.py +167 -1
requirements.txt +6 -0

app.py CHANGED Viewed

@@ -1,5 +1,171 @@
 import streamlit as st
 st.write('LEVEL 1')
-st.write('This is my first app')

+import os
+
 import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+from googlesearch import search
+from sentence_transformers import SentenceTransformer, util
+from transformers import pipeline
+# Optional: SerpAPI key used for the Google Scholar lookup.
+# It is read from the SERPAPI_API_KEY environment variable so that a real key
+# never has to be committed to the repository. When the variable is unset the
+# value is an empty string, which the validator treats as "lookup disabled".
+SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY", "")
+class URLValidator:
+    """
+    Rate the credibility of a web page with respect to a user query.
+
+    Five signals, each expressed on a 0-100 scale, are combined into a single
+    weighted score: a Google Scholar lookup (through SerpAPI), semantic
+    relevance, a fact-check lookup, a sentiment-based bias proxy, and a
+    web-search cross-check. Every network-backed signal degrades to a fixed
+    default instead of raising, so one failing service never aborts a rating.
+    """
+
+    # Placeholder shipped in the template; treated the same as "no key set".
+    _PLACEHOLDER_API_KEY = "YOUR_SERPAPI_KEY"
+
+    # Seconds to wait for any single outbound HTTP request.
+    _HTTP_TIMEOUT = 10
+
+    # Bias score per (upper-cased) sentiment label. Both the human-readable
+    # names and the generic LABEL_n names are listed: per its model card, the
+    # cardiffnlp/twitter-roberta-base-sentiment checkpoint loaded in __init__
+    # reports LABEL_0 (negative), LABEL_1 (neutral) and LABEL_2 (positive).
+    # Any label not listed here scores 30.
+    _BIAS_SCORES = {
+        "POSITIVE": 100,
+        "LABEL_2": 100,
+        "NEUTRAL": 50,
+        "LABEL_1": 50,
+    }
+
+    def __init__(self):
+        """Load the three models once so every rating reuses them."""
+        self.similarity_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+        # NOTE(review): this classifier is loaded but not used by any method
+        # of this class; kept because it is part of the public attributes.
+        self.fake_news_classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-fake-news-detection")
+        self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
+
+    def fetch_page_content(self, url: str) -> str:
+        """
+        Return the text of all <p> elements at `url`, joined by spaces.
+
+        Returns an empty string when the request fails or the server answers
+        with an HTTP error status.
+        """
+        try:
+            response = requests.get(url, timeout=self._HTTP_TIMEOUT)
+            response.raise_for_status()
+        except requests.RequestException:
+            return ""
+        soup = BeautifulSoup(response.text, "html.parser")
+        return " ".join(p.get_text() for p in soup.find_all("p"))
+
+    def compute_similarity_score(self, user_query: str, content: str) -> int:
+        """
+        Return the query/content cosine similarity as an integer in 0-100.
+
+        Empty content scores 0.
+        """
+        if not content:
+            return 0
+        query_vec = self.similarity_model.encode(user_query)
+        content_vec = self.similarity_model.encode(content)
+        cosine = util.pytorch_cos_sim(query_vec, content_vec).item()
+        # Cosine similarity may be negative; clamp so an unrelated page scores
+        # 0 instead of subtracting from the weighted total.
+        return max(0, min(100, int(cosine * 100)))
+
+    def detect_bias(self, content: str) -> int:
+        """
+        Return a bias score derived from the sentiment of the first 512 characters.
+
+        Positive sentiment scores 100, neutral 50, anything else 30. Empty
+        content scores 50.
+        """
+        if not content:
+            return 50
+        sentiment_result = self.sentiment_analyzer(content[:512])[0]
+        label = str(sentiment_result["label"]).upper()
+        return self._BIAS_SCORES.get(label, 30)
+
+    def check_google_scholar(self, url: str) -> int:
+        """
+        Return a 0-100 score from a Google Scholar search for `url` via SerpAPI.
+
+        The score is 10 points per organic result in the response, capped at
+        100. Returns 0 when no API key is configured, when the request fails,
+        or when the response is not usable JSON.
+        """
+        if not SERPAPI_API_KEY or SERPAPI_API_KEY == self._PLACEHOLDER_API_KEY:
+            return 0  # No real key: skip the network call entirely
+        params = {"q": url, "engine": "google_scholar", "api_key": SERPAPI_API_KEY}
+        try:
+            response = requests.get("https://serpapi.com/search", params=params, timeout=self._HTTP_TIMEOUT)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError):
+            return 0  # Default to no citations
+        results = data.get("organic_results", []) if isinstance(data, dict) else []
+        return min(len(results) * 10, 100)  # Normalize to 100 scale
+
+    def check_facts(self, content: str) -> int:
+        """
+        Return a fact-check score for the first 200 characters of `content`.
+
+        Scores 80 when the service returns a non-empty "claims" entry, 40 when
+        it does not, and 50 when content is empty or the lookup fails.
+        """
+        if not content:
+            return 50
+        # NOTE(review): confirm this endpoint is still served; the lookup
+        # falls back to 50 whenever it does not answer with JSON.
+        api_url = "https://toolbox.google.com/factcheck/api/v1/claimsearch"
+        try:
+            # Passing the text through `params` URL-encodes it; page text
+            # routinely contains '&', '#', and spaces.
+            response = requests.get(api_url, params={"query": content[:200]}, timeout=self._HTTP_TIMEOUT)
+            data = response.json()
+        except (requests.RequestException, ValueError):
+            return 50  # Default uncertainty score
+        if isinstance(data, dict) and data.get("claims"):
+            return 80
+        return 40
+
+    def cross_verify(self, user_query: str) -> int:
+        """
+        Return 20 points per web-search hit for `user_query` (at most 5 hits, capped at 100).
+
+        Returns 50 when the search itself fails.
+        """
+        try:
+            similar_articles = list(search(user_query, num_results=5))
+        except Exception:
+            # The search helper scrapes a web page and can fail in many ways;
+            # stay best-effort, but let KeyboardInterrupt/SystemExit propagate.
+            return 50  # Default
+        return min(len(similar_articles) * 20, 100)  # Normalize
+
+    def get_star_rating(self, score: float) -> tuple:
+        """
+        Convert a 0-100 score into `(stars, icons)` with stars in 1-5.
+
+        Halves round up, so 50 maps to 3 stars and 70 to 4. The built-in
+        round() rounds halves to the nearest even integer, which would map
+        50 to 2 stars.
+        """
+        stars = max(1, min(5, int(score / 20 + 0.5)))
+        return stars, "⭐" * stars + "☆" * (5 - stars)
+
+    def generate_explanation(self, scores) -> str:
+        """
+        Build a human-readable explanation of the individual scores.
+
+        `scores` must provide the keys "citations", "relevance", "bias" and
+        "cross_verification". A bias score from 50 to 70 adds no line.
+        """
+        lines = ["Here’s how we evaluated the source:\n\n"]
+
+        citations = scores["citations"]
+        if citations > 80:
+            lines.append("✅ This source is widely cited, indicating strong credibility.\n")
+        elif citations > 40:
+            lines.append("ℹ️ This source has some citations but is not a top reference.\n")
+        else:
+            lines.append("⚠️ This source has few or no citations, so credibility is uncertain.\n")
+
+        relevance = scores["relevance"]
+        if relevance > 80:
+            lines.append("✅ The content is highly relevant to your query.\n")
+        elif relevance > 50:
+            lines.append("ℹ️ The content is somewhat relevant but may include extra information.\n")
+        else:
+            lines.append("⚠️ The content has low relevance to your query.\n")
+
+        bias = scores["bias"]
+        if bias < 50:
+            lines.append("⚠️ The article appears biased and opinionated.\n")
+        elif bias > 70:
+            lines.append("✅ The content appears neutral and balanced.\n")
+
+        cross_verification = scores["cross_verification"]
+        if cross_verification > 80:
+            lines.append("✅ Other sources confirm the information, increasing reliability.\n")
+        elif cross_verification > 50:
+            lines.append("ℹ️ Some other sources confirm this, but not many.\n")
+        else:
+            lines.append("⚠️ Few sources discuss this, so it may be speculative.\n")
+
+        return "".join(lines)
+
+    def rate_url_validity(self, user_query: str, url: str) -> dict:
+        """
+        Evaluate `url` against `user_query`.
+
+        Returns a dict with "url", "validity_score" (weighted 0-100 score
+        rounded to two decimals), "stars" (icon string) and "explanation".
+        """
+        content = self.fetch_page_content(url)
+        scores = {
+            "citations": self.check_google_scholar(url),
+            "relevance": self.compute_similarity_score(user_query, content),
+            "bias": self.detect_bias(content),
+            "fact_check": self.check_facts(content),
+            "cross_verification": self.cross_verify(user_query)
+        }
+        # Weighted Score Calculation (weights sum to 1.0)
+        final_score = (
+            (0.3 * scores["citations"]) +
+            (0.25 * scores["relevance"]) +
+            (0.2 * scores["fact_check"]) +
+            (0.15 * scores["bias"]) +
+            (0.1 * scores["cross_verification"])
+        )
+        _, star_icon = self.get_star_rating(final_score)
+        return {
+            "url": url,
+            "validity_score": round(final_score, 2),
+            "stars": star_icon,
+            "explanation": self.generate_explanation(scores)
+        }
 st.write('LEVEL 1')
+st.write('This is my first app')
+
+
+@st.cache_resource
+def get_validator() -> URLValidator:
+    """Build the validator once and reuse it across Streamlit reruns.
+
+    Constructing URLValidator loads three models, so creating a fresh
+    instance on every button click would repeat that work each time.
+    """
+    return URLValidator()
+
+
+# Input fields for the user query and URL; surrounding whitespace is dropped
+# so that a blank-looking entry is treated as missing.
+user_query = st.text_input("Enter your query", "").strip()
+url = st.text_input("Enter the URL", "").strip()
+# Create a button to trigger validation
+if st.button("Check URL Validity"):
+    # Check if both inputs are provided
+    if user_query and url:
+        # Run the validation with the shared validator instance
+        result = get_validator().rate_url_validity(user_query, url)
+        # Display results in the app
+        st.write(f"🔗 **URL**: {result['url']}")
+        # validity_score is the weighted score on a 0-100 scale, not a 5-point one
+        st.write(f"⭐ **Rating**: {result['stars']} ({result['validity_score']}/100)")
+        st.write("### Explanation:")
+        # Markdown collapses single newlines; two trailing spaces force a
+        # line break so each finding is shown on its own line.
+        st.write(result["explanation"].replace("\n", "  \n"))
+    else:
+        st.error("Please enter both a query and a URL to proceed.")

requirements.txt CHANGED Viewed

	@@ -1 +1,7 @@






1	streamlit

+requests
+beautifulsoup4
+sentence-transformers
+transformers
+googlesearch-python
+serpapi
 streamlit