Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Europe PMC API 测试脚本 | |
| 专门用于测试Europe PMC API的引用量数据和排序功能 | |
| """ | |
| import requests | |
| import json | |
| import time | |
| from typing import Dict, Any, List | |
class EuropePMCTester:
    """Manual tester for the Europe PMC REST search API.

    Every ``test_*`` method sends live search requests and prints a
    human-readable report (hit counts, raw fields, citation counts) to
    stdout.  Nothing is persisted.
    """

    def __init__(self):
        # REST search endpoint queried by every request.
        self.base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        # Per-request timeout in seconds, handed to ``requests.get``.
        self.timeout = 30

    @staticmethod
    def _build_params(query: str, sortby: str, limit: int) -> Dict[str, str]:
        """Build the query-string parameters for one search request.

        ``requests`` percent-encodes a literal ``+`` as ``%2B``, so a value
        written in URL form such as ``"CITED+desc"`` would reach the server
        with a real plus sign.  The plus is therefore turned back into the
        space it stands for; ``requests`` then encodes that space correctly.
        """
        return {
            "query": query,
            "resultType": "core",
            "pageSize": str(limit),
            "format": "json",
            # NOTE(review): the Europe PMC documentation appears to name this
            # parameter ``sort`` -- confirm that ``sortby`` is honoured.
            "sortby": sortby.replace("+", " "),
        }

    @staticmethod
    def _citation_counts(results: List[Dict[str, Any]]) -> List[int]:
        """Return the ``citedByCount`` of each result (0 when absent)."""
        return [r.get('citedByCount', 0) for r in results]

    @staticmethod
    def _mean(values: List[int]) -> float:
        """Return the arithmetic mean of *values*, or 0.0 for an empty list."""
        return sum(values) / len(values) if values else 0.0

    def test_search(self, query: str, sortby: str = "CITED+desc", limit: int = 10) -> Dict[str, Any]:
        """Run one search, print a report, and return the outcome.

        Args:
            query: Europe PMC query string.
            sortby: Sort specification; ``+`` is accepted as a separator and
                sent to the server as a space.
            limit: Page size requested from the API.

        Returns:
            On success ``{"success": True, "total_hits", "results",
            "params"}``; on any failure ``{"success": False, "error",
            "params"}``.  Exceptions are reported, never propagated.
        """
        print("\n=== 测试Europe PMC API ===")
        print(f"查询: {query}")
        print(f"排序: {sortby}")
        print(f"限制: {limit}")
        print("-" * 50)

        # Built outside the ``try`` so the failure branch can echo it back.
        params = self._build_params(query, sortby, limit)

        try:
            response = requests.get(
                self.base_url,
                params=params,
                timeout=self.timeout,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
            )
            response.raise_for_status()
            data = response.json()

            result_list = data.get("resultList", {})
            results = result_list.get("result", [])
            total_hits = data.get("resultList", {}).get("hitCount", 0)

            print(f"总命中数: {total_hits}")
            print(f"返回结果数: {len(results)}")
            print()

            # Dump only the first three records; "core" records are verbose.
            for i, result in enumerate(results[:3], 1):
                print(f"结果 {i} - 完整数据:")
                print("=" * 80)
                for key, value in result.items():
                    if isinstance(value, (dict, list)) and len(str(value)) > 100:
                        # Large containers are summarised by type and size.
                        if isinstance(value, dict):
                            print(f" {key}: <dict with {len(value)} keys>")
                        elif isinstance(value, list):
                            print(f" {key}: <list with {len(value)} items>")
                    else:
                        print(f" {key}: {value}")
                print("=" * 80)
                print()

            return {
                "success": True,
                "total_hits": total_hits,
                "results": results,
                "params": params,
            }
        except Exception as e:
            # Deliberately broad: this is an interactive diagnostic tool, so
            # network, HTTP and JSON-shape errors are all reported uniformly.
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params,
            }

    def test_different_sort_options(self, query: str) -> None:
        """Run *query* once per sort option and print citation statistics.

        A successful search with no results prints an average of 0.00 and a
        maximum of 0 instead of raising.
        """
        sort_options = [
            "CITED+desc",  # citations, descending
            "CITED+asc",   # citations, ascending
            "DATE+desc",   # date, descending
            "DATE+asc",    # date, ascending
            "RELEVANCE",   # relevance
        ]

        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sortby in sort_options:
            print(f"\n--- 排序: {sortby} ---")
            result = self.test_search(query, sortby=sortby, limit=5)

            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量统计: {citation_counts}")
                print(f"平均引用量: {self._mean(citation_counts):.2f}")
                print(f"最大引用量: {max(citation_counts, default=0)}")
            else:
                print(f"排序选项 {sortby} 失败")

            time.sleep(1)  # throttle so the API is not hit too quickly

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print their citation counts."""
        test_queries = [
            "cryo-electron microscopy",
            "CRISPR",
            "machine learning",
            "cancer immunotherapy",
            "artificial intelligence",
        ]

        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sortby="CITED+desc", limit=3)

            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量: {citation_counts}")
            else:
                print(f"查询 {query} 失败")

            time.sleep(1)

    def test_preprints_vs_published(self, query: str) -> None:
        """Compare citation counts of the plain query against preprints only.

        The comparison is printed only when both searches succeed.  An empty
        result list on either side yields an average of 0.00 rather than a
        ZeroDivisionError.
        """
        print("\n=== 测试预印本 vs 已发表论文 ===")
        print(f"查询: {query}")
        print("=" * 60)

        # NOTE(review): this search uses the query unchanged, so it is not
        # actually restricted to published papers -- confirm this is intended.
        print("\n--- 已发表论文 ---")
        published_result = self.test_search(query, sortby="CITED+desc", limit=5)

        # Preprint search: same query narrowed by source and DOI prefix.
        print("\n--- 预印本 ---")
        preprint_query = f'(SRC:PPR) AND (DOI:10.1101*) AND ({query})'
        preprint_result = self.test_search(preprint_query, sortby="CITED+desc", limit=5)

        if published_result["success"] and preprint_result["success"]:
            published_citations = self._citation_counts(published_result["results"])
            preprint_citations = self._citation_counts(preprint_result["results"])

            print("\n--- 比较结果 ---")
            print(f"已发表论文引用量: {published_citations}")
            print(f"预印本引用量: {preprint_citations}")
            print(f"已发表论文平均引用量: {self._mean(published_citations):.2f}")
            print(f"预印本平均引用量: {self._mean(preprint_citations):.2f}")
def main():
    """Run the interactive menu loop until the user picks the exit option."""
    tester = EuropePMCTester()

    print("Europe PMC API 测试工具")
    print("=" * 60)

    menu_lines = (
        "\n请选择测试选项:",
        "1. 测试单个查询",
        "2. 测试不同排序选项",
        "3. 测试不同查询",
        "4. 测试预印本 vs 已发表论文",
        "5. 退出",
    )

    # Menu entries that first ask the user for a query string.
    query_actions = {
        "1": tester.test_search,
        "2": tester.test_different_sort_options,
        "4": tester.test_preprints_vs_published,
    }

    while True:
        for line in menu_lines:
            print(line)

        choice = input("\n请输入选项 (1-5): ").strip()

        if choice == "5":
            print("退出测试工具")
            break

        if choice == "3":
            # The only test that needs no user-supplied query.
            tester.test_different_queries()
            continue

        action = query_actions.get(choice)
        if action is None:
            print("无效选项,请重新选择")
            continue

        query = input("请输入查询内容: ").strip()
        # An empty query is ignored and the menu is shown again.
        if query:
            action(query)
# Start the interactive menu only when executed as a script, not on import.
if __name__ == "__main__":
    main()