#!/usr/bin/env python3
"""
Europe PMC API test script.

Interactive helper for exercising the Europe PMC search API, with a focus on
citation-count data and result ordering.
"""

import json
import time
from typing import Dict, Any, List

import requests


class EuropePMCTester:
    """Small interactive tester for the Europe PMC search endpoint."""

    def __init__(self):
        self.base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        self.timeout = 30  # seconds, passed to requests.get

    @staticmethod
    def _citation_counts(results: List[Dict[str, Any]]) -> List[int]:
        """Return the 'citedByCount' of every result (0 when the key is absent)."""
        return [r.get('citedByCount', 0) for r in results]

    @staticmethod
    def _print_result(index: int, result: Dict[str, Any]) -> None:
        """Print every field of one result, summarising large nested values."""
        print(f"结果 {index} - 完整数据:")
        print("=" * 80)
        for key, value in result.items():
            if isinstance(value, (dict, list)) and len(str(value)) > 100:
                # Large nested objects: show only the type and the length so
                # the dump stays readable.
                if isinstance(value, dict):
                    print(f" {key}: <dict with {len(value)} keys>")
                else:
                    print(f" {key}: <list with {len(value)} items>")
            else:
                print(f" {key}: {value}")
        print("=" * 80)
        print()

    def test_search(self, query: str, sortby: str = "CITED+desc", limit: int = 10) -> Dict[str, Any]:
        """Run one search against Europe PMC and print a summary.

        Args:
            query: Query string sent as the ``query`` request parameter.
            sortby: Value sent as the ``sortby`` request parameter.
            limit: Page size requested from the API.

        Returns:
            On success: ``{"success": True, "total_hits", "results", "params"}``.
            On failure: ``{"success": False, "error", "params"}``.
        """
        print("\n=== 测试Europe PMC API ===")
        print(f"查询: {query}")
        print(f"排序: {sortby}")
        print(f"限制: {limit}")
        print("-" * 50)

        # NOTE(review): requests percent-encodes '+' in parameter values, so
        # "CITED+desc" reaches the server with a literal plus sign. Confirm
        # against the Europe PMC REST docs whether the intended form is
        # `sort=CITED desc` rather than `sortby=CITED+desc`.
        params = {
            "query": query,
            "resultType": "core",
            "pageSize": str(limit),
            "format": "json",
            "sortby": sortby,
        }

        # Only the request and the JSON decoding are expected to fail; keep
        # the try body limited to them so programming errors are not hidden.
        try:
            response = requests.get(
                self.base_url,
                params=params,
                timeout=self.timeout,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
            )
            response.raise_for_status()
            data = response.json()
            if not isinstance(data, dict):
                raise ValueError(f"unexpected response type: {type(data).__name__}")
        except (requests.RequestException, ValueError) as e:
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params,
            }

        result_list = data.get("resultList", {})
        results = result_list.get("result", [])
        # Europe PMC appears to report hitCount at the top level of the
        # response; fall back to the nested location the script used before.
        total_hits = data.get("hitCount", result_list.get("hitCount", 0))

        print(f"总命中数: {total_hits}")
        print(f"返回结果数: {len(results)}")
        print()

        # Only the first three results are dumped because each one is verbose.
        for i, result in enumerate(results[:3], 1):
            self._print_result(i, result)

        return {
            "success": True,
            "total_hits": total_hits,
            "results": results,
            "params": params,
        }

    def test_different_sort_options(self, query: str) -> None:
        """Run the same query once per sort option and print citation stats."""
        sort_options = [
            "CITED+desc",  # citation count, descending
            "CITED+asc",   # citation count, ascending
            "DATE+desc",   # date, descending
            "DATE+asc",    # date, ascending
            "RELEVANCE",   # relevance
        ]

        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sortby in sort_options:
            print(f"\n--- 排序: {sortby} ---")
            result = self.test_search(query, sortby=sortby, limit=5)

            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量统计: {citation_counts}")
                # An empty result page has no mean or max; skip instead of
                # raising ZeroDivisionError / ValueError.
                if citation_counts:
                    print(f"平均引用量: {sum(citation_counts) / len(citation_counts):.2f}")
                    print(f"最大引用量: {max(citation_counts)}")
            else:
                print(f"排序选项 {sortby} 失败")

            time.sleep(1)  # avoid sending requests too quickly

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print their citation counts."""
        test_queries = [
            "cryo-electron microscopy",
            "CRISPR",
            "machine learning",
            "cancer immunotherapy",
            "artificial intelligence",
        ]

        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sortby="CITED+desc", limit=3)

            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量: {citation_counts}")
            else:
                print(f"查询 {query} 失败")

            time.sleep(1)  # avoid sending requests too quickly

    def test_preprints_vs_published(self, query: str) -> None:
        """Compare citation counts of the plain query and a preprint-only query."""
        print("\n=== 测试预印本 vs 已发表论文 ===")
        print(f"查询: {query}")
        print("=" * 60)

        # Plain query (labelled "published" in the output).
        print("\n--- 已发表论文 ---")
        published_result = self.test_search(query, sortby="CITED+desc", limit=5)

        # Same query wrapped in the SRC:PPR / DOI:10.1101* filters.
        print("\n--- 预印本 ---")
        preprint_query = f'(SRC:PPR) AND (DOI:10.1101*) AND ({query})'
        preprint_result = self.test_search(preprint_query, sortby="CITED+desc", limit=5)

        if published_result["success"] and preprint_result["success"]:
            published_citations = self._citation_counts(published_result["results"])
            preprint_citations = self._citation_counts(preprint_result["results"])

            print("\n--- 比较结果 ---")
            print(f"已发表论文引用量: {published_citations}")
            print(f"预印本引用量: {preprint_citations}")
            # Either side may legitimately be empty; only print a mean when
            # there is something to average.
            if published_citations:
                print(f"已发表论文平均引用量: {sum(published_citations) / len(published_citations):.2f}")
            if preprint_citations:
                print(f"预印本平均引用量: {sum(preprint_citations) / len(preprint_citations):.2f}")


def _run_menu(tester: EuropePMCTester) -> None:
    """Show the menu repeatedly and dispatch choices until the user quits."""
    while True:
        print("\n请选择测试选项:")
        print("1. 测试单个查询")
        print("2. 测试不同排序选项")
        print("3. 测试不同查询")
        print("4. 测试预印本 vs 已发表论文")
        print("5. 退出")

        choice = input("\n请输入选项 (1-5): ").strip()

        if choice == "1":
            query = input("请输入查询内容: ").strip()
            if query:
                tester.test_search(query)
        elif choice == "2":
            query = input("请输入查询内容: ").strip()
            if query:
                tester.test_different_sort_options(query)
        elif choice == "3":
            tester.test_different_queries()
        elif choice == "4":
            query = input("请输入查询内容: ").strip()
            if query:
                tester.test_preprints_vs_published(query)
        elif choice == "5":
            print("退出测试工具")
            break
        else:
            print("无效选项,请重新选择")


def main():
    """Entry point: run the interactive test menu."""
    tester = EuropePMCTester()

    print("Europe PMC API 测试工具")
    print("=" * 60)

    # Ctrl-C or a closed stdin at any prompt should end the tool cleanly
    # rather than with a traceback.
    try:
        _run_menu(tester)
    except (EOFError, KeyboardInterrupt):
        print("\n退出测试工具")


if __name__ == "__main__":
    main()