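# File: new_recommendation / test_epmc_api.py
# Repository page header captured with the file (not part of the script):
# "wujian123's picture"; commit message "Upload all project files";
# commit 3c6b551.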
#!/usr/bin/env python3
"""
Europe PMC API 测试脚本
专门用于测试Europe PMC API的引用量数据和排序功能
"""
import requests
import json
import time
from typing import Dict, Any, List
class EuropePMCTester:
    """Console helper for exercising the Europe PMC REST search endpoint.

    Every method prints its findings to stdout. Only ``test_search`` returns
    data; the other ``test_*`` methods call it and summarise the
    ``citedByCount`` values of the records it returns.
    """

    def __init__(self):
        # Search endpoint and the timeout (seconds) handed to ``requests.get``.
        self.base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        self.timeout = 30

    @staticmethod
    def _citation_counts(results: List[Dict[str, Any]]) -> List[Any]:
        """Return each record's ``citedByCount``, using 0 when the key is absent."""
        return [record.get('citedByCount', 0) for record in results]

    @staticmethod
    def _format_mean(values: List[Any]) -> str:
        """Format the mean of *values* with two decimals, or "N/A" if empty."""
        # A successful search can legitimately return zero records; dividing
        # by len() would then raise ZeroDivisionError.
        if not values:
            return "N/A"
        return f"{sum(values) / len(values):.2f}"

    @staticmethod
    def _print_record(index: int, record: Dict[str, Any]) -> None:
        """Print every field of one result record, abbreviating bulky containers."""
        print(f"结果 {index} - 完整数据:")
        print("=" * 80)
        for key, value in record.items():
            # Dicts/lists whose repr exceeds 100 characters are reduced to a
            # type-and-size summary; everything else is printed verbatim.
            if isinstance(value, (dict, list)) and len(str(value)) > 100:
                if isinstance(value, dict):
                    print(f" {key}: <dict with {len(value)} keys>")
                else:
                    print(f" {key}: <list with {len(value)} items>")
            else:
                print(f" {key}: {value}")
        print("=" * 80)
        print()

    def test_search(self, query: str, sortby: str = "CITED+desc", limit: int = 10) -> Dict[str, Any]:
        """Run one search request and print a summary of the response.

        Args:
            query: Search expression sent as the ``query`` parameter.
            sortby: Sort specification sent as the ``sortby`` parameter.
            limit: Page size requested from the service.

        Returns:
            On success ``{"success": True, "total_hits", "results", "params"}``;
            on any failure ``{"success": False, "error", "params"}``. This
            method never raises for request/parse errors.
        """
        print("\n=== 测试Europe PMC API ===")
        print(f"查询: {query}")
        print(f"排序: {sortby}")
        print(f"限制: {limit}")
        print("-" * 50)
        # NOTE(review): the Europe PMC REST documentation appears to name the
        # sort parameter ``sort`` with values such as "CITED desc". ``requests``
        # percent-encodes the "+" in "CITED+desc" as %2B, so the server receives
        # a literal plus sign rather than a space. Confirm that ``sortby`` and
        # these values are actually honoured before trusting the ordering.
        params = {
            "query": query,
            "resultType": "core",
            "pageSize": str(limit),
            "format": "json",
            "sortby": sortby,
        }
        try:
            response = requests.get(
                self.base_url,
                params=params,
                timeout=self.timeout,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
            )
            response.raise_for_status()
            data = response.json()
            result_list = data.get("resultList", {})
            results = result_list.get("result", [])
            total_hits = result_list.get("hitCount", 0)
            print(f"总命中数: {total_hits}")
            print(f"返回结果数: {len(results)}")
            print()
            # Only the first three records are dumped in full because each
            # "core" record carries many fields.
            for index, record in enumerate(results[:3], 1):
                self._print_record(index, record)
            return {
                "success": True,
                "total_hits": total_hits,
                "results": results,
                "params": params,
            }
        except Exception as e:
            # Deliberately broad: this is the boundary of a diagnostic tool, so
            # network, HTTP, JSON and unexpected-shape errors are all reported
            # to the caller through the result dict instead of propagating.
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params,
            }

    def test_different_sort_options(self, query: str) -> None:
        """Run *query* once per sort option and print citation statistics.

        Each option requests 5 records; a one-second pause follows every
        request to avoid hitting the service too quickly.
        """
        sort_options = [
            "CITED+desc",  # citation count, descending
            "CITED+asc",  # citation count, ascending
            "DATE+desc",  # date, descending
            "DATE+asc",  # date, ascending
            "RELEVANCE",  # relevance
        ]
        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)
        for sortby in sort_options:
            print(f"\n--- 排序: {sortby} ---")
            result = self.test_search(query, sortby=sortby, limit=5)
            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量统计: {citation_counts}")
                print(f"平均引用量: {self._format_mean(citation_counts)}")
                # max() raises ValueError on an empty list, so an empty result
                # set is reported as "N/A" instead.
                largest = max(citation_counts) if citation_counts else "N/A"
                print(f"最大引用量: {largest}")
            else:
                print(f"排序选项 {sortby} 失败")
            time.sleep(1)  # throttle consecutive requests

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print their citation counts.

        Each query requests 3 records with the "CITED+desc" sort value and is
        followed by a one-second pause.
        """
        test_queries = [
            "cryo-electron microscopy",
            "CRISPR",
            "machine learning",
            "cancer immunotherapy",
            "artificial intelligence",
        ]
        print("\n=== 测试不同查询 ===")
        print("=" * 60)
        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sortby="CITED+desc", limit=3)
            if result["success"]:
                citation_counts = self._citation_counts(result["results"])
                print(f"引用量: {citation_counts}")
            else:
                print(f"查询 {query} 失败")
            time.sleep(1)  # throttle consecutive requests

    def test_preprints_vs_published(self, query: str) -> None:
        """Compare citation counts for *query* with and without a preprint filter.

        The first search sends *query* unchanged. The second wraps it as
        ``(SRC:PPR) AND (DOI:10.1101*) AND (<query>)``. The comparison is
        printed only when both searches succeed; an empty result set on either
        side is reported with an average of "N/A".
        """
        print("\n=== 测试预印本 vs 已发表论文 ===")
        print(f"查询: {query}")
        print("=" * 60)
        # Unfiltered search (labelled "published" in the output).
        print("\n--- 已发表论文 ---")
        published_result = self.test_search(query, sortby="CITED+desc", limit=5)
        # Same query restricted by the preprint filter.
        print("\n--- 预印本 ---")
        preprint_query = f'(SRC:PPR) AND (DOI:10.1101*) AND ({query})'
        preprint_result = self.test_search(preprint_query, sortby="CITED+desc", limit=5)
        if not (published_result["success"] and preprint_result["success"]):
            return
        published_citations = self._citation_counts(published_result["results"])
        preprint_citations = self._citation_counts(preprint_result["results"])
        print("\n--- 比较结果 ---")
        print(f"已发表论文引用量: {published_citations}")
        print(f"预印本引用量: {preprint_citations}")
        print(f"已发表论文平均引用量: {self._format_mean(published_citations)}")
        print(f"预印本平均引用量: {self._format_mean(preprint_citations)}")
def main():
    """Run the interactive menu loop for the Europe PMC tester.

    Options 1, 2 and 4 prompt for a query and run the matching test when the
    query is non-empty; option 3 runs the built-in sample queries; option 5
    exits. End-of-input (Ctrl-D / closed stdin) or Ctrl-C at a prompt is
    treated as a request to exit rather than crashing with a traceback.
    """
    tester = EuropePMCTester()
    # Menu options that need a user-supplied query, mapped to the test they run.
    query_tests = {
        "1": tester.test_search,
        "2": tester.test_different_sort_options,
        "4": tester.test_preprints_vs_published,
    }
    print("Europe PMC API 测试工具")
    print("=" * 60)
    while True:
        print("\n请选择测试选项:")
        print("1. 测试单个查询")
        print("2. 测试不同排序选项")
        print("3. 测试不同查询")
        print("4. 测试预印本 vs 已发表论文")
        print("5. 退出")
        try:
            choice = input("\n请输入选项 (1-5): ").strip()
            query = ""
            if choice in query_tests:
                query = input("请输入查询内容: ").strip()
        except (EOFError, KeyboardInterrupt):
            # input() raises EOFError when stdin is exhausted; exit cleanly.
            print()
            print("退出测试工具")
            break
        if choice in query_tests:
            # An empty query silently returns to the menu.
            if query:
                query_tests[choice](query)
        elif choice == "3":
            tester.test_different_queries()
        elif choice == "5":
            print("退出测试工具")
            break
        else:
            print("无效选项,请重新选择")


if __name__ == "__main__":
    main()