#!/usr/bin/env python3
"""
Crossref API test script.

Exercises the Crossref API's citation-count data and search functionality.
"""

import json
import time
from typing import Dict, Any, List
from urllib.parse import quote

import requests


def _first_or_na(values: Any) -> Any:
    """Return the first element of a list-valued field, or 'N/A' if empty/missing."""
    return values[0] if values else 'N/A'


def _truncate(text: str, width: int) -> str:
    """Cut *text* to *width* characters, appending '...' when it was longer."""
    return text[:width] + "..." if len(text) > width else text


def _publication_year(item: Dict[str, Any]) -> Any:
    """Return the year from 'published-print' (else 'published-online'), or 'N/A'.

    Every level of the nested ``date-parts`` structure is checked before
    indexing so that an empty or partially filled value yields 'N/A' instead
    of raising ``IndexError`` or returning ``None``.
    """
    pub_date = item.get('published-print') or item.get('published-online')
    if not pub_date:
        return 'N/A'
    date_parts = pub_date.get('date-parts') or [[None]]
    first_part = date_parts[0]
    year = first_part[0] if first_part else None
    return 'N/A' if year is None else year


def _author_names(item: Dict[str, Any], limit: int = 3) -> List[str]:
    """Return display names for up to *limit* authors of *item*.

    An author with both 'given' and 'family' is rendered as "given family";
    an author with only 'family' is rendered as the family name alone; any
    other author entry is skipped.
    """
    names = []
    # `or []` also covers an explicit null value, not just a missing key.
    for author in (item.get('author') or [])[:limit]:
        given = author.get('given', '')
        family = author.get('family', '')
        if given and family:
            names.append(f"{given} {family}")
        elif family:
            names.append(family)
    return names


def _print_search_item(index: int, item: Dict[str, Any]) -> None:
    """Print a one-record summary (title, citations, journal, year, authors, DOI)."""
    title = _truncate(_first_or_na(item.get('title')), 80)
    cited_count = item.get('is-referenced-by-count', 0)
    journal = _first_or_na(item.get('container-title'))
    pub_year = _publication_year(item)
    author_names = _author_names(item)

    print(f"结果 {index}:")
    print(f" 标题: {title}")
    print(f" 被引用次数: {cited_count}")
    print(f" 期刊: {journal}")
    print(f" 年份: {pub_year}")
    print(f" 作者: {', '.join(author_names)}")
    print(f" DOI: {item.get('DOI', 'N/A')}")
    print()


class CrossrefTester:
    """Interactive tester for the Crossref works API."""

    def __init__(self):
        """Set the endpoint URL, request timeout (30) and User-Agent header."""
        self.base_url = "https://api.crossref.org/works"
        self.timeout = 30
        self.headers = {
            'User-Agent': 'Academic-Reviewer-System/1.0 (mailto:test@example.com)'
        }

    def test_search(self, query: str, limit: int = 10, sort: str = "published") -> Dict[str, Any]:
        """Run one search against the works endpoint and print a summary.

        Args:
            query: Free-text query, sent as the ``query`` parameter.
            limit: Number of rows requested, sent as the ``rows`` parameter.
            sort: Sort field, sent as the ``sort`` parameter (order is "desc").

        Returns:
            On success: ``{"success": True, "total_results", "items", "params"}``.
            On any failure: ``{"success": False, "error", "params"}``.
            At most the first five items are printed; all returned items are
            included in ``"items"``.
        """
        print("\n=== 测试Crossref API ===")
        print(f"查询: {query}")
        print(f"排序: {sort}")
        print(f"限制: {limit}")
        print("-" * 50)

        params = {
            "query": query,
            "rows": limit,
            "sort": sort,
            "order": "desc",
            "select": "DOI,title,author,container-title,published-print,published-online,is-referenced-by-count"
        }

        # Deliberately broad: this is a diagnostic tool, so every failure
        # (network, HTTP status, JSON decoding, unexpected payload shape) is
        # reported to the caller through the "success" flag instead of raising.
        try:
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            data = response.json()

            message = data.get("message", {})
            items = message.get("items", [])
            total_results = message.get("total-results", 0)

            print(f"总命中数: {total_results}")
            print(f"返回结果数: {len(items)}")
            print()

            # Show only the first few results.
            for index, item in enumerate(items[:5], 1):
                _print_search_item(index, item)

            return {
                "success": True,
                "total_results": total_results,
                "items": items,
                "params": params
            }
        except Exception as e:
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params
            }

    def test_different_sort_options(self, query: str) -> None:
        """Run *query* once per sort option and print citation statistics.

        For each option, prints the citation counts of the returned items,
        their mean and maximum, and the top three items by citation count.
        """
        sort_options = [
            "published",  # sort by publication date
            "relevance",  # sort by relevance
            "deposited",  # sort by deposit date
        ]

        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sort_option in sort_options:
            print(f"\n--- 排序: {sort_option} ---")
            result = self.test_search(query, sort=sort_option, limit=5)

            if result["success"]:
                # Citation-count statistics.
                items = result["items"]
                cited_counts = [item.get('is-referenced-by-count', 0) for item in items]
                print(f"被引用次数统计: {cited_counts}")

                # Guard against division by zero / max() of an empty list.
                if cited_counts:
                    print(f"平均被引用次数: {sum(cited_counts) / len(cited_counts):.2f}")
                    print(f"最大被引用次数: {max(cited_counts)}")

                # Re-rank locally by citation count, highest first.
                sorted_items = sorted(
                    items,
                    key=lambda x: x.get('is-referenced-by-count', 0),
                    reverse=True
                )
                print("按引用量排序的前3个结果:")
                for i, item in enumerate(sorted_items[:3], 1):
                    title = _truncate(_first_or_na(item.get('title')), 50)
                    cited_count = item.get('is-referenced-by-count', 0)
                    print(f" {i}. {title} (被引用: {cited_count})")
            else:
                print(f"排序选项 {sort_option} 失败")

            time.sleep(1)  # avoid sending requests too quickly

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print their citation counts."""
        test_queries = [
            "cryo-electron microscopy",
            "CRISPR",
            "machine learning",
            "cancer immunotherapy",
            "artificial intelligence",
        ]

        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sort="published", limit=3)

            if result["success"]:
                items = result["items"]
                cited_counts = [item.get('is-referenced-by-count', 0) for item in items]
                print(f"被引用次数: {cited_counts}")
            else:
                print(f"查询 {query} 失败")

            time.sleep(1)  # pause between requests

    def test_doi_lookup(self, doi: str) -> None:
        """Fetch a single work by DOI and print its title and citation fields.

        The DOI is percent-encoded before being placed in the URL path so that
        characters such as '#', '?' or '%' are not interpreted as URL syntax;
        '/' is left as-is. Failures are printed, not raised.
        """
        print("\n=== 测试DOI查找 ===")
        print(f"DOI: {doi}")
        print("-" * 50)

        url = f"{self.base_url}/{quote(doi, safe='/')}"

        # Deliberately broad for the same reason as in test_search: report
        # every failure to the console instead of crashing the menu loop.
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            item = data.get("message", {})
            title = _first_or_na(item.get('title'))
            cited_count = item.get('is-referenced-by-count', 0)

            print(f"标题: {title}")
            print(f"被引用次数: {cited_count}")
            print(f"DOI: {item.get('DOI', 'N/A')}")

            # Dump every field whose name mentions citations or references.
            print("\n引用量相关字段:")
            for key, value in item.items():
                lowered = key.lower()
                if 'cite' in lowered or 'reference' in lowered:
                    print(f" {key}: {value}")
        except Exception as e:
            print(f"DOI查找失败: {str(e)}")


def main():
    """Run the interactive menu loop until the user chooses to exit.

    End-of-input or Ctrl-C at any point ends the session with the normal
    exit message instead of a traceback.
    """
    tester = CrossrefTester()

    print("Crossref API 测试工具")
    print("=" * 60)

    try:
        while True:
            print("\n请选择测试选项:")
            print("1. 测试单个查询")
            print("2. 测试不同排序选项")
            print("3. 测试不同查询")
            print("4. 测试DOI查找")
            print("5. 退出")

            choice = input("\n请输入选项 (1-5): ").strip()

            if choice == "1":
                query = input("请输入查询内容: ").strip()
                if query:
                    tester.test_search(query)
            elif choice == "2":
                query = input("请输入查询内容: ").strip()
                if query:
                    tester.test_different_sort_options(query)
            elif choice == "3":
                tester.test_different_queries()
            elif choice == "4":
                doi = input("请输入DOI: ").strip()
                if doi:
                    tester.test_doi_lookup(doi)
            elif choice == "5":
                print("退出测试工具")
                break
            else:
                print("无效选项,请重新选择")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: leave quietly with the usual exit message.
        print("\n退出测试工具")


if __name__ == "__main__":
    main()