Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| 测试Crossref API的排序机制 | |
| 验证排序是否真的按引用量进行 | |
| """ | |
| import requests | |
| import json | |
def _short_title(item, limit=40):
    """Return the first title of a Crossref work, shortened for display.

    Falls back to 'N/A' when the record has no (or an empty) ``title`` list.
    Titles longer than ``limit`` characters are cut and suffixed with "...".
    """
    titles = item.get('title')
    title = titles[0] if titles else 'N/A'
    if len(title) > limit:
        title = title[:limit] + "..."
    return title


def _citation_count(item):
    """Return a work's ``is-referenced-by-count``, treating missing/null as 0."""
    # ``or 0`` also covers an explicit null, which would otherwise break the
    # integer comparisons done by sorted() and the ``> 0`` filter below.
    return item.get('is-referenced-by-count') or 0


def _print_ranked(items):
    """Print one numbered "title (citations)" line per item.

    Returns the citation counts in the same order as ``items`` so the caller
    can compare two orderings of the same result set.
    """
    counts = []
    for rank, item in enumerate(items, 1):
        count = _citation_count(item)
        counts.append(count)
        print(f" {rank}. {_short_title(item)} (引用: {count})")
    return counts


def test_crossref_sorting():
    """Check whether Crossref's result order matches a citation-count order.

    For each sort option, fetches 10 works for a fixed query, prints them in
    the order returned by the API, prints them again re-sorted locally by
    ``is-referenced-by-count`` (descending), and reports whether the two
    sequences of citation counts are identical. Also prints how many results
    have a non-zero count, plus their average and maximum.

    Performs live HTTP requests and writes to stdout; returns None. Any error
    while handling one sort option is printed and the loop moves on.
    """
    url = "https://api.crossref.org/works"
    # NOTE(review): the mailto address looks like a redacted placeholder --
    # confirm a real contact address is configured here.
    headers = {
        'User-Agent': 'Academic-Reviewer-System/1.0 (mailto:[email protected])'
    }

    # Query under test
    query = "machine learning"
    print("=== 测试Crossref API排序机制 ===")
    print(f"查询: {query}")
    print("=" * 60)

    # Sort modes to compare.
    # NOTE(review): Crossref's docs also list `is-referenced-by-count` as a
    # sort field; it is not exercised here -- confirm whether it should be.
    sort_options = ["published", "relevance", "deposited"]
    for sort_option in sort_options:
        print(f"\n--- 排序方式: {sort_option} ---")
        params = {
            "query": query,
            "rows": 10,
            "sort": sort_option,
            "order": "desc",
            "select": "DOI,title,author,container-title,published-print,published-online,is-referenced-by-count"
        }
        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
            items = data.get("message", {}).get("items", [])
            print(f"返回 {len(items)} 个结果")

            # Citation counts in the order the API returned the items
            print("原始排序的引用量:")
            original_citations = _print_ranked(items)

            # Same items re-sorted locally by citation count (stable, descending)
            sorted_items = sorted(items, key=_citation_count, reverse=True)
            print("\n按引用量重新排序:")
            sorted_citations = _print_ranked(sorted_items)

            # Compare the two orderings
            print("\n排序差异分析:")
            print(f"原始排序引用量: {original_citations}")
            print(f"按引用量排序: {sorted_citations}")
            if original_citations == sorted_citations:
                print("✅ 原始排序与按引用量排序相同")
            else:
                print("❌ 原始排序与按引用量排序不同")
                print(" 说明Crossref API没有按引用量排序")

            # Distribution of citation counts (zero-count items are excluded
            # from the average and maximum)
            non_zero_citations = [c for c in original_citations if c > 0]
            print(f"非零引用量数量: {len(non_zero_citations)}/{len(original_citations)}")
            if non_zero_citations:
                print(f"平均引用量: {sum(non_zero_citations) / len(non_zero_citations):.2f}")
                print(f"最大引用量: {max(non_zero_citations)}")
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, so one
            # failed request must not abort the remaining sort options.
            print(f"错误: {e}")
        # Separator after each sort option
        print("-" * 50)
def _publication_year(item):
    """Return a work's publication year, or 'N/A' when it cannot be read.

    Uses ``published-print`` and falls back to ``published-online``. The year
    is the first element of the first ``date-parts`` entry. A missing date,
    an empty ``date-parts`` list, or a null year all yield 'N/A' instead of
    raising IndexError or returning None.
    """
    pub_date = item.get('published-print') or item.get('published-online') or {}
    date_parts = pub_date.get('date-parts') or [[]]
    first_part = date_parts[0]
    year = first_part[0] if first_part else None
    return year if year is not None else 'N/A'


def test_high_citation_query():
    """Print the most-cited works among the top relevance hits for several queries.

    For each query, fetches 5 works sorted by relevance, re-sorts them locally
    by ``is-referenced-by-count`` (descending) and prints the top 3 with their
    citation count and publication year. Then prints the number of results,
    how many have a non-zero citation count, and the average of those counts.

    Performs live HTTP requests and writes to stdout; returns None. Any error
    while handling one query is printed and the loop moves on.
    """
    url = "https://api.crossref.org/works"
    # NOTE(review): the mailto address looks like a redacted placeholder --
    # confirm a real contact address is configured here.
    headers = {
        'User-Agent': 'Academic-Reviewer-System/1.0 (mailto:[email protected])'
    }

    # Queries that are more likely to return highly cited works
    high_citation_queries = [
        "CRISPR",
        "machine learning",
        "deep learning",
        "artificial intelligence"
    ]
    print("\n=== 测试高引用量查询 ===")
    print("=" * 60)

    for query in high_citation_queries:
        print(f"\n--- 查询: {query} ---")
        params = {
            "query": query,
            "rows": 5,
            "sort": "relevance",  # sort by relevance
            "order": "desc",
            "select": "DOI,title,author,container-title,published-print,published-online,is-referenced-by-count"
        }
        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
            items = data.get("message", {}).get("items", [])

            # Pair every item with its citation count once; a missing or null
            # count is treated as 0 so the sort never compares None with int.
            counted = [(item.get('is-referenced-by-count') or 0, item) for item in items]

            # Stable descending sort: ties keep the API's relevance order
            ranked = sorted(counted, key=lambda pair: pair[0], reverse=True)
            print("按引用量排序的前3个结果:")
            for i, (cited_count, item) in enumerate(ranked[:3], 1):
                titles = item.get('title')
                title = titles[0] if titles else 'N/A'
                if len(title) > 50:
                    title = title[:50] + "..."
                print(f" {i}. {title}")
                print(f" 引用量: {cited_count}, 年份: {_publication_year(item)}")

            # Summary statistics (average over non-zero counts only)
            non_zero = [count for count, _ in counted if count > 0]
            average = sum(non_zero) / len(non_zero) if non_zero else 0
            print(f" 总结果: {len(items)}, 有引用量: {len(non_zero)}, 平均引用量: {average:.2f}")
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, so one
            # failed request must not abort the remaining queries.
            print(f"错误: {e}")
if __name__ == "__main__":
    # When run as a script, execute both diagnostics in their original order.
    for run_check in (test_crossref_sorting, test_high_citation_query):
        run_check()