#!/usr/bin/env python3
"""
Crossref API 测试脚本
测试Crossref API的引用量数据和检索功能
"""

import requests
import json
import time
from typing import Dict, Any, List

class CrossrefTester:
    """Console tester for the Crossref REST API ``/works`` endpoint.

    Every ``test_*`` method issues HTTP GET requests with ``requests`` and
    prints a human-readable summary (title, citation count, journal, year,
    authors, DOI) to stdout. Failures are reported on stdout instead of being
    raised, so the interactive menu keeps running.
    """

    def __init__(self):
        self.base_url = "https://api.crossref.org/works"
        self.timeout = 30  # handed to requests.get(timeout=...)
        # Sent with every request; carries a mailto contact address.
        self.headers = {
            'User-Agent': 'Academic-Reviewer-System/1.0 (mailto:test@example.com)'
        }

    # ------------------------------------------------------------------
    # Private helpers: pull display values out of one Crossref work record.
    # ------------------------------------------------------------------

    @staticmethod
    def _first_or_na(item: Dict[str, Any], field: str) -> Any:
        """Return the first entry of a list-valued field, or 'N/A' if missing/empty."""
        values = item.get(field)
        return values[0] if values else 'N/A'

    @staticmethod
    def _truncate(text: str, width: int) -> str:
        """Cut *text* to *width* characters and append '...' when it is longer."""
        return text[:width] + "..." if len(text) > width else text

    @staticmethod
    def _citation_count(item: Dict[str, Any]) -> int:
        """Return 'is-referenced-by-count', treating a missing or null value as 0."""
        return item.get('is-referenced-by-count') or 0

    @staticmethod
    def _publication_year(item: Dict[str, Any]) -> Any:
        """Return the year from 'published-print' (else 'published-online'), or 'N/A'.

        The year is read from ``date-parts[0][0]``; an absent, empty or null
        entry yields 'N/A' rather than raising.
        """
        pub_date = item.get('published-print') or item.get('published-online')
        if not pub_date:
            return 'N/A'
        date_parts = pub_date.get('date-parts') or [[]]
        first_date = date_parts[0]
        year = first_date[0] if first_date else None
        return year if year is not None else 'N/A'

    @staticmethod
    def _author_names(item: Dict[str, Any], limit: int = 3) -> List[str]:
        """Return display names for up to *limit* authors.

        An author is shown as "given family", or just the family name when no
        given name is present; entries without a family name are skipped.
        """
        names = []
        for author in (item.get('author') or [])[:limit]:
            given = author.get('given', '')
            family = author.get('family', '')
            if given and family:
                names.append(f"{given} {family}")
            elif family:
                names.append(family)
        return names

    # ------------------------------------------------------------------
    # Public test entry points.
    # ------------------------------------------------------------------

    def test_search(self, query: str, limit: int = 10, sort: str = "published") -> Dict[str, Any]:
        """Run one search against the works endpoint and print the top results.

        Args:
            query: Free-text query, sent as the ``query`` parameter.
            limit: Number of rows requested (``rows`` parameter).
            sort: Sort field (``sort`` parameter); order is always ``desc``.

        Returns:
            On success: ``{"success": True, "total_results", "items", "params"}``.
            On any failure: ``{"success": False, "error", "params"}``.
        """
        print("\n=== 测试Crossref API ===")
        print(f"查询: {query}")
        print(f"排序: {sort}")
        print(f"限制: {limit}")
        print("-" * 50)

        params = {
            "query": query,
            "rows": limit,
            "sort": sort,
            "order": "desc",
            "select": "DOI,title,author,container-title,published-print,published-online,is-referenced-by-count"
        }

        try:
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            data = response.json()

            message = data.get("message", {})
            items = message.get("items", [])
            total_results = message.get("total-results", 0)

            print(f"总命中数: {total_results}")
            print(f"返回结果数: {len(items)}")
            print()

            # Only the first five records are printed; all are returned.
            for i, item in enumerate(items[:5], 1):
                title = self._truncate(self._first_or_na(item, 'title'), 80)
                cited_count = self._citation_count(item)
                journal = self._first_or_na(item, 'container-title')
                pub_year = self._publication_year(item)
                author_names = self._author_names(item)

                print(f"结果 {i}:")
                print(f"  标题: {title}")
                print(f"  被引用次数: {cited_count}")
                print(f"  期刊: {journal}")
                print(f"  年份: {pub_year}")
                print(f"  作者: {', '.join(author_names)}")
                print(f"  DOI: {item.get('DOI', 'N/A')}")
                print()

            return {
                "success": True,
                "total_results": total_results,
                "items": items,
                "params": params
            }

        except Exception as e:
            # Deliberately broad: this is an interactive diagnostic tool, so any
            # failure (network, HTTP status, bad JSON, odd record) is reported
            # to the user and returned instead of ending the session.
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params
            }

    def test_different_sort_options(self, query: str) -> None:
        """Run the same query under several sort fields and print citation stats."""
        sort_options = [
            "published",        # by publication date
            "relevance",        # by relevance score
            "deposited",        # by deposit date
        ]

        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sort_option in sort_options:
            print(f"\n--- 排序: {sort_option} ---")
            result = self.test_search(query, sort=sort_option, limit=5)

            if result["success"]:
                # Citation statistics over the returned records.
                items = result["items"]
                cited_counts = [self._citation_count(item) for item in items]
                print(f"被引用次数统计: {cited_counts}")
                if cited_counts:
                    print(f"平均被引用次数: {sum(cited_counts) / len(cited_counts):.2f}")
                    print(f"最大被引用次数: {max(cited_counts)}")

                # Re-rank locally by citation count and show the top three.
                sorted_items = sorted(items, key=self._citation_count, reverse=True)
                print("按引用量排序的前3个结果:")
                for i, item in enumerate(sorted_items[:3], 1):
                    title = self._truncate(self._first_or_na(item, 'title'), 50)
                    cited_count = self._citation_count(item)
                    print(f"  {i}. {title} (被引用: {cited_count})")
            else:
                print(f"排序选项 {sort_option} 失败")

            time.sleep(1)  # pause between requests to avoid hammering the API

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print each one's citation counts."""
        test_queries = [
            "cryo-electron microscopy",
            "CRISPR",
            "machine learning",
            "cancer immunotherapy",
            "artificial intelligence",
        ]

        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sort="published", limit=3)

            if result["success"]:
                items = result["items"]
                cited_counts = [self._citation_count(item) for item in items]
                print(f"被引用次数: {cited_counts}")
            else:
                print(f"查询 {query} 失败")

            time.sleep(1)  # pause between requests to avoid hammering the API

    def test_doi_lookup(self, doi: str) -> None:
        """Fetch a single work by DOI and print its title and citation fields.

        Args:
            doi: The DOI to look up; it is percent-encoded (keeping ``/``)
                before being placed in the URL path.
        """
        from urllib.parse import quote

        print("\n=== 测试DOI查找 ===")
        print(f"DOI: {doi}")
        print("-" * 50)

        # Encode characters such as '#' or '?' that would otherwise be parsed
        # as URL delimiters and truncate the DOI.
        url = f"{self.base_url}/{quote(doi, safe='/')}"

        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            item = data.get("message", {})

            title = self._first_or_na(item, 'title')
            cited_count = self._citation_count(item)

            print(f"标题: {title}")
            print(f"被引用次数: {cited_count}")
            print(f"DOI: {item.get('DOI', 'N/A')}")

            # Dump every field whose name mentions citations or references.
            print("\n引用量相关字段:")
            for key, value in item.items():
                lowered = key.lower()
                if 'cite' in lowered or 'reference' in lowered:
                    print(f"  {key}: {value}")

        except Exception as e:
            # Broad on purpose: report any failure and return to the menu.
            print(f"DOI查找失败: {str(e)}")

def main():
    """Run the interactive console menu until the user chooses to exit.

    Options 1, 2 and 4 prompt for one line of input and forward it to the
    matching ``CrossrefTester`` method (blank input is ignored); option 3 runs
    the built-in sample queries; option 5 leaves the loop.
    """
    runner = CrossrefTester()

    print("Crossref API 测试工具")
    print("=" * 60)

    menu_lines = (
        "\n请选择测试选项:",
        "1. 测试单个查询",
        "2. 测试不同排序选项",
        "3. 测试不同查询",
        "4. 测试DOI查找",
        "5. 退出",
    )

    # Menu entries that need one extra line of input: choice -> (prompt, handler).
    prompted_actions = {
        "1": ("请输入查询内容: ", runner.test_search),
        "2": ("请输入查询内容: ", runner.test_different_sort_options),
        "4": ("请输入DOI: ", runner.test_doi_lookup),
    }

    while True:
        for line in menu_lines:
            print(line)

        selection = input("\n请输入选项 (1-5): ").strip()

        if selection == "5":
            print("退出测试工具")
            break

        if selection == "3":
            runner.test_different_queries()
            continue

        if selection not in prompted_actions:
            print("无效选项，请重新选择")
            continue

        prompt, handler = prompted_actions[selection]
        answer = input(prompt).strip()
        if answer:
            handler(answer)

# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()