new_recommendation / test_integration_debug.py
wujian123's picture
Upload all project files
3c6b551
#!/usr/bin/env python3
"""
测试集成代码中的OpenAlex排序问题
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from reviewer_recommendation.searcher import OpenAlexSearcher, DynamicAcademicSearcher
from reviewer_recommendation.models import PaperInfo
def _print_citation_stats(results, label):
    """Print citation statistics for one result list and return its maximum.

    Args:
        results: Search hits; each one is read with
            ``.get('citedByCount', 0)``, so mapping-like objects are
            expected. May be empty or ``None``.
        label: Text placed immediately before the two printed field names.

    Returns:
        The largest citation count among ``results``, or ``None`` when
        ``results`` is empty/``None`` (nothing is printed in that case).
    """
    if not results:
        return None
    # A hit whose count is present but null would make max() raise a
    # TypeError, so such values are counted as 0.
    counts = [hit.get('citedByCount', 0) or 0 for hit in results]
    top = max(counts)
    print(f"{label}引用量: {counts[:5]}")
    print(f"{label}最大引用量: {top}")
    return top


def test_integration_sorting():
    """Print citation statistics to debug OpenAlex result ordering.

    Runs ``OpenAlexSearcher.search`` twice for a fixed query (once with
    ``sort_by_citations=True``, once with ``False``), then runs
    ``DynamicAcademicSearcher.search_with_dynamic_queries`` for a sample
    paper and compares the maximum citation count of the two returned
    channels. A problem is reported when channel 1's maximum is lower than
    channel 2's.

    The function only prints; it asserts nothing and returns ``None``.
    NOTE(review): the searchers presumably issue live OpenAlex requests, and
    the ``test_`` prefix means pytest will collect this function -- confirm
    that is intended.
    """
    print("=== 测试集成代码中的OpenAlex排序 ===")

    # Sample paper used as input for the dynamic searcher.
    paper = PaperInfo(
        title="Molecular and Structural Biology Cryo-EM",
        abstract="Test abstract for cryo-EM research",
        keywords=["cryo-EM", "structural biology", "molecular biology"],
    )

    # The dynamic searcher is built around the same OpenAlex searcher instance
    # that is also exercised directly below.
    openalex_searcher = OpenAlexSearcher(limit=10)
    dynamic_searcher = DynamicAcademicSearcher(openalex_searcher=openalex_searcher)

    print(f"测试论文: {paper.title}")
    print("=" * 60)

    # Part 1: call OpenAlexSearcher directly with both sort modes.
    print("\n--- 直接测试OpenAlexSearcher ---")
    query = "Molecular and Structural Biology Cryo-EM"

    print("1. 按引用量排序:")
    _print_citation_stats(
        openalex_searcher.search(query, sort_by_citations=True), " "
    )

    print("\n2. 按相关性排序:")
    _print_citation_stats(
        openalex_searcher.search(query, sort_by_citations=False), " "
    )

    # Part 2: go through DynamicAcademicSearcher and compare its two channels.
    print("\n--- 测试DynamicAcademicSearcher ---")
    try:
        channel1_results, channel2_results = (
            dynamic_searcher.search_with_dynamic_queries(paper, num_queries=1)
        )

        print(f"通道1结果数量: {len(channel1_results)}")
        max1 = _print_citation_stats(channel1_results, "通道1")

        print(f"通道2结果数量: {len(channel2_results)}")
        max2 = _print_citation_stats(channel2_results, "通道2")

        # Compare only when both channels returned something; the maxima
        # computed above are reused instead of being recomputed.
        if max1 is not None and max2 is not None:
            print("\n--- 对比分析 ---")
            print(f"通道1最大引用量: {max1}")
            print(f"通道2最大引用量: {max2}")
            if max1 < max2:
                print("❌ 问题确认:通道1的引用量反而更低!")
            else:
                print("✅ 通道1工作正常")
    except Exception as e:
        # Deliberately broad: this is a debugging script, so any failure is
        # reported together with its traceback instead of aborting the run.
        print(f"DynamicAcademicSearcher测试失败: {str(e)}")
        import traceback
        traceback.print_exc()
# Script entry point: run the debug check only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    test_integration_sorting()