JC321 commited on
Commit
c9edecd
·
verified ·
1 Parent(s): e2c7fc0

Upload 2 files

Browse files
service/financial_data_cache_manager.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 财务数据缓存管理器
3
+ 用于管理 2024 FY Financial Metrics 和 Latest 3 Years Financial Metrics 的数据加载和缓存
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Callable, Dict, Any
8
+ import threading
9
+ import time
10
+
11
+
12
+ class DataTask:
13
+ """数据加载任务"""
14
+ def __init__(self, company: str, data_type: str, loader_func: Callable):
15
+ self.company = company
16
+ self.data_type = data_type
17
+ self.loader_func = loader_func
18
+ self.status = "pending" # pending, running, completed, error
19
+ self.result = None
20
+ self.error = None
21
+ self.start_time = None
22
+ self.end_time = None
23
+ self.thread = None
24
+
25
+ def run(self):
26
+ """执行数据加载"""
27
+ self.status = "running"
28
+ self.start_time = datetime.now()
29
+
30
+ try:
31
+ self.result = self.loader_func()
32
+ self.status = "completed"
33
+ except Exception as e:
34
+ self.status = "error"
35
+ self.error = str(e)
36
+ import traceback
37
+ self.error_trace = traceback.format_exc()
38
+ finally:
39
+ self.end_time = datetime.now()
40
+
41
+ def get_age_seconds(self):
42
+ """获取任务年龄(秒)"""
43
+ if self.end_time:
44
+ return (datetime.now() - self.end_time).total_seconds()
45
+ elif self.start_time:
46
+ return (datetime.now() - self.start_time).total_seconds()
47
+ return 0
48
+
49
+
50
+ class FinancialDataCacheManager:
51
+ """财务数据缓存管理器"""
52
+
53
+ def __init__(self, cache_ttl_seconds=1800, max_cache_size=100):
54
+ self.cache_ttl = cache_ttl_seconds
55
+ self.max_cache_size = max_cache_size
56
+ self.tasks: Dict[tuple, DataTask] = {}
57
+ self.lock = threading.Lock()
58
+ self.stats = {
59
+ "cache_hits": 0,
60
+ "cache_misses": 0,
61
+ "background_completions": 0,
62
+ "total_requests": 0
63
+ }
64
+
65
+ def get_or_load_data(self, company: str, data_type: str, loader_func: Callable) -> Any:
66
+ """获取或加载财务数据 (同步版本,返回最终结果)"""
67
+ with self.lock:
68
+ self.stats["total_requests"] += 1
69
+ key = (company, data_type)
70
+
71
+ # 清理过期缓存
72
+ self._cleanup_expired_cache()
73
+
74
+ task = self.tasks.get(key)
75
+
76
+ # 场景1: 缓存已存在且已完成
77
+ if task:
78
+ if task.status == "completed":
79
+ if task.get_age_seconds() < self.cache_ttl:
80
+ self.stats["cache_hits"] += 1
81
+ print(f"✅ [Data Cache HIT] {company} - {data_type} (age: {task.get_age_seconds():.1f}s)")
82
+ return task.result
83
+ else:
84
+ print(f"⏰ [Data Cache EXPIRED] {company} - {data_type}")
85
+ del self.tasks[key]
86
+ task = None
87
+
88
+ # 场景2: 后台任务正在运行
89
+ elif task.status == "running":
90
+ self.stats["cache_hits"] += 1
91
+ print(f"🔄 [Data Cache WAIT] {company} - {data_type} (running for {task.get_age_seconds():.1f}s)")
92
+
93
+ # 等待后台任务完成
94
+ max_wait = 30
95
+ waited = 0
96
+ while task.status == "running" and waited < max_wait:
97
+ time.sleep(0.5)
98
+ waited += 0.5
99
+
100
+ if task.status == "completed":
101
+ self.stats["background_completions"] += 1
102
+ print(f"✅ [Data Background COMPLETED] {company} - {data_type}")
103
+ return task.result
104
+ elif task.status == "error":
105
+ print(f"❌ [Data Background ERROR] {company} - {data_type}: {task.error}")
106
+ raise Exception(f"Data loading failed: {task.error}")
107
+
108
+ # 场景3: 之前失败了,重试
109
+ elif task.status == "error":
110
+ print(f"🔄 [Data Retry after ERROR] {company} - {data_type}")
111
+ del self.tasks[key]
112
+ task = None
113
+
114
+ # 场景4: 缓存不存在,启动新任务
115
+ if not task:
116
+ self.stats["cache_misses"] += 1
117
+ print(f"🆕 [Data Cache MISS] {company} - {data_type} - Starting background loading")
118
+
119
+ task = DataTask(company, data_type, loader_func)
120
+ self.tasks[key] = task
121
+ task.thread = threading.Thread(target=task.run, daemon=True)
122
+ task.thread.start()
123
+
124
+ # 等待任务完成
125
+ max_wait = 30
126
+ waited = 0
127
+ while task.status == "running" and waited < max_wait:
128
+ time.sleep(0.5)
129
+ waited += 0.5
130
+
131
+ if task.status == "completed":
132
+ print(f"✅ [Data NEW COMPLETED] {company} - {data_type}")
133
+ return task.result
134
+ elif task.status == "error":
135
+ print(f"❌ [Data NEW ERROR] {company} - {data_type}: {task.error}")
136
+ raise Exception(f"Data loading failed: {task.error}")
137
+ else:
138
+ print(f"⏱️ [Data TIMEOUT] {company} - {data_type}")
139
+ raise Exception(f"Data loading timeout after {max_wait}s")
140
+
141
+ def _cleanup_expired_cache(self):
142
+ """清理过期缓存"""
143
+ keys_to_remove = []
144
+ for key, task in self.tasks.items():
145
+ if task.status == "completed" and task.get_age_seconds() > self.cache_ttl:
146
+ keys_to_remove.append(key)
147
+
148
+ for key in keys_to_remove:
149
+ company, data_type = key
150
+ print(f"🗑️ [Data Cache CLEANUP] {company} - {data_type}")
151
+ del self.tasks[key]
152
+
153
+ # 限制缓存大小
154
+ if len(self.tasks) > self.max_cache_size:
155
+ completed_tasks = [(k, v) for k, v in self.tasks.items() if v.status == "completed"]
156
+ completed_tasks.sort(key=lambda x: x[1].end_time or datetime.min)
157
+ to_remove = len(self.tasks) - self.max_cache_size
158
+ for i in range(to_remove):
159
+ key, task = completed_tasks[i]
160
+ company, data_type = key
161
+ print(f"🗑️ [Data Cache SIZE LIMIT] {company} - {data_type}")
162
+ del self.tasks[key]
163
+
164
+ def get_stats(self):
165
+ """获取缓存统计"""
166
+ with self.lock:
167
+ total = self.stats["total_requests"]
168
+ hits = self.stats["cache_hits"]
169
+ misses = self.stats["cache_misses"]
170
+ hit_rate = (hits / total * 100) if total > 0 else 0
171
+
172
+ return {
173
+ **self.stats,
174
+ "hit_rate": f"{hit_rate:.1f}%",
175
+ "active_tasks": len([t for t in self.tasks.values() if t.status == "running"]),
176
+ "cached_data": len([t for t in self.tasks.values() if t.status == "completed"])
177
+ }
service/report_cache_manager.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 智能报告缓存管理器
3
+ 用于管理 Investment Suggestion 和 Analysis Report 的后台生成和缓存
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Callable, Dict
8
+ import threading
9
+ import time
10
+
11
+
12
+ class ReportTask:
13
+ """报告生成任务"""
14
+ def __init__(self, company: str, report_type: str, generator_func: Callable):
15
+ self.company = company
16
+ self.report_type = report_type
17
+ self.generator_func = generator_func
18
+ self.status = "pending" # pending, running, completed, error
19
+ self.result = None
20
+ self.error = None
21
+ self.start_time = None
22
+ self.end_time = None
23
+ self.thread = None
24
+
25
+ def run(self):
26
+ """执行任务"""
27
+ self.status = "running"
28
+ self.start_time = datetime.now()
29
+
30
+ try:
31
+ self.result = self.generator_func()
32
+ self.status = "completed"
33
+ except Exception as e:
34
+ self.status = "error"
35
+ self.error = str(e)
36
+ import traceback
37
+ self.error_trace = traceback.format_exc()
38
+ finally:
39
+ self.end_time = datetime.now()
40
+
41
+ def get_age_seconds(self):
42
+ """获取任务年龄(秒)"""
43
+ if self.end_time:
44
+ return (datetime.now() - self.end_time).total_seconds()
45
+ elif self.start_time:
46
+ return (datetime.now() - self.start_time).total_seconds()
47
+ return 0
48
+
49
+
50
+ class ReportCacheManager:
51
+ """智能报告缓存管理器"""
52
+
53
+ def __init__(self, cache_ttl_seconds=3600, max_cache_size=50):
54
+ self.cache_ttl = cache_ttl_seconds
55
+ self.max_cache_size = max_cache_size
56
+ self.tasks: Dict[tuple, ReportTask] = {}
57
+ self.lock = threading.Lock()
58
+ self.stats = {
59
+ "cache_hits": 0,
60
+ "cache_misses": 0,
61
+ "background_completions": 0,
62
+ "total_requests": 0
63
+ }
64
+
65
+ def get_or_create_report(self, company: str, report_type: str, generator_func: Callable):
66
+ """获取或创建报告"""
67
+ with self.lock:
68
+ self.stats["total_requests"] += 1
69
+ key = (company, report_type)
70
+
71
+ # 清理过期缓存
72
+ self._cleanup_expired_cache()
73
+
74
+ task = self.tasks.get(key)
75
+
76
+ # 场景1: 缓存已存在且已完成
77
+ if task:
78
+ if task.status == "completed":
79
+ if task.get_age_seconds() < self.cache_ttl:
80
+ self.stats["cache_hits"] += 1
81
+ print(f"✅ [Cache HIT] {company} - {report_type} (age: {task.get_age_seconds():.1f}s)")
82
+ yield task.result
83
+ return
84
+ else:
85
+ print(f"⏰ [Cache EXPIRED] {company} - {report_type}")
86
+ del self.tasks[key]
87
+ task = None
88
+
89
+ # 场景2: 后台任务正在运行
90
+ elif task.status == "running":
91
+ self.stats["cache_hits"] += 1
92
+ print(f"🔄 [Cache WAIT] {company} - {report_type} (running for {task.get_age_seconds():.1f}s)")
93
+ yield self._get_loading_html(company, report_type, task.get_age_seconds())
94
+
95
+ # 等待后台任务完成
96
+ max_wait = 90
97
+ waited = 0
98
+ while task.status == "running" and waited < max_wait:
99
+ time.sleep(1)
100
+ waited += 1
101
+ yield self._get_loading_html(company, report_type, task.get_age_seconds())
102
+
103
+ if task.status == "completed":
104
+ self.stats["background_completions"] += 1
105
+ print(f"✅ [Background COMPLETED] {company} - {report_type}")
106
+ yield task.result
107
+ return
108
+ elif task.status == "error":
109
+ print(f"❌ [Background ERROR] {company} - {report_type}: {task.error}")
110
+ yield self._get_error_html(company, report_type, task.error)
111
+ return
112
+
113
+ # 场景3: 之前失败了,重试
114
+ elif task.status == "error":
115
+ print(f"🔄 [Retry after ERROR] {company} - {report_type}")
116
+ del self.tasks[key]
117
+ task = None
118
+
119
+ # 场景4: 缓存不存在,启动新任务
120
+ if not task:
121
+ self.stats["cache_misses"] += 1
122
+ print(f"🆕 [Cache MISS] {company} - {report_type} - Starting background generation")
123
+
124
+ task = ReportTask(company, report_type, generator_func)
125
+ self.tasks[key] = task
126
+ task.thread = threading.Thread(target=task.run, daemon=True)
127
+ task.thread.start()
128
+
129
+ # 等待任务完成
130
+ yield self._get_loading_html(company, report_type, 0)
131
+
132
+ max_wait = 90
133
+ waited = 0
134
+ while task.status == "running" and waited < max_wait:
135
+ time.sleep(1)
136
+ waited += 1
137
+ yield self._get_loading_html(company, report_type, task.get_age_seconds())
138
+
139
+ if task.status == "completed":
140
+ print(f"✅ [NEW COMPLETED] {company} - {report_type}")
141
+ yield task.result
142
+ return
143
+ elif task.status == "error":
144
+ print(f"❌ [NEW ERROR] {company} - {report_type}: {task.error}")
145
+ yield self._get_error_html(company, report_type, task.error)
146
+ return
147
+
148
+ def _cleanup_expired_cache(self):
149
+ """清理过期缓存"""
150
+ keys_to_remove = []
151
+ for key, task in self.tasks.items():
152
+ if task.status == "completed" and task.get_age_seconds() > self.cache_ttl:
153
+ keys_to_remove.append(key)
154
+
155
+ for key in keys_to_remove:
156
+ company, report_type = key
157
+ print(f"🗑️ [Cache CLEANUP] {company} - {report_type}")
158
+ del self.tasks[key]
159
+
160
+ # 限制缓存大小
161
+ if len(self.tasks) > self.max_cache_size:
162
+ completed_tasks = [(k, v) for k, v in self.tasks.items() if v.status == "completed"]
163
+ completed_tasks.sort(key=lambda x: x[1].end_time or datetime.min)
164
+ to_remove = len(self.tasks) - self.max_cache_size
165
+ for i in range(to_remove):
166
+ key, task = completed_tasks[i]
167
+ company, report_type = key
168
+ print(f"🗑️ [Cache SIZE LIMIT] {company} - {report_type}")
169
+ del self.tasks[key]
170
+
171
+ def _get_loading_html(self, company: str, report_type: str, elapsed_seconds: float):
172
+ """生成加载状态HTML"""
173
+ report_name = "Investment Suggestion" if report_type == "suggestion" else "Analysis Report"
174
+ elapsed_str = f"{elapsed_seconds:.0f}s" if elapsed_seconds > 0 else "just started"
175
+
176
+ return f'''
177
+ <div style="display: flex; justify-content: center; align-items: center; height: 200px;">
178
+ <div style="text-align: center;">
179
+ <div class="loading-spinner" style="width: 40px; height: 40px; border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto;"></div>
180
+ <p style="margin-top: 20px; color: #666;">
181
+ 🤖 Generating {report_name} for <strong>{company}</strong>...<br>
182
+ <small>Elapsed: {elapsed_str}</small>
183
+ </p>
184
+ <style>
185
+ @keyframes spin {{
186
+ 0% {{ transform: rotate(0deg); }}
187
+ 100% {{ transform: rotate(360deg); }}
188
+ }}
189
+ </style>
190
+ </div>
191
+ </div>
192
+ '''
193
+
194
+ def _get_error_html(self, company: str, report_type: str, error: str):
195
+ """生成错误状态HTML"""
196
+ report_name = "Investment Suggestion" if report_type == "suggestion" else "Analysis Report"
197
+ return f'''
198
+ <div style="padding: 20px; background-color: #fff3cd; border-left: 4px solid #ffc107; border-radius: 4px;">
199
+ <h4 style="margin-top: 0; color: #856404;">⚠️ Generation Failed</h4>
200
+ <p><strong>Report:</strong> {report_name}</p>
201
+ <p><strong>Company:</strong> {company}</p>
202
+ <p><strong>Error:</strong> {error}</p>
203
+ </div>
204
+ '''
205
+
206
+ def get_stats(self):
207
+ """获取缓存统计"""
208
+ with self.lock:
209
+ total = self.stats["total_requests"]
210
+ hits = self.stats["cache_hits"]
211
+ misses = self.stats["cache_misses"]
212
+ hit_rate = (hits / total * 100) if total > 0 else 0
213
+
214
+ return {
215
+ **self.stats,
216
+ "hit_rate": f"{hit_rate:.1f}%",
217
+ "active_tasks": len([t for t in self.tasks.values() if t.status == "running"]),
218
+ "cached_reports": len([t for t in self.tasks.values() if t.status == "completed"])
219
+ }