Andy0830 committed on
Commit
8c3f3bc
·
verified ·
1 Parent(s): 0ed4edb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -12,18 +12,19 @@ SYSTEM_TITLE = "花蓮慈濟醫院公文輔助判決系統"
12
  FILE_PATH = 'data.csv'
13
  INDEX_FILE = 'corpus_embeddings.pt'
14
 
15
- # ▼▼▼ 設定登入帳號密碼 (您可以修改這裡) ▼▼▼
16
  # 格式:("帳號", "密碼")
17
- LOGIN_DATA = ("admin", "1234")
18
 
19
  # --- 1. 讀取資料 ---
20
- print("🚀 正在啟動快取模式...")
21
 
22
  if not os.path.exists(FILE_PATH):
23
  print(f"❌ 錯誤:找不到 {FILE_PATH}")
24
  sys.exit(1)
25
 
26
  try:
 
27
  df = pd.read_csv(FILE_PATH, encoding='cp950')
28
  except UnicodeDecodeError:
29
  try:
@@ -35,11 +36,15 @@ except Exception:
35
 
36
  # --- 2. 資料清洗 ---
37
  if not df.empty:
 
38
  df.columns = [str(c).strip().replace('\ufeff', '') for c in df.columns]
 
 
39
  for col in df.columns:
40
  if '主旨' in col or '內容' in col: df.rename(columns={col: '主旨'}, inplace=True)
41
  if '窗口' in col or '單位' in col: df.rename(columns={col: '收文窗口'}, inplace=True)
42
 
 
43
  df['主旨'] = df['主旨'].astype(str)
44
  df['收文窗口'] = df['收文窗口'].astype(str)
45
  df = df.dropna(subset=['主旨', '收文窗口'])
@@ -63,6 +68,7 @@ except Exception as e:
63
  corpus_embeddings = None
64
 
65
  if total_records > 0 and model is not None:
 
66
  if os.path.exists(INDEX_FILE):
67
  print(f"⚡ 偵測到快取檔案,正在秒速載入...")
68
  try:
@@ -72,8 +78,9 @@ if total_records > 0 and model is not None:
72
  print(f"❌ 快取檔案損壞,將重新計算。錯誤: {e}")
73
  corpus_embeddings = None
74
 
 
75
  if corpus_embeddings is None:
76
- print(f"🔥 開始計算索引 (需時約 2-4 分鐘)...")
77
  chunk_size = 500
78
  embeddings_chunks = []
79
 
@@ -86,6 +93,7 @@ if total_records > 0 and model is not None:
86
  gc.collect()
87
 
88
  corpus_embeddings = torch.cat(embeddings_chunks)
 
89
  torch.save(corpus_embeddings, INDEX_FILE)
90
  print("✅ 索引計算並儲存完成!")
91
 
@@ -96,7 +104,7 @@ if total_records > 0 and model is not None:
96
  # --- 4. 定義搜尋 ---
97
  def search_department(query):
98
  if corpus_embeddings is None:
99
- return "⚠️ 系統初始化失敗。"
100
 
101
  if not query.strip():
102
  return "請輸入公文主旨..."
@@ -126,7 +134,7 @@ def search_department(query):
126
 
127
  return output_text
128
 
129
- # --- 5. 介面 (包含密碼鎖) ---
130
  iface = gr.Interface(
131
  fn=search_department,
132
  inputs=gr.Textbox(lines=3, placeholder="請輸入公文主旨..."),
@@ -137,5 +145,5 @@ iface = gr.Interface(
137
  )
138
 
139
  if __name__ == "__main__":
140
- # ▼▼▼ 這裡加上了 auth 參數,啟動時會要求輸入帳號密碼 ▼▼▼
141
  iface.launch(auth=LOGIN_DATA)
 
12
  FILE_PATH = 'data.csv'
13
  INDEX_FILE = 'corpus_embeddings.pt'
14
 
15
+ # ▼▼▼ 設定登入帳號密碼 (已更新) ▼▼▼
16
  # 格式:("帳號", "密碼")
17
+ LOGIN_DATA = ("admin", "htch15583")
18
 
19
  # --- 1. 讀取資料 ---
20
+ print("🚀 正在啟動系統...")
21
 
22
  if not os.path.exists(FILE_PATH):
23
  print(f"❌ 錯誤:找不到 {FILE_PATH}")
24
  sys.exit(1)
25
 
26
  try:
27
+ # 讀取檔案 (CP950 優先)
28
  df = pd.read_csv(FILE_PATH, encoding='cp950')
29
  except UnicodeDecodeError:
30
  try:
 
36
 
37
  # --- 2. 資料清洗 ---
38
  if not df.empty:
39
+ # 移除 BOM 與空白
40
  df.columns = [str(c).strip().replace('\ufeff', '') for c in df.columns]
41
+
42
+ # 自動對應欄位
43
  for col in df.columns:
44
  if '主旨' in col or '內容' in col: df.rename(columns={col: '主旨'}, inplace=True)
45
  if '窗口' in col or '單位' in col: df.rename(columns={col: '收文窗口'}, inplace=True)
46
 
47
+ # 轉字串 & 移除空值
48
  df['主旨'] = df['主旨'].astype(str)
49
  df['收文窗口'] = df['收文窗口'].astype(str)
50
  df = df.dropna(subset=['主旨', '收文窗口'])
 
68
  corpus_embeddings = None
69
 
70
  if total_records > 0 and model is not None:
71
+ # 檢查是否有快取檔案
72
  if os.path.exists(INDEX_FILE):
73
  print(f"⚡ 偵測到快取檔案,正在秒速載入...")
74
  try:
 
78
  print(f"❌ 快取檔案損壞,將重新計算。錯誤: {e}")
79
  corpus_embeddings = None
80
 
81
+ # 如果沒有快取,則進行計算
82
  if corpus_embeddings is None:
83
+ print(f"🔥 開始計算索引 (需時約 2-4 分鐘,請耐心等候)...")
84
  chunk_size = 500
85
  embeddings_chunks = []
86
 
 
93
  gc.collect()
94
 
95
  corpus_embeddings = torch.cat(embeddings_chunks)
96
+ # 儲存到硬碟,下次啟動就會很快
97
  torch.save(corpus_embeddings, INDEX_FILE)
98
  print("✅ 索引計算並儲存完成!")
99
 
 
104
  # --- 4. 定義搜尋 ---
105
  def search_department(query):
106
  if corpus_embeddings is None:
107
+ return "⚠️ 系統初始化失敗,請檢查 Logs。"
108
 
109
  if not query.strip():
110
  return "請輸入公文主旨..."
 
134
 
135
  return output_text
136
 
137
+ # --- 5. 介面 (已啟用密碼鎖) ---
138
  iface = gr.Interface(
139
  fn=search_department,
140
  inputs=gr.Textbox(lines=3, placeholder="請輸入公文主旨..."),
 
145
  )
146
 
147
  if __name__ == "__main__":
148
+ # 啟動時加入驗證
149
  iface.launch(auth=LOGIN_DATA)