manu committed on
Commit
7e7bfe7
·
verified ·
1 Parent(s): 7068618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -44
app.py CHANGED
@@ -124,12 +124,9 @@ def index_from_url(url: str) -> Tuple[str, str]:
124
  Download a PDF from URL and index it.
125
  Returns: (status_message, saved_pdf_path)
126
  """
127
- tmp_dir = tempfile.mkdtemp(prefix="colpali_")
128
- local_path = os.path.join(tmp_dir, "document.pdf")
129
- urlretrieve(url, local_path)
130
- status = index_from_path(local_path)
131
- return status, local_path
132
-
133
 
134
  def query_gpt(query: str, retrieved_images: list[tuple[Image.Image, str]]) -> str:
135
  """Calls OpenAI's GPT model with the query and image data."""
@@ -596,41 +593,41 @@ def build_ui():
596
  )
597
 
598
  # ---- Tab 1: Index & Preview
599
- # with gr.Tab("1) Index & Preview"):
600
- # with gr.Row():
601
- # with gr.Column(scale=1):
602
- # pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
603
- # index_btn = gr.Button("📥 Index Uploaded PDF", variant="secondary")
604
- # url_box = gr.Textbox(
605
- # label="Or index from URL",
606
- # placeholder="https://example.com/file.pdf",
607
- # value="",
608
- # )
609
- # index_url_btn = gr.Button("🌐 Load From URL", variant="secondary")
610
- # status_box = gr.Textbox(label="Status", interactive=False)
611
- # with gr.Column(scale=2):
612
- # pdf_view = PDF(label="PDF Preview")
613
-
614
- # # wiring
615
- # def handle_upload(file):
616
- # global current_pdf_path
617
- # if file is None:
618
- # return "Please upload a PDF.", None
619
- # path = getattr(file, "name", file)
620
- # status = index_from_path(path)
621
- # current_pdf_path = path
622
- # return status, path
623
-
624
- # def handle_url(url: str):
625
- # global current_pdf_path
626
- # if not url or not url.lower().endswith(".pdf"):
627
- # return "Please provide a direct PDF URL ending in .pdf", None
628
- # status, path = index_from_url(url)
629
- # current_pdf_path = path
630
- # return status, path
631
-
632
- # index_btn.click(handle_upload, inputs=[pdf_input], outputs=[status_box, pdf_view])
633
- # index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
634
 
635
  # ---- Tab 2: Ask (Direct — returns indices)
636
  with gr.Tab("2) Direct Search"):
@@ -716,9 +713,6 @@ def build_ui():
716
 
717
  if __name__ == "__main__":
718
  demo = build_ui()
719
- images = load_dataset("vidore/esg_reports_human_labeled_v2", "corpus", split="test")["image"]
720
- print("Indexing")
721
- print(index_gpu(images))
722
  # mcp_server=True exposes this app's MCP endpoint at /gradio_api/mcp/
723
  # We keep the MCP server available, but the agent never uses MCP to pass images.
724
  demo.queue(max_size=5).launch(debug=True, mcp_server=True)
 
124
  Download a PDF from URL and index it.
125
  Returns: (status_message, saved_pdf_path)
126
  """
127
+ images = load_dataset(url, "corpus", split="test")["image"]
128
+ status = index_gpu(images)
129
+ return status, "path"
 
 
 
130
 
131
  def query_gpt(query: str, retrieved_images: list[tuple[Image.Image, str]]) -> str:
132
  """Calls OpenAI's GPT model with the query and image data."""
 
593
  )
594
 
595
  # ---- Tab 1: Index & Preview
596
+ with gr.Tab("1) Index & Preview"):
597
+ with gr.Row():
598
+ with gr.Column(scale=1):
599
+ pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
600
+ index_btn = gr.Button("📥 Index Uploaded PDF", variant="secondary")
601
+ url_box = gr.Textbox(
602
+ label="Or index from HF Dataset",
603
+ placeholder="manu/test-dataset",
604
+ value="",
605
+ )
606
+ index_url_btn = gr.Button("🌐 Load From HF Datset", variant="secondary")
607
+ status_box = gr.Textbox(label="Status", interactive=False)
608
+ with gr.Column(scale=2):
609
+ pdf_view = PDF(label="PDF Preview")
610
+
611
+ # wiring
612
+ def handle_upload(file):
613
+ global current_pdf_path
614
+ if file is None:
615
+ return "Please upload a PDF.", None
616
+ path = getattr(file, "name", file)
617
+ status = index_from_path(path)
618
+ current_pdf_path = path
619
+ return status, path
620
+
621
+ def handle_url(url: str):
622
+ global current_pdf_path
623
+ if not url or not url.lower().endswith(".pdf"):
624
+ return "Please provide a direct PDF URL ending in .pdf", None
625
+ status, path = index_from_url(url)
626
+ current_pdf_path = path
627
+ return status, path
628
+
629
+ index_btn.click(handle_upload, inputs=[pdf_input], outputs=[status_box, pdf_view])
630
+ index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
631
 
632
  # ---- Tab 2: Ask (Direct — returns indices)
633
  with gr.Tab("2) Direct Search"):
 
713
 
714
  if __name__ == "__main__":
715
  demo = build_ui()
 
 
 
716
  # mcp_server=True exposes this app's MCP endpoint at /gradio_api/mcp/
717
  # We keep the MCP server available, but the agent never uses MCP to pass images.
718
  demo.queue(max_size=5).launch(debug=True, mcp_server=True)