Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -124,12 +124,9 @@ def index_from_url(url: str) -> Tuple[str, str]:
|
|
| 124 |
Download a PDF from URL and index it.
|
| 125 |
Returns: (status_message, saved_pdf_path)
|
| 126 |
"""
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
status = index_from_path(local_path)
|
| 131 |
-
return status, local_path
|
| 132 |
-
|
| 133 |
|
| 134 |
def query_gpt(query: str, retrieved_images: list[tuple[Image.Image, str]]) -> str:
|
| 135 |
"""Calls OpenAI's GPT model with the query and image data."""
|
|
@@ -596,41 +593,41 @@ def build_ui():
|
|
| 596 |
)
|
| 597 |
|
| 598 |
# ---- Tab 1: Index & Preview
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
|
| 635 |
# ---- Tab 2: Ask (Direct — returns indices)
|
| 636 |
with gr.Tab("2) Direct Search"):
|
|
@@ -716,9 +713,6 @@ def build_ui():
|
|
| 716 |
|
| 717 |
if __name__ == "__main__":
|
| 718 |
demo = build_ui()
|
| 719 |
-
images = load_dataset("vidore/esg_reports_human_labeled_v2", "corpus", split="test")["image"]
|
| 720 |
-
print("Indexing")
|
| 721 |
-
print(index_gpu(images))
|
| 722 |
# mcp_server=True exposes this app's MCP endpoint at /gradio_api/mcp/
|
| 723 |
# We keep the MCP server available, but the agent never uses MCP to pass images.
|
| 724 |
demo.queue(max_size=5).launch(debug=True, mcp_server=True)
|
|
|
|
| 124 |
Download a PDF from URL and index it.
|
| 125 |
Returns: (status_message, saved_pdf_path)
|
| 126 |
"""
|
| 127 |
+
images = load_dataset(url, "corpus", split="test")["image"]
|
| 128 |
+
status = index_gpu(images)
|
| 129 |
+
return status, "path"
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
def query_gpt(query: str, retrieved_images: list[tuple[Image.Image, str]]) -> str:
|
| 132 |
"""Calls OpenAI's GPT model with the query and image data."""
|
|
|
|
| 593 |
)
|
| 594 |
|
| 595 |
# ---- Tab 1: Index & Preview
|
| 596 |
+
with gr.Tab("1) Index & Preview"):
|
| 597 |
+
with gr.Row():
|
| 598 |
+
with gr.Column(scale=1):
|
| 599 |
+
pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
| 600 |
+
index_btn = gr.Button("📥 Index Uploaded PDF", variant="secondary")
|
| 601 |
+
url_box = gr.Textbox(
|
| 602 |
+
label="Or index from HF Dataset",
|
| 603 |
+
placeholder="manu/test-dataset",
|
| 604 |
+
value="",
|
| 605 |
+
)
|
| 606 |
+
index_url_btn = gr.Button("🌐 Load From HF Datset", variant="secondary")
|
| 607 |
+
status_box = gr.Textbox(label="Status", interactive=False)
|
| 608 |
+
with gr.Column(scale=2):
|
| 609 |
+
pdf_view = PDF(label="PDF Preview")
|
| 610 |
+
|
| 611 |
+
# wiring
|
| 612 |
+
def handle_upload(file):
    """Index an uploaded PDF and report the outcome to the UI.

    Args:
        file: Value delivered by the upload widget. It may be ``None``
            (nothing uploaded), an object exposing a ``name`` attribute,
            or the path itself.

    Returns:
        A ``(status_message, pdf_path)`` tuple. ``pdf_path`` is ``None``
        when no file was supplied.
    """
    global current_pdf_path
    if file is None:
        return "Please upload a PDF.", None
    # Accept both a file-like wrapper (use its ``name``) and a bare path.
    path = getattr(file, "name", file)
    status = index_from_path(path)
    # Record the most recently indexed PDF in the module-level variable.
    current_pdf_path = path
    return status, path
|
| 620 |
+
|
| 621 |
+
def handle_url(url: str):
    """Index the Hugging Face dataset named in the text box.

    The text box feeding this handler asks for a dataset id (for example
    ``manu/test-dataset``), and ``index_from_url`` forwards the value to
    ``load_dataset``. The former ``.pdf`` suffix check therefore rejected
    every valid input and is replaced by a non-empty check.

    Args:
        url: Dataset identifier typed by the user; may be empty or ``None``.

    Returns:
        A ``(status_message, pdf_path)`` tuple. ``pdf_path`` is ``None``
        when the input is empty or when indexing produced no file on disk.
    """
    import os

    global current_pdf_path
    dataset_id = (url or "").strip()
    if not dataset_id:
        return "Please provide a Hugging Face dataset id, e.g. manu/test-dataset", None
    status, path = index_from_url(dataset_id)
    # index_from_url may hand back a placeholder instead of a real file;
    # only forward something the PDF preview can actually open.
    if not (isinstance(path, str) and os.path.isfile(path)):
        path = None
    current_pdf_path = path
    return status, path
|
| 628 |
+
|
| 629 |
+
index_btn.click(handle_upload, inputs=[pdf_input], outputs=[status_box, pdf_view])
|
| 630 |
+
index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
|
| 631 |
|
| 632 |
# ---- Tab 2: Ask (Direct — returns indices)
|
| 633 |
with gr.Tab("2) Direct Search"):
|
|
|
|
| 713 |
|
| 714 |
if __name__ == "__main__":
    # Build the Gradio interface and serve it when run as a script.
    demo = build_ui()
    # mcp_server=True exposes this app's MCP endpoint at /gradio_api/mcp/
    # We keep the MCP server available, but the agent never uses MCP to pass images.
    demo.queue(max_size=5).launch(debug=True, mcp_server=True)
|