Spaces:

ariG23498
/

zero-shot-od

Running on Zero

App Files Files Community

ariG23498 HF Staff committed on Aug 11

Commit

e007554

1 Parent(s): 29750ea

init model and processor beforehand

Browse files

Files changed (1) hide show

app.py +26 -10

app.py CHANGED Viewed

@@ -11,9 +11,28 @@ def extract_model_short_name(model_id):
 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
 model_mm_grounding_id = "rziga/mm_grounding_dino_tiny_o365v1_goldg"
 model_omdet_id = "omlab/omdet-turbo-swin-tiny-hf"
 model_owlv2_id = "google/owlv2-large-patch14-ensemble"
 model_llmdet_name = extract_model_short_name(model_llmdet_id)
 model_mm_grounding_name = extract_model_short_name(model_mm_grounding_id)
@@ -22,13 +41,10 @@ model_owlv2_name = extract_model_short_name(model_owlv2_id)
 @spaces.GPU
-def detect(model_id: str, image: Image.Image, prompts: list, threshold: float):
     t0 = time.perf_counter()
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    processor = AutoProcessor.from_pretrained(model_id)
-    model = (
-        AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device).eval()
-    )
     texts = [prompts]
     inputs = processor(images=image, text=texts, return_tensors="pt").to(device)
     with torch.inference_mode():
@@ -38,7 +54,7 @@ def detect(model_id: str, image: Image.Image, prompts: list, threshold: float):
     )
     result = results[0]
     annotations = []
-    for box, score, label_name in zip(result["boxes"], result["scores"], result["text_abels"]):
         if score >= threshold:
             xmin, ymin, xmax, ymax = [int(x) for x in box.tolist()]
             annotations.append(((xmin, ymin, xmax, ymax), f"{label_name} {score:.2f}"))
@@ -51,10 +67,10 @@ def run_detection(
     image: Image.Image, prompts_str: str, threshold_llm, threshold_mm, threshold_owlv2, threshold_omdet,
 ):
     prompts = [p.strip() for p in prompts_str.split(",")]
-    ann_llm, time_llm = detect(model_llmdet_id, image, prompts, threshold_llm)
-    ann_mm, time_mm = detect(model_mm_grounding_name, image, prompts, threshold_mm)
-    ann_owlv2, time_owlv2 = detect(model_omdet_id, image, prompts, threshold_owlv2)
-    ann_omdet, time_omdet = detect(model_owlv2_name, image, prompts, threshold_omdet)
     return (
         (image, ann_llm),
         time_llm,

 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
+processor_llmdet = AutoProcessor.from_pretrained(model_llmdet_id)
+model_llmdet = (
+    AutoModelForZeroShotObjectDetection.from_pretrained(model_llmdet_id)
+)
 model_mm_grounding_id = "rziga/mm_grounding_dino_tiny_o365v1_goldg"
+processor_mm_grounding = AutoProcessor.from_pretrained(model_mm_grounding_id)
+model_mm_grounding = (
+    AutoModelForZeroShotObjectDetection.from_pretrained(model_mm_grounding_id)
+)
 model_omdet_id = "omlab/omdet-turbo-swin-tiny-hf"
+processor_omdet = AutoProcessor.from_pretrained(model_omdet_id)
+model_omdet = (
+    AutoModelForZeroShotObjectDetection.from_pretrained(model_omdet_id)
+)
 model_owlv2_id = "google/owlv2-large-patch14-ensemble"
+processor_owlv2 = AutoProcessor.from_pretrained(model_owlv2_id)
+model_owlv2 = (
+    AutoModelForZeroShotObjectDetection.from_pretrained(model_owlv2_id)
+)
 model_llmdet_name = extract_model_short_name(model_llmdet_id)
 model_mm_grounding_name = extract_model_short_name(model_mm_grounding_id)
 @spaces.GPU
+def detect(model, processor, image: Image.Image, prompts: list, threshold: float):
     t0 = time.perf_counter()
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = model.to(device).eval()
     texts = [prompts]
     inputs = processor(images=image, text=texts, return_tensors="pt").to(device)
     with torch.inference_mode():
     )
     result = results[0]
     annotations = []
+    for box, score, label_name in zip(result["boxes"], result["scores"], result["text_labels"]):
         if score >= threshold:
             xmin, ymin, xmax, ymax = [int(x) for x in box.tolist()]
             annotations.append(((xmin, ymin, xmax, ymax), f"{label_name} {score:.2f}"))
     image: Image.Image, prompts_str: str, threshold_llm, threshold_mm, threshold_owlv2, threshold_omdet,
 ):
     prompts = [p.strip() for p in prompts_str.split(",")]
+    ann_llm, time_llm = detect(model_llmdet, processor_llmdet, image, prompts, threshold_llm)
+    ann_mm, time_mm = detect(model_mm_grounding, processor_mm_grounding, image, prompts, threshold_mm)
+    ann_owlv2, time_owlv2 = detect(model_owlv2, processor_owlv2, image, prompts, threshold_owlv2)
+    ann_omdet, time_omdet = detect(model_omdet, processor_omdet, image, prompts, threshold_omdet)
     return (
         (image, ann_llm),
         time_llm,