import sys

import cv2
import numpy as np
import easyocr
from ultralytics import YOLO

# Initialize EasyOCR reader (you can set gpu=True if you have CUDA)
reader = easyocr.Reader(['en'], gpu=False)


def preprocess_cropped_region(cropped_bgr: np.ndarray) -> np.ndarray:
    # 1) Convert to grayscale
    gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)

    # 2) Upscale by 2x to give the OCR engine more pixels to work with
    h, w = gray.shape
    gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)

    # 3) Apply Otsu's threshold -> binary image
    _, thresh = cv2.threshold(
        gray_up, 0, 255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    return thresh  # single-channel (0/255) image


def draw_obb(image: np.ndarray, obb) -> tuple[np.ndarray, list]:
    """
    - Draws each OBB polygon on `image`
    - Crops the region inside the OBB, preprocesses it, runs EasyOCR
    - Writes the extracted text back onto `image` just above the box
    - Returns (modified_image, list_of_extracted_texts)
    """
    # Keep a pristine copy so crops are not contaminated by the green
    # polygons / yellow text drawn for earlier detections
    clean = image.copy()

    boxes = obb.xyxyxyxy.cpu().numpy()  # shape: (N, 4, 2)
    extracted_texts = []

    for i, box in enumerate(boxes):
        # Reshape into 4 points: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
        pts = box.reshape(4, 2).astype(np.int32)

        # Draw the bounding polygon (green)
        cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

        # Compute axis-aligned crop coordinates
        x_min, y_min = np.min(pts, axis=0)
        x_max, y_max = np.max(pts, axis=0)

        # Clamp coordinates to the image bounds
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        x_max = min(image.shape[1] - 1, x_max)
        y_max = min(image.shape[0] - 1, y_max)

        cropped_region = clean[y_min:y_max, x_min:x_max]

        # Only proceed if the crop is non-empty
        if cropped_region.size == 0:
            continue

        # Preprocess the cropped region before OCR
        preprocessed = preprocess_cropped_region(cropped_region)

        # (Optional) Visualize how the preprocessed patch looks:
        # cv2.imshow(f"Preprocessed Crop {i}", preprocessed)
        # cv2.waitKey(0)

        # Run EasyOCR on the single-channel (binarized) image
        ocr_results = reader.readtext(preprocessed)

        # Concatenate all recognized text fragments
        detected_text = " ".join(entry[1] for entry in ocr_results).strip()
        extracted_texts.append(detected_text)

        # Put the extracted text above the bounding box (yellow text)
        cv2.putText(
            image,
            detected_text,
            (x_min, y_min - 10 if y_min - 10 > 10 else y_min + 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 255),
            2,
            lineType=cv2.LINE_AA,
        )

    return image, extracted_texts


def main(model_path_3: str, image_path: str):
    # Load the YOLO OBB model for detection
    model_3 = YOLO(model_path_3)

    # Read the input image
    image = cv2.imread(image_path)
    if image is None:
        print("Error: Could not read image at", image_path)
        sys.exit(1)

    # Run inference using the YOLO OBB model
    results = model_3(image)

    all_extracted_texts = []

    # Iterate over each detection result
    for r in results:
        if r.obb is not None:
            image, extracted_texts = draw_obb(image, r.obb)
            all_extracted_texts.extend(extracted_texts)

            # Print class info & OCR results to the console
            for class_id in r.obb.cls.cpu().numpy():
                class_name = r.names[int(class_id)]
                print(f"Detected class ID: {int(class_id)}, Class name: {class_name}")
            for idx, text in enumerate(extracted_texts):
                print(f"OCR Extracted Text {idx + 1}: {text}")

    return image, all_extracted_texts


if __name__ == "__main__":
    # Replace these with your actual paths
    yolo_weights = "Models/Remaining_tests_model.pt"
    test_image = "test_images/HV_PD/11.png"

    output_image, texts = main(yolo_weights, test_image)

    # Save the annotated image so the run produces a visible artifact
    cv2.imwrite("output.png", output_image)
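
# (Optional) Batch-process a folder of images with the same pipeline.
# A minimal sketch, left commented out so the script's behavior is
# unchanged. It assumes it would sit inside the __main__ guard above
# (it reuses `yolo_weights`); the glob pattern and the "outputs"
# directory are placeholder assumptions, not part of the pipeline.
# Note that main() reloads the YOLO model on every call, which is
# fine for a quick test but wasteful for large batches.
#
# import glob
# import os
#
# os.makedirs("outputs", exist_ok=True)
# for path in sorted(glob.glob("test_images/HV_PD/*.png")):
#     annotated, texts = main(yolo_weights, path)
#     cv2.imwrite(os.path.join("outputs", os.path.basename(path)), annotated)
#     print(f"{path}: {texts}")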