# ProjectIOT/application/api/image_processing.py

import io
from application import car_model, plate_model, ocr_reader
from PIL import Image
import numpy as np


def _largest_vehicle_box(results) -> "tuple[int, int, int, int] | None":
    """Return the (x1, y1, x2, y2) corners of the largest car/truck box.

    Every entry of ``results`` is scanned; entries whose ``boxes`` attribute
    is falsy are skipped. "Largest" is measured by the squared length of the
    box diagonal, and on ties the first detection encountered wins.

    Returns ``None`` when no box is classified as ``"car"`` or ``"truck"``.
    """
    candidates: list[tuple[int, tuple[int, int, int, int]]] = []
    for r in results:
        # A falsy ``boxes`` means this result carries no detections; keep
        # scanning the remaining results instead of giving up.
        for box in r.boxes or ():
            if r.names[int(box.cls[0])] not in ("car", "truck"):
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            size = (x2 - x1) ** 2 + (y2 - y1) ** 2
            candidates.append((size, (x1, y1, x2, y2)))

    if not candidates:
        return None
    return max(candidates, key=lambda candidate: candidate[0])[1]


def _read_plate_text(plate_results, car_img: "Image.Image") -> str:
    """OCR every ``License_Plate`` box found in ``car_img``.

    Each plate crop is written to ``license_plate.jpg`` (overwriting the
    previous one) and the raw OCR output is printed. The text of the last
    plate that produced any OCR output is returned, or ``""`` if no plate
    yielded text.
    """
    text = ""
    for r in plate_results:
        for box in r.boxes or ():
            if r.names[int(box.cls[0])] != "License_Plate":
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            if x2 <= x1 or y2 <= y1:
                # Integer truncation can collapse a tiny box to zero area;
                # there is nothing to read in an empty crop.
                continue
            lp_img = car_img.crop((x1, y1, x2, y2))
            lp_img.save("license_plate.jpg")
            lp_np = np.array(object=lp_img)
            result = ocr_reader.readtext(image=lp_np)
            print(result)
            if result:
                # Assumes each OCR entry is indexable with the recognized
                # text at position 1 (EasyOCR-style output) -- TODO confirm.
                text = str(result[0][1])
    return text


async def process_image(image: bytes) -> str:
    """Extract license-plate text from an encoded image.

    The image is decoded, the largest car/truck detected by ``car_model`` is
    cropped out, ``plate_model`` is run on that crop, and every detected
    plate is passed to ``ocr_reader``.

    Side effects: writes ``received_image.jpg``, ``car_crop_pillow.jpg`` and
    ``license_plate.jpg`` to the current working directory and prints
    progress/OCR output to stdout.

    Args:
        image: Raw bytes of an image file in a format Pillow can open.

    Returns:
        The recognized plate text, or ``""`` when no vehicle, no plate, or
        no text was found.

    Raises:
        Whatever ``PIL.Image.open`` raises for undecodable input; it is not
        caught here.
    """
    # NOTE(review): nothing in this coroutine is awaited, so the model and
    # OCR calls run synchronously on the caller's event loop.
    print("Saving file to memory")
    img = Image.open(io.BytesIO(image))

    # JPEG cannot store alpha/palette modes; normalise so the debug saves
    # below do not fail for inputs such as RGBA PNGs.
    if img.mode != "RGB":
        img = img.convert("RGB")
    img.save("received_image.jpg")

    corners = _largest_vehicle_box(car_model.predict(source=img))
    if corners is None:
        return ""

    # Crop the biggest vehicle and look for plates only inside it.
    cropped_img = img.crop(corners)
    cropped_img.save("car_crop_pillow.jpg")

    plate_results = plate_model.predict(source=cropped_img)
    return _read_plate_text(plate_results, cropped_img)