# Detect license plates in one test image with a YOLO detector, crop every
# detection, and read the plate text from each crop with TrOCR.
import os

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ultralytics import YOLO

# All relative paths below (detector weights, test image) are resolved against
# this dataset directory.
os.chdir("C:/Users/celma/OneDrive - Hanze/School/periode 1.4/IOT/YOLO11/License Plate Recognition.v11i.yolov11")

IMAGE_PATH = "test/images/000i.jpg"
# Processor and model are loaded from the same checkpoint so the image
# preprocessing and tokenizer are the ones that belong to the decoder weights.
# NOTE(review): the processor was previously loaded from
# "microsoft/trocr-base-handwritten" - confirm that was not intentional.
OCR_CHECKPOINT = "microsoft/trocr-large-printed"

# --- Plate detection -------------------------------------------------------
model = YOLO("license_plate_detector.pt")
names = model.names  # class-index -> label mapping; not used further below

results = model.predict(IMAGE_PATH, show=False, save=False)
img = Image.open(IMAGE_PATH)

# One [x1, y1, x2, y2] list per detection, plus the matching class indices.
boxes = results[0].boxes.xyxy.cpu().tolist()
clss = results[0].boxes.cls.cpu().tolist()
print(boxes)

# --- Text recognition ------------------------------------------------------
# Load the OCR processor/model once instead of once per detection, and keep
# them under their own names so the YOLO `model` above is not overwritten.
# They are only loaded when there is at least one detection to read.
processor = None
ocr_model = None
if boxes:
    processor = TrOCRProcessor.from_pretrained(OCR_CHECKPOINT)
    ocr_model = VisionEncoderDecoderModel.from_pretrained(OCR_CHECKPOINT)

# `.tolist()` always yields a list, so an image without detections simply
# skips the loop; no None check is needed.
for box in boxes:
    # PIL's crop takes integer (left, upper, right, lower) pixel coordinates.
    left, upper, right, lower = (int(coord) for coord in box[:4])
    cropped_img = img.crop((left, upper, right, lower))
    cropped_img.show()

    image = cropped_img.convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = ocr_model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(text)