diff --git a/OCR YOLO11/yolomain.py b/OCR YOLO11/yolomain.py
index 19caf18..742eba2 100644
--- a/OCR YOLO11/yolomain.py
+++ b/OCR YOLO11/yolomain.py
@@ -2,7 +2,10 @@ from ultralytics import YOLO
 from PIL import Image
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import os
-os.chdir("C:/Users/celma/OneDrive - Hanze/School/periode 1.4/IOT/YOLO11/License Plate Recognition.v11i.yolov11")
+
+os.chdir(
+    "C:/Users/celma/OneDrive - Hanze/School/periode 1.4/IOT/YOLO11/License Plate Recognition.v11i.yolov11"
+)
 
 model = YOLO("license_plate_detector.pt")
 
@@ -17,18 +20,13 @@ clss = results[0].boxes.cls.cpu().tolist()
 print(boxes)
 
 if boxes is not None:
-    for box, cls in zip(boxes,clss):
-        crop_obj = boxes[int(box[1]):int(box[3]) + int(box[0]):int(box[2])]
+    for box, cls in zip(boxes, clss):
+        crop_obj = boxes[int(box[1]) : int(box[3]) + int(box[0]) : int(box[2])]
 
-cropped_img = img.crop(
-    (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
-)
+cropped_img = img.crop((int(box[0]), int(box[1]), int(box[2]), int(box[3])))
 cropped_img.show()
 
-processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-printed")
-
 image = cropped_img.convert("RGB")
 pixel_values = processor(images=image, return_tensors="pt").pixel_values
 generated_ids = model.generate(pixel_values)
 
diff --git a/test2.py b/test2.py
new file mode 100644
index 0000000..7aac140
--- /dev/null
+++ b/test2.py
@@ -0,0 +1,29 @@
+from ultralytics import YOLO
+from PIL import Image
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import os
+from datetime import datetime
+import torch
+
+print("11")
+processor = TrOCRProcessor.from_pretrained(
+    "microsoft/trocr-base-handwritten", use_fast=True
+)
+print("12")
+model = VisionEncoderDecoderModel.from_pretrained(
+    "microsoft/trocr-large-printed"
+)
+print("13")
+
+cropped_img = Image.open("license_plate.jpg")
+cropped_img.show()
+
+
+image = cropped_img.convert("RGB")
+first = datetime.now()
+pixel_values = processor(images=image, return_tensors="pt").pixel_values
+generated_ids = model.generate(pixel_values)
+print(generated_ids)
+text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+print(datetime.now() - first)
+print(text)
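
A note on the yolomain.py hunk: the reformatted crop_obj line still slices the boxes tensor with the box coordinates, so it never produces an image crop; the img.crop(...) call below it is what actually extracts the plate, and crop_obj is unused afterwards. Below is a minimal sketch of the combined detect, crop, and decode flow the two files appear to be converging on. The input file name, the xyxy box extraction, and the dropping of the unused crop_obj slice are assumptions, not the patch's code.

# Sketch only: the detector weights and model ids are taken from the patch;
# "car.jpg" and the xyxy extraction are assumptions.
from ultralytics import YOLO
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

detector = YOLO("license_plate_detector.pt")
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
ocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-printed")

img = Image.open("car.jpg")  # hypothetical input image
results = detector(img)
boxes = results[0].boxes.xyxy.cpu().tolist()  # one [x1, y1, x2, y2] per detection

for box in boxes:
    # PIL crops with a (left, upper, right, lower) tuple; slicing the boxes
    # tensor, as the crop_obj line does, never touches the image itself.
    plate = img.crop((int(box[0]), int(box[1]), int(box[2]), int(box[3]))).convert("RGB")
    pixel_values = processor(images=plate, return_tensors="pt").pixel_values
    generated_ids = ocr_model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(text)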