"""Run TrOCR on a cropped license-plate image and print the recognized text.

The script loads a TrOCR processor and model, opens ``license_plate.jpg``
from the current working directory, displays it, runs text generation and
prints the generated token ids, the elapsed time and the decoded text.
"""

import os
from datetime import datetime

import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ultralytics import YOLO

# Processor and model are loaded from the SAME checkpoint so that the image
# preprocessing and tokenizer match the weights. The original mixed the
# "base-handwritten" processor with the "large-printed" model; the model's
# checkpoint is kept here since it is the one producing the output.
# NOTE(review): confirm the printed checkpoint is the intended one.
MODEL_NAME = "microsoft/trocr-large-printed"
IMAGE_PATH = "license_plate.jpg"

print("11")
processor = TrOCRProcessor.from_pretrained(MODEL_NAME, use_fast=True)
print("12")
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
print("13")

# Open the image in a context manager so the underlying file handle is
# released; ``convert`` returns an independent copy that outlives the block.
with Image.open(IMAGE_PATH) as cropped_img:
    cropped_img.show()
    image = cropped_img.convert("RGB")

# Time preprocessing + generation + decoding together, as the original did.
first = datetime.now()
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)
print(generated_ids)
# A single image is passed, so the first decoded string is the result.
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(datetime.now() - first)
print(text)