Dear all, I have a similar issue. If the next inference has no object, the result will be the previous bounding boxes with the same locations and confidences, but different labels. This happens when running on an Intel GPU, but the CPU is fine.
The results for the first and second inferences are as follows.
Following is my code.
from pathlib import Path
from ultralytics import YOLO
import openvino as ov
IMAGE_PATH = "./data/coco_bike.jpg"
IMAGE_BLANK_PATH = "./data/blank.jpg"
def compile_model(det_model_path, device):
    """Compile the exported OpenVINO model for *device* and attach it to a YOLO predictor.

    Args:
        det_model_path (Path): Path to the exported OpenVINO ``.xml`` model file.
        device (str): OpenVINO device string, e.g. ``"CPU"``, ``"GPU.0"`` or ``"AUTO"``.

    Returns:
        YOLO: Ultralytics model whose predictor runs the compiled OpenVINO model.
    """
    core = ov.Core()
    det_ov_model = core.read_model(det_model_path)
    ov_config = {}
    if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices):
        # NOTE(review): disabling Winograd convolutions looks like an Intel GPU
        # accuracy workaround from the Ultralytics/OpenVINO notebooks — confirm.
        ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"}
    det_compiled_model = core.compile_model(det_ov_model, device, ov_config)
    det_model = YOLO(det_model_path.parent, task="detect")
    if det_model.predictor is None:
        custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"}  # method defaults
        args = {**det_model.overrides, **custom}
        det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks)
        det_model.predictor.setup_model(model=det_model.model)
    # Swap in the device-specific compiled model (setup_model installs a default one).
    det_model.predictor.model.ov_compiled_model = det_compiled_model
    return det_model
def run_e2e(det_model, image_path):
    """Run a single end-to-end inference and return the results.

    Args:
        det_model: Callable detection model (e.g. a YOLO instance).
        image_path (str): Path of the image to run inference on.

    Returns:
        Whatever the model call returns; previously this was bound to an
        unused local and discarded, so callers could not inspect it.
    """
    return det_model(image_path)
# Exercise every device, alternating an image with objects and a blank image,
# to surface any stale-detection behaviour between consecutive inferences.
det_model_path = Path("yolo11s_openvino_model/yolo11s.xml")
devices = ["CPU", "GPU.0", "GPU.1"]
for device in devices:
    det_model = compile_model(det_model_path, device)
    for _round in range(10):
        print("\n\n###", device, " - object:")
        run_e2e(det_model, IMAGE_PATH)
        print("\n\n###", device, " - blank:")
        run_e2e(det_model, IMAGE_BLANK_PATH)
>python inference.py
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
### CPU - object:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 61.5ms
Speed: 2.2ms preprocess, 61.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)
### CPU - blank:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 (no detections), 19.7ms
Speed: 1.3ms preprocess, 19.7ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.0 - object:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 490.1ms
Speed: 1.7ms preprocess, 490.1ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.0 - blank:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 1 car, 1 bus, 1 truck, 1 giraffe, 1 baseball bat, 19.9ms
Speed: 1.4ms preprocess, 19.9ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.1 - object:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 2759.1ms
Speed: 1.8ms preprocess, 2759.1ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.1 - blank:
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 1 car, 1 bus, 1 truck, 1 giraffe, 1 baseball bat, 6.9ms
Speed: 1.7ms preprocess, 6.9ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
But if I re-run model setup before each inference, the GPU result is as expected. This method increases the total inference time from 6 ms to 40 ms.
det_model.predictor.setup_model(model=det_model.model)
The new code is as follows.
from pathlib import Path
from ultralytics import YOLO
import openvino as ov
IMAGE_PATH = "./data/coco_bike.jpg"
IMAGE_BLANK_PATH = "./data/blank.jpg"
def compile_model(det_model_path, device):
    """Read and compile the OpenVINO IR model for *device*, wire the compiled
    model into a freshly created YOLO predictor, and return the YOLO wrapper."""
    ov_core = ov.Core()
    ir_model = ov_core.read_model(det_model_path)
    gpu_selected = "GPU" in device or ("AUTO" in device and "GPU" in ov_core.available_devices)
    # NOTE(review): Winograd disable appears to be an Intel GPU workaround — confirm.
    config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} if gpu_selected else {}
    compiled = ov_core.compile_model(ir_model, device, config)
    yolo = YOLO(det_model_path.parent, task="detect")
    if yolo.predictor is None:
        # Method defaults merged over the model's own overrides.
        overrides = {**yolo.overrides, "conf": 0.25, "batch": 1, "save": False, "mode": "predict"}
        yolo.predictor = yolo._smart_load("predictor")(overrides=overrides, _callbacks=yolo.callbacks)
        yolo.predictor.setup_model(model=yolo.model)
    yolo.predictor.model.ov_compiled_model = compiled
    return yolo
def run_e2e(det_model, image_path):
    """Run a single end-to-end inference and return the results.

    Args:
        det_model: Callable detection model (e.g. a YOLO instance).
        image_path (str): Path of the image to run inference on.

    Returns:
        Whatever the model call returns; previously this was bound to an
        unused local (``results``) and discarded.
    """
    return det_model(image_path)
# Workaround benchmark: rebuild the predictor's model state via setup_model()
# before every inference so GPU outputs are reset, at the cost of extra latency.
det_model_path = Path("yolo11s_openvino_model/yolo11s.xml")
devices = ["CPU", "GPU.0", "GPU.1"]
for device in devices:
    det_model = compile_model(det_model_path, device)
    for _round in range(10):
        print("\n\n###", device, " - object:")
        det_model.predictor.setup_model(model=det_model.model)
        run_e2e(det_model, IMAGE_PATH)
        print("\n\n###", device, " - blank:")
        det_model.predictor.setup_model(model=det_model.model)
        run_e2e(det_model, IMAGE_BLANK_PATH)
>python inference.py
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
### CPU - object:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 2459.3ms
Speed: 4.0ms preprocess, 2459.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)
### CPU - blank:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 (no detections), 67.8ms
Speed: 1.5ms preprocess, 67.8ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.0 - object:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 43.8ms
Speed: 1.6ms preprocess, 43.8ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.0 - blank:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 (no detections), 45.5ms
Speed: 1.6ms preprocess, 45.5ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.1 - object:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 parking meter, 1 dog, 38.8ms
Speed: 1.6ms preprocess, 38.8ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
### GPU.1 - blank:
Ultralytics 8.3.142 Python-3.12.10 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 285K)
Loading yolo11s_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
image 1/1 C:\Users\ekaak\Workspace\yolov11_blank\data\blank.jpg: 480x640 (no detections), 45.5ms
Speed: 1.7ms preprocess, 45.5ms inference, 0.2ms postprocess per image at shape (1, 3, 480, 640)
Is there a way to reset just the output tensors, especially the ones on the GPU, without re-running the full model setup?
Thanks.