I have been doing an experiment where I need to get detection boxes in (x1, y1, x2, y2) format for some calculations. I tried replicating the procedure from detect.py, but with no luck. I was using a pre-trained model.
# Forward pass under AMP autocast for the training loss.
with torch.cuda.amp.autocast(amp):
    pred = model(imgs)  # training-mode forward: raw per-scale outputs (used by compute_loss)

if epoch == 100:
    sys.exit()  # debug stop — consider a clean break instead of killing the process

# NMS hyper-parameters (mirroring detect.py defaults).
conf_thres = 0.4
iou_thres = 0.6
classes = [0, 1, 2, 3, 4, 5, 6]
agnostic_nms = False
max_det = 300

# BUG FIX: in training mode, model(imgs) returns the raw per-scale feature-map
# tensors, NOT the decoded (xyxy + objectness/class) predictions that
# non_max_suppression expects — this is why replicating detect.py (which runs
# the model in eval mode) fails here. Re-run the forward pass in eval mode,
# without gradients, purely to obtain decoded detections, then restore the
# previous training state so the loss/backward path is unaffected.
was_training = model.training
model.eval()
with torch.no_grad():
    inf_out = model(imgs)[0]  # decoded inference output: (bs, n_anchors, 5 + n_classes)
model.train(was_training)

use_pred = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres,
                               classes=classes, agnostic=agnostic_nms, max_det=max_det)
# Save one visualization (image + normalized-xywh label file) per epoch.
if not epoch_dict.get(epoch, False):
    epoch_dict[epoch] = True  # mark this epoch as already dumped
    for i, pi in enumerate(use_pred):
        # pi: (n, 6) tensor of [x1, y1, x2, y2, conf, cls] for image i, possibly empty.
        if pi is not None and len(pi):
            # Sample name derived from the dataset path (currently unused below —
            # kept for parity with the original; TODO: use or remove).
            s_name = (paths[i].split("images")[-1][1:]).split('.jp')[0]

            # Convert the input tensor back to a drawable HWC image.
            s_img = transform(imgs[i])
            s_img = numpy.array(s_img)
            # RGB -> BGR for OpenCV. FIX: ascontiguousarray removes the
            # negative-stride view produced by [::-1], which cv2.UMat and the
            # OpenCV drawing functions reject.
            s_img = numpy.ascontiguousarray(s_img[:, :, ::-1])
            s_img_shape = s_img.shape  # (H, W, C)
            s_img = cv2.UMat(s_img)

            output_folder = "runs/test"
            # FIX: cv2.imwrite returns False (no exception) if the directory
            # does not exist — create it up front.
            os.makedirs(output_folder, exist_ok=True)
            image_name = f"image_{epoch}_{i}.jpg"
            output_path = os.path.join(output_folder, image_name)
            txt_file_path = os.path.splitext(output_path)[0] + ".txt"

            # Normalization gain (W, H, W, H): converts pixel xyxy to 0-1 xywh.
            gn = torch.tensor(s_img_shape)[[1, 0, 1, 0]]
            scaled_bboxes = []
            # NOTE(review): box coordinates are in the network-input space; if
            # `transform` resizes or pads, rescale with scale_boxes() before
            # drawing — TODO confirm transform preserves the input resolution.
            for *xyxy, conf, cls in reversed(pi):
                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                line = (cls, *xywh)  # label format: cls cx cy w h
                scaled_bboxes.append(('%g ' * len(line)).rstrip() % line + '\n')
                x1, y1, x2, y2 = [int(x.item()) for x in xyxy]
                cv2.rectangle(s_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

            s_img = s_img.get()  # UMat -> numpy for imwrite
            cv2.imwrite(output_path, s_img)
            with open(txt_file_path, "w") as txt_file:
                txt_file.write("".join(scaled_bboxes))
# Loss (already scaled by batch size inside compute_loss).
loss, loss_items = compute_loss(pred, targets.to(device))
if RANK != -1:
    # DDP averages gradients across devices; multiply so the total is preserved.
    loss = loss * WORLD_SIZE
if opt.quad:
    # Quad dataloader collates 4 images per sample; rescale accordingly.
    loss = loss * 4.

# Backward pass through the AMP gradient scaler.
scaler.scale(loss).backward()
Please let me know if any of you have an idea where I am going wrong. I have been stuck on this for months and would appreciate any help.