Hi, can someone confirm if I’m correctly modifying the YOLO convolution module? I’m editing the
`ultralytics/nn/modules/conv.py` file directly (inside the `modules` folder) to change how Conv blocks behave. Is that the proper way to apply custom changes to YOLO layers? ChatGPT recommended the following, changing from the default:
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
    """
    Initialize Conv layer with given parameters.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels.
        k (int): Kernel size.
        s (int): Stride.
        p (int, optional): Padding.
        g (int): Groups.
        d (int): Dilation.
        act (bool | nn.Module): Activation function.
    """
    super().__init__()
    self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
    self.bn = nn.BatchNorm2d(c2)
    self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

def forward(self, x):
    """
    Apply convolution, batch normalization and activation to input tensor.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Output tensor.
    """
    return self.act(self.bn(self.conv(x)))

def forward_fuse(self, x):
    """
    Apply convolution and activation without batch normalization.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Output tensor.
    """
    return self.act(self.conv(x))
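Both versions rely on the `autopad` helper from the same file. For reference, here it is as I understand it from the ultralytics source (recent versions may differ slightly); note that for a tuple kernel it returns one per-side value per dimension, ordered like the kernel, i.e. `[k_h // 2, k_w // 2]`:

def autopad(k, p=None, d=1):
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # effective kernel size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # per-side padding
    return p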
to the following modified version:
# NOTE: this needs "import torch.nn.functional as F" at the top of conv.py
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
    super().__init__()
    # amount of padding we need to add manually
    self.pad = autopad(k, p, d)  # int, or a per-dimension list for tuple kernels
    # set Conv2d padding=0; we pad explicitly in forward() instead
    self.conv = nn.Conv2d(c1, c2, k, s, padding=0, groups=g, dilation=d, bias=False)
    self.bn = nn.BatchNorm2d(c2)
    self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

def _reflect_pad(self, x):
    # F.pad expects [left, right, top, bottom] (last spatial dim first)
    if isinstance(self.pad, int):
        if self.pad:
            x = F.pad(x, [self.pad, self.pad, self.pad, self.pad], mode="reflect")
    else:
        p = self.pad
        if len(p) == 2:
            # autopad already returns the per-side amount for each dimension,
            # ordered like the kernel: [pad_h, pad_w] (no further halving needed)
            t = b = p[0]
            l = r = p[1]
        else:
            # explicit 4-element [l, r, t, b] passed by the caller
            l, r, t, b = p
        if any(v > 0 for v in (l, r, t, b)):
            x = F.pad(x, [l, r, t, b], mode="reflect")
    return x

def forward(self, x):
    x = self._reflect_pad(x)
    return self.act(self.bn(self.conv(x)))

def forward_fuse(self, x):
    # fused inference path: BN folded away, reflect padding still applied manually
    x = self._reflect_pad(x)
    return self.act(self.conv(x))
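To sanity-check the change, I compare output shapes against what the stock zero-padded Conv would produce (a minimal sketch; it assumes the modified Conv class above has been patched into ultralytics/nn/modules/conv.py and is importable from there):

import torch
from ultralytics.nn.modules.conv import Conv  # the patched class above

x = torch.randn(1, 3, 64, 64)

m1 = Conv(3, 16, k=3, s=1)   # 3x3, stride 1 -> spatial size preserved
assert m1(x).shape == (1, 16, 64, 64)

m2 = Conv(3, 16, k=3, s=2)   # stride 2 -> spatial dims halve, same as zero padding
assert m2(x).shape == (1, 16, 32, 32)

Since the output shapes match the default zero-padding version, the layer should stay drop-in compatible with existing model YAMLs.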
My main objective is to improve model performance on objects near the image border. Is this the correct way to change the padding, and do you think this strategy could yield good results?
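For context on why I expect this to matter, here is a toy comparison of the two padding modes at an image border (plain PyTorch, nothing Ultralytics-specific):

import torch
import torch.nn.functional as F

row = torch.tensor([[[[5., 5., 5., 5.]]]])  # constant row of intensity 5
# zero padding injects an artificial dark edge the convolution sees at every border:
print(F.pad(row, [1, 1, 0, 0], mode="constant", value=0))  # [0, 5, 5, 5, 5, 0]
# reflect padding keeps the border statistics close to the interior:
print(F.pad(row, [1, 1, 0, 0], mode="reflect"))            # [5, 5, 5, 5, 5, 5]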
Thank you