- self.conv = Conv(c1, c_, k, s, p, g)
- self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
- self.drop = nn.Dropout(p=0.0, inplace=True)
- self.linear = nn.Linear(c_, c2) # to x(b,c2)
def forward(self, x):
    """Apply the head's conv -> pool -> flatten -> dropout -> linear pipeline to x.

    Args:
        x: A tensor, or a list of tensors. A list is first concatenated along
            dimension 1 with ``torch.cat``.

    Returns:
        The output of ``self.linear`` unchanged while ``self.training`` is
        truthy; otherwise that output passed through ``softmax`` over
        dimension 1.
    """
    # Multiple inputs arrive as a list and are merged along dim 1 before the conv.
    merged = torch.cat(x, 1) if isinstance(x, list) else x

    pooled = self.pool(self.conv(merged))
    # flatten(1) collapses every dimension after the first so the linear layer gets 2-D input.
    scores = self.linear(self.drop(pooled.flatten(1)))

    if self.training:
        return scores
    return scores.softmax(1)
- class WorldDetect(Detect):
- """Head for integrating YOLO detection models with semantic understanding from text embeddings."""
- def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
- """Initialize YOLO detection layer with nc classes and layer channels ch."""
- super().__init__(nc, ch)
- c3 = max(ch[0], min(self.nc, 100))
- self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)