Skip to content

Commit

Permalink
Refactor PointPillars pillar encoder and add ONNX converter
Browse files Browse the repository at this point in the history
Signed-off-by: Kaan Çolak <[email protected]>
  • Loading branch information
kaancolak committed Nov 30, 2023
1 parent 5c0613b commit be583bc
Show file tree
Hide file tree
Showing 3 changed files with 490 additions and 28 deletions.
181 changes: 181 additions & 0 deletions configs/centerpoint/centerpoint_custom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
_base_ = [
'../_base_/datasets/nus-3d.py',
'../_base_/models/centerpoint_pillar02_second_secfpn_nus.py',
'../_base_/schedules/cyclic-20e.py', '../_base_/default_runtime.py'
]

point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

class_names = [
"car",
"truck",
"bus",
"bicycle",
"pedestrian",
]

data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')

# Stride of the head's output map relative to the pillar BEV grid
# (1 = predict at full grid resolution; base config uses a larger factor).
out_size_factor = 1
model = dict(
    data_preprocessor=dict(
        voxel_layer=dict(
            point_cloud_range=point_cloud_range)),
    pts_voxel_encoder=dict(
        point_cloud_range=point_cloud_range,
        # 4 input channels per point — presumably x, y, z plus the channel kept
        # by point_use_dim; confirm against the pipeline's use_dim selection.
        in_channels=4,
        feat_channels=[32, 32],
        # Drop the distance-to-voxel-center z feature in the pillar encoder.
        use_voxel_center_z=False),
    pts_middle_encoder=dict(
        in_channels=32),
    pts_backbone=dict(
        in_channels=32,
        layer_strides=[1, 2, 2],),
    pts_neck=dict(
        # Upsample each backbone stage back to the full-resolution BEV map,
        # matching out_size_factor=1 above.
        upsample_strides=[1, 2, 4], ),
    pts_bbox_head=dict(
        # One task head covering all five classes.
        tasks=[dict(
            num_class=len(class_names),
            class_names=class_names)],
        bbox_coder=dict(
            out_size_factor=out_size_factor,
            # Coder only needs the x/y lower bounds of the range.
            pc_range=point_cloud_range[:2])),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            point_cloud_range=point_cloud_range,
            out_size_factor=out_size_factor)),
    test_cfg=dict(
        pts=dict(
            pc_range=point_cloud_range[:2],
            nms_type='circle',
            out_size_factor=out_size_factor,)))

# Dataset bookkeeping shared by the samplers and dataloaders below.
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None  # default (local) file backend

# Each stored point carries 5 values; downstream transforms keep indices
# 0-2 (x, y, z) and 4, dropping index 3.
point_load_dim = 5
point_use_dim = [0, 1, 2, 4]

# GT-database sampler: pastes annotated objects saved from other frames into
# the current frame (used by the ObjectSample transform) to balance classes.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        # Skip database objects containing fewer lidar points than this.
        filter_by_min_points=dict(
            car=5,
            truck=5,
            bus=5,
            bicycle=5,
            pedestrian=5)),
    classes=class_names,
    # Target number of sampled objects per class per frame.
    sample_groups=dict(
        car=2,
        truck=3,
        bus=4,
        bicycle=6,
        pedestrian=2),
    # Loader for the cropped object point clouds; uses the same dim
    # selection as the main pipeline so pasted points match frame points.
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=point_load_dim,
        use_dim=point_use_dim,
        backend_args=backend_args),
    backend_args=backend_args)

# Training-time data pipeline.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=point_load_dim,
        # Keep all 5 raw dims here; LoadPointsFromMultiSweeps below applies
        # the point_use_dim selection to key frame and sweeps alike.
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=9,  # aggregate 9 past sweeps with the key frame
        use_dim=point_use_dim,
        pad_empty_sweeps=True,
        remove_close=True,  # drop points very close to the sensor
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    # Paste extra GT objects from the database (class balancing).
    dict(type='ObjectSample', db_sampler=db_sampler),
    # Global rotation / scaling / translation augmentation.
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    # Discard points and boxes outside the detection range.
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
# Inference/evaluation pipeline: same point loading as training, but no
# annotation loading and no random augmentation.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=point_load_dim,
        # All 5 raw dims; the sweep loader applies point_use_dim.
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=9,
        use_dim=point_use_dim,
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        # img_scale presumably unused for this lidar-only model but required
        # by the wrapper — confirm.
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        # Identity transforms: test-time augmentation effectively disabled
        # (zero rotation, unit scale, flip=False above).
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D')
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    # _delete_ replaces the base-config train_dataloader entirely instead of
    # merging into it.
    _delete_=True,
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        # CBGSDataset resamples frames for class-balanced training.
        type='CBGSDataset',
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='nuscenes_infos_train.pkl',
            pipeline=train_pipeline,
            metainfo=dict(classes=class_names),
            test_mode=False,
            data_prefix=data_prefix,
            # Keep only annotations flagged valid by the dataset tooling.
            use_valid_flag=True,
            box_type_3d='LiDAR',
            backend_args=backend_args)))
# Val/test dataloaders inherit the base config; only the pipeline and the
# class list are overridden here.
test_dataloader = dict(
    dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
val_dataloader = dict(
    dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))

# Run validation every 5 training epochs (merged into the base train loop).
train_cfg = dict(val_interval=5)

# Checkpoint after every epoch, keeping optimizer state for resuming.
default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=1, save_optimizer=True))
64 changes: 36 additions & 28 deletions mmdet3d/models/voxel_encoders/pillar_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class PillarFeatureNet(nn.Module):

def __init__(self,
in_channels: Optional[int] = 4,
feat_channels: Optional[tuple] = (64, ),
feat_channels: Optional[tuple] = (64,),
with_distance: Optional[bool] = False,
with_cluster_center: Optional[bool] = True,
with_voxel_center: Optional[bool] = True,
Expand All @@ -50,14 +50,18 @@ def __init__(self,
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
mode: Optional[str] = 'max',
legacy: Optional[bool] = True):
legacy: Optional[bool] = True,
use_voxel_center_z: Optional[bool] = True, ):
super(PillarFeatureNet, self).__init__()
assert len(feat_channels) > 0
self.legacy = legacy
self.use_voxel_center_z = use_voxel_center_z
if with_cluster_center:
in_channels += 3
if with_voxel_center:
in_channels += 3
in_channels += 2
if self.use_voxel_center_z:
in_channels += 1
if with_distance:
in_channels += 1
self._with_distance = with_distance
Expand Down Expand Up @@ -110,35 +114,38 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)

# Find distance of x, y, and z from pillar center
dtype = features.dtype
if self._with_voxel_center:
center_feature_size = 3 if self.use_voxel_center_z else 2
if not self.legacy:
f_center = torch.zeros_like(features[:, :, :3])
f_center = torch.zeros_like(features[:, :, :center_feature_size])
f_center[:, :, 0] = features[:, :, 0] - (
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
self.x_offset)
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
self.z_offset)
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
self.y_offset)
if self.use_voxel_center_z:
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
self.z_offset)
else:
f_center = features[:, :, :3]
f_center = features[:, :, :center_feature_size]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = f_center[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
if self.use_voxel_center_z:
f_center[:, :, 2] = f_center[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
features_ls.append(f_center)

if self._with_distance:
Expand Down Expand Up @@ -193,7 +200,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):

def __init__(self,
in_channels: Optional[int] = 4,
feat_channels: Optional[tuple] = (64, ),
feat_channels: Optional[tuple] = (64,),
with_distance: Optional[bool] = False,
with_cluster_center: Optional[bool] = True,
with_voxel_center: Optional[bool] = True,
Expand Down Expand Up @@ -264,15 +271,15 @@ def map_voxel_center_to_point(self, pts_coors: Tensor, voxel_mean: Tensor,
canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)
# Only include non-empty pillars
indices = (
voxel_coors[:, 0] * canvas_y * canvas_x +
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
voxel_coors[:, 0] * canvas_y * canvas_x +
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
# Scatter the blob back to the canvas
canvas[:, indices.long()] = voxel_mean.t()

# Step 2: get voxel mean for each point
voxel_index = (
pts_coors[:, 0] * canvas_y * canvas_x +
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
pts_coors[:, 0] * canvas_y * canvas_x +
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
center_per_point = canvas[:, voxel_index.long()].t()
return center_per_point

Expand Down Expand Up @@ -301,11 +308,11 @@ def forward(self, features: Tensor, coors: Tensor) -> Tensor:
if self._with_voxel_center:
f_center = features.new_zeros(size=(features.size(0), 3))
f_center[:, 0] = features[:, 0] - (
coors[:, 3].type_as(features) * self.vx + self.x_offset)
coors[:, 3].type_as(features) * self.vx + self.x_offset)
f_center[:, 1] = features[:, 1] - (
coors[:, 2].type_as(features) * self.vy + self.y_offset)
coors[:, 2].type_as(features) * self.vy + self.y_offset)
f_center[:, 2] = features[:, 2] - (
coors[:, 1].type_as(features) * self.vz + self.z_offset)
coors[:, 1].type_as(features) * self.vz + self.z_offset)
features_ls.append(f_center)

if self._with_distance:
Expand All @@ -324,3 +331,4 @@ def forward(self, features: Tensor, coors: Tensor) -> Tensor:
features = torch.cat([point_feats, feat_per_point], dim=1)

return voxel_feats, voxel_coors

Loading

0 comments on commit be583bc

Please sign in to comment.