-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
李波
authored and
李波
committed
Dec 30, 2021
1 parent
6461ce5
commit 0a83f85
Showing
9 changed files
with
819 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
num_classes: &num_classes 1204 | ||
|
||
flip: &flip | ||
type: flip | ||
kwargs: | ||
flip_p: 0.5 | ||
|
||
resize: &train_resize | ||
type: keep_ar_resize | ||
kwargs: | ||
scales: [640, 672, 704, 736, 768, 800] | ||
max_size: 1333 | ||
separate_wh: True | ||
|
||
resize: &test_resize | ||
type: keep_ar_resize | ||
kwargs: | ||
scales: [800] | ||
max_size: 1333 | ||
separate_wh: True | ||
|
||
|
||
to_tensor: &to_tensor | ||
type: to_tensor | ||
|
||
normalize: &normalize | ||
type: normalize | ||
kwargs: | ||
mean: [0.485, 0.456, 0.406] # ImageNet pretrained statics | ||
std: [0.229, 0.224, 0.225] | ||
|
||
dataset: # Required. | ||
train: | ||
dataset: | ||
type: lvisv1 | ||
kwargs: | ||
meta_file: lvis/annotations/lvis_v1_train.json | ||
image_reader: | ||
type: fs_opencv | ||
kwargs: | ||
image_dir: lvis | ||
color_mode: RGB | ||
transformer: [*flip, *train_resize, *to_tensor, *normalize] | ||
batch_sampler: | ||
type: aspect_ratio_group | ||
kwargs: | ||
sampler: | ||
type: repeat_factor | ||
kwargs: | ||
t: 0.001 | ||
ri_mode: ceil | ||
pn: 0.5 | ||
static_size: False | ||
batch_size: 1 | ||
aspect_grouping: [1,] | ||
test: | ||
dataset: | ||
type: lvisv1 | ||
kwargs: | ||
meta_file: >_file lvis/annotations/lvis_v1_val.json | ||
image_reader: | ||
type: fs_opencv | ||
kwargs: | ||
image_dir: lvis | ||
color_mode: RGB | ||
transformer: [*test_resize, *to_tensor, *normalize] | ||
evaluator: | ||
type: LVIS # choices = {'COCO', 'VOC', 'MR'} | ||
kwargs: | ||
gt_file: *gt_file | ||
iou_types: [bbox] | ||
batch_sampler: | ||
type: aspect_ratio_group | ||
kwargs: | ||
sampler: | ||
type: dist | ||
kwargs: {} | ||
batch_size: 1 | ||
aspect_grouping: [1,] | ||
dataloader: | ||
type: base | ||
kwargs: | ||
num_workers: 4 | ||
alignment: 64 | ||
|
||
|
||
trainer: # Required. | ||
max_epoch: 24 # total epochs for the training | ||
test_freq: 24 | ||
optimizer: # optimizer = SGD(params,lr=0.001,momentum=0.9,weight_decay=0.0001) | ||
type: SGD | ||
kwargs: | ||
lr: 0.00125 | ||
momentum: 0.9 | ||
weight_decay: 0.0001 | ||
lr_scheduler: # lr_scheduler = MultStepLR(optimizer, milestones=[9,14],gamma=0.1) | ||
warmup_epochs: 1 # set to be 0 to disable warmup. When warmup, target_lr = init_lr * total_batch_size | ||
warmup_type: linear | ||
warmup_ratio: 0.001 # warmup init lr = warmup_ratio * base_lr | ||
type: MultiStepLR | ||
kwargs: | ||
milestones: [16, 22] # epochs to decay lr | ||
gamma: 0.1 # decay rate | ||
|
||
saver: # Required. | ||
save_dir: checkpoints # dir to save checkpoints | ||
pretrain_model: pretrain/pytorch/imagenet/resnet101-5d3b4d8f.pth | ||
results_dir: results_dir # dir to save detection results. i.e., bboxes, masks, keypoints | ||
auto_resume: True | ||
|
||
hooks: | ||
- type: auto_checkpoint | ||
- type: gradient_collector | ||
kwargs: | ||
hook_head: roi_head | ||
hook_cls_head: cls_subnet_pred | ||
- type: grad_clipper | ||
kwargs: | ||
mode: pre_defined | ||
norm_type: 2 | ||
max_norm: 35 | ||
|
||
net: | ||
- name: backbone # backbone = resnet50(frozen_layers, out_layers, out_strides) | ||
type: resnet101 | ||
kwargs: | ||
frozen_layers: [0,1] # layer0...1 is fixed | ||
out_layers: [2,3,4] # layer1...4, commonly named Conv2...5 | ||
out_strides: [8,16,32] # tell the strides of output features | ||
normalize: | ||
type: freeze_bn | ||
initializer: | ||
method: msra | ||
- name: neck | ||
prev: backbone | ||
type: FPN | ||
kwargs: | ||
outplanes: 256 | ||
start_level: 3 | ||
num_level: 5 # if num_level>len(backbone.out_layers), additional conv with be stacked. | ||
out_strides: [8,16,32,64,128] # strides of output features. aka., anchor strides for roi_head | ||
downsample: conv # method to downsample, for FPN, it's pool, for RetienaNet, it's conv | ||
upsample: nearest # method to interp, nearest or bilinear | ||
initializer: | ||
method: xavier | ||
- name: roi_head | ||
prev: neck | ||
type: RetinaHeadWithIOU | ||
kwargs: | ||
feat_planes: 256 # channels of intermediate conv | ||
num_classes: *num_classes # number of classes including backgroudn. for rpn, it's 2; for RetinaNet, it's 81 | ||
initializer: | ||
method: normal | ||
std: 0.01 | ||
normalize: | ||
type: gn | ||
kwargs: | ||
num_groups: 32 | ||
init_prior: 0.001 | ||
num_anchors: 2 | ||
class_activation: sigmoid | ||
- name: post_process | ||
prev: roi_head | ||
type: retina_post_iou | ||
kwargs: | ||
num_classes: *num_classes # number of classes including backgroudn. for rpn, it's 2; for RetinaNet, it's 81 | ||
cfg: | ||
cls_loss: | ||
type: equalized_focal_loss | ||
kwargs: | ||
num_classes: *num_classes | ||
focal_alpha: 0.25 | ||
focal_gamma: 2.0 | ||
scale_factor: 8.0 | ||
fpn_levels: 5 | ||
loc_loss: | ||
type: iou_loss | ||
kwargs: | ||
loss_type: giou | ||
loss_weight: 1.0 | ||
iou_branch_loss: | ||
type: sigmoid_cross_entropy | ||
kwargs: | ||
loss_weight: 1.0 | ||
anchor_generator: | ||
type: hand_craft | ||
kwargs: | ||
anchor_ratios: [1] # anchor strides are provided as feature strides by feature extractor | ||
anchor_scales: [6, 8] # scale of anchors relative to feature map | ||
roi_supervisor: | ||
type: atss | ||
kwargs: | ||
top_n: 18 | ||
use_iou: True | ||
roi_predictor: | ||
type: base_multicls | ||
kwargs: | ||
pre_nms_score_thresh: 0 # to reduce computation | ||
pre_nms_top_n: 6000 | ||
post_nms_top_n: 1000 | ||
roi_min_size: 0 # minimum scale of a valid roi | ||
merger: | ||
type: retina_multicls | ||
kwargs: | ||
top_n: 300 | ||
nms: | ||
type: naive | ||
nms_iou_thresh: 0.5 # Required in RetinaNet. DO not nms in FPN across levels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.