diff --git a/hannah/conf/config_resnet.yaml b/hannah/conf/config_resnet.yaml new file mode 100644 index 00000000..611aaed9 --- /dev/null +++ b/hannah/conf/config_resnet.yaml @@ -0,0 +1,61 @@ +## +## Copyright (c) 2022 University of Tübingen. +## +## This file is part of hannah. +## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +defaults: + - base_config + - override dataset: cifar10 # Dataset configuration name + - override features: identity # Feature extractor configuration name (use identity for vision datasets) + - override model: resnet # Neural network name + - override scheduler: 1cycle # learning rate scheduler config name + - override optimizer: adamw # Optimizer config name + - override normalizer: null # Feature normalizer (used for quantized neural networks) + - override module: image_classifier # Lightning module config for the training loop (image classifier for image classification tasks) + - override nas: aging_evolution_nas + - _self_ + + +# dataset: +# data_folder: ${oc.env:HANNAH_DATA_FOLDER,${hydra:runtime.cwd}/../../datasets/} + +experiment_id: "resnet_nas" + +seed: [1234] + +model: + num_classes: 10 + +nas: + budget: 500 + n_jobs: 4 + total_candidates: 100 + num_selected_candidates: 10 + sampler: + population_size: 50 + sample_size: 10 + +module: + batch_size: 64 + num_workers: 4 + +trainer: + max_epochs: 10 + +scheduler: + max_lr: 0.001 + +fx_mac_summary: True diff --git a/hannah/conf/model/resnet.yaml b/hannah/conf/model/resnet.yaml new file mode 100644 index 00000000..088ea3ba --- /dev/null +++ b/hannah/conf/model/resnet.yaml @@ -0,0 +1,3 @@ +_target_: hannah.models.resnet.models.search_space +name: resnet +num_classes: 10 \ No newline at end of file diff --git a/hannah/models/resnet/blocks.py b/hannah/models/resnet/blocks.py new file mode 100644 index 00000000..78b6c184 --- /dev/null +++ b/hannah/models/resnet/blocks.py @@ -0,0 +1,55 @@ +from functools import partial +from hannah.models.embedded_vision_net.expressions import expr_product +from hannah.nas.expressions.arithmetic import Ceil +from hannah.nas.expressions.types import Int +from hannah.nas.functional_operators.op import scope +from hannah.models.embedded_vision_net.operators import adaptive_avg_pooling, add, conv2d, conv_relu, depthwise_conv2d, dynamic_depth, pointwise_conv2d, linear, relu, batch_norm, choice, identity +from hannah.nas.parameters.parameters import CategoricalParameter, IntScalarParameter + + +@scope +def conv_relu_bn(input, out_channels, kernel_size, stride): + out = conv2d(input, out_channels, kernel_size, stride) + out = batch_norm(out) + out = relu(out) + return out + + +@scope +def residual(input, main_branch_output_shape): + input_shape = input.shape() + in_fmap = input_shape[2] + out_channels = main_branch_output_shape[1] + out_fmap = main_branch_output_shape[2] + stride = Int(Ceil(in_fmap / out_fmap)) + + out = conv2d(input, out_channels=out_channels, kernel_size=1, stride=stride, padding=0) + out = batch_norm(out) + out = relu(out) + return out + + +@scope +def block(input, depth, out_channels, kernel_size, stride): + assert isinstance(depth, IntScalarParameter), "block depth must be of type IntScalarParameter" + out = input + exits = [] + for i in range(depth.max+1): + out = conv_relu_bn(out, + out_channels=out_channels.new(), + kernel_size=kernel_size.new(), + stride=stride.new() if i == 0 else 1) + exits.append(out) + + out = dynamic_depth(*exits, switch=depth) + res = residual(input, out.shape()) + out = add(out, res) + + return out + + +@scope +def classifier_head(input, num_classes): + out = choice(input, adaptive_avg_pooling) + out = linear(out, num_classes) + return out diff --git a/hannah/models/resnet/expressions.py b/hannah/models/resnet/expressions.py new file mode 100644 index 00000000..8364cc7b --- /dev/null +++ b/hannah/models/resnet/expressions.py @@ -0,0 +1,24 @@ +from hannah.nas.expressions.logic import And, If +from hannah.nas.expressions.arithmetic import Ceil + + +def padding_expression(kernel_size, stride, dilation=1): + """Symbolically calculate padding such that for a given kernel_size, stride and dilation + the padding is such that the output dimension is kept the same(stride=1) or halved(stride=2). + Note: If the input dimension is 1 and stride = 2, the calculated padding will result in + an output with also dimension 1. + + Parameters + ---------- + kernel_size : Union[int, Expression] + stride : Union[int, Expression] + dilation : Union[int, Expression], optional + _description_, by default 1 + + Returns + ------- + Expression + """ + # r = 1 - (kernel_size % 2) + p = (dilation * (kernel_size - 1) - stride + 1) / 2 + return Ceil(p) diff --git a/hannah/models/resnet/models.py b/hannah/models/resnet/models.py index 9b41f9fd..22e562a4 100644 --- a/hannah/models/resnet/models.py +++ b/hannah/models/resnet/models.py @@ -1,229 +1,35 @@ -import torch -import torch.nn as nn -from hannah.nas.expressions.shapes import conv2d_shape, identity_shape -from hannah.nas.parameters.lazy import Lazy -from hannah.nas.parameters.parametrize import parametrize -from hannah.nas.parameters.iterators import RangeIterator -from hannah.nas.parameters.parameters import IntScalarParameter, CategoricalParameter -from hannah.nas.expressions.arithmetic import Ceil -from hannah.nas.expressions.choice import SymbolicAttr, Choice -from hannah.nas.expressions.types import Int +from hannah.models.embedded_vision_net.expressions import expr_product +from hannah.nas.parameters.parameters import CategoricalParameter, IntScalarParameter +from hannah.models.resnet.operators import dynamic_depth +from hannah.models.resnet.blocks import block, conv_relu_bn, classifier_head -conv2d = Lazy(nn.Conv2d, shape_func=conv2d_shape) -linear = Lazy(nn.Linear) -batch_norm = Lazy(nn.BatchNorm2d, shape_func=identity_shape) -relu = Lazy(nn.ReLU) -tensor = Lazy(torch.Tensor, shape_func=identity_shape) +def search_space(name, input, num_classes=10): + out_channels = IntScalarParameter(16, 64, step_size=4, name='out_channels') + kernel_size = CategoricalParameter([3, 5, 7, 9], name='kernel_size') + stride = CategoricalParameter([1, 2], name='stride') -def padding_expression(kernel_size, stride, dilation = 1): - """Symbolically calculate padding such that for a given kernel_size, stride and dilation - the padding is such that the output dimension is kept the same(stride=1) or halved(stride=2). - Note: If the input dimension is 1 and stride = 2, the calculated padding will result in - an output with also dimension 1. + depth = IntScalarParameter(0, 2, name='depth') + num_blocks = IntScalarParameter(0, 6, name='num_blocks') - Parameters - ---------- - kernel_size : Union[int, Expression] - stride : Union[int, Expression] - dilation : Union[int, Expression], optional - _description_, by default 1 + stem_kernel_size = CategoricalParameter([3, 5], name="kernel_size") + stem_channels = IntScalarParameter(min=16, max=32, step_size=4, name="out_channels") + out = conv_relu_bn(input, stem_channels, stem_kernel_size, stride.new()) - Returns - ------- - Expression - """ - p = (dilation * (kernel_size - 1) - stride + 1) / 2 - return Ceil(p) + exits = [] + for i in range(num_blocks.max+1): + out = block(out, + depth=depth.new(), + out_channels=out_channels.new(), + kernel_size=kernel_size.new(), + stride=stride.new()) + exits.append(out) -def stride_product(expressions: list): - res = None - for expr in expressions: - if res: - res = res * expr - else: - res = expr - return res + out = dynamic_depth(*exits, switch=num_blocks) + out = classifier_head(out, num_classes=num_classes) + strides = [v for k, v in out.parametrization(flatten=True).items() if k.split('.')[-1] == 'stride'] + total_stride = expr_product(strides) + out.cond(input.shape()[2] / total_stride > 1) -@parametrize -class ConvReluBn(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size, stride, id, inputs) -> None: - super().__init__() - self.id = id - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - - self.conv = conv2d(self.id + ".conv", - inputs=inputs, - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding_expression(kernel_size, stride)) - - self.shape = self.conv.shape - self.bn = batch_norm(self.id + ".bn", num_features=out_channels) - self.relu = relu(self.id + ".relu") - - def initialize(self): - self.tconv = self.conv.instantiate() - self.tbn = self.bn.instantiate() - self.trelu = self.relu.instantiate() - - def forward(self, x): - out = self.tconv(x) - out = self.tbn(out) - out = self.trelu(out) - return out - -@parametrize -class ResidualBlock(nn.Module): - def __init__(self, in_channels, out_channels, in_fmap_size, out_fmap_size, id, inputs) -> None: - super().__init__() - self.id = id - self.in_channels = in_channels - self.out_channels = out_channels - self.in_fmap = in_fmap_size - self.out_fmap = out_fmap_size - self.stride = Int(Ceil(in_fmap_size / out_fmap_size)) - self.conv = conv2d(id=self.id + ".residual_conv", - inputs=inputs, - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=self.stride, - padding=0) - self.activation = relu(self.id + '.relu') - - def initialize(self): - self.tconv = self.conv.instantiate() - self.tact = self.activation.instantiate() - - def forward(self, x): - out = self.tconv(x) - out = self.tact(out) - return out - - -@parametrize -class ConvReluBlock(nn.Module): - def __init__(self, params, input_shape, id, depth) -> None: - super().__init__() - self.input_shape = input_shape - self.depth = self.add_param(f'{id}.depth', depth) - self.mods = nn.ModuleList() - self.id = id - self.depth = depth - self.params = params - - self.strides = [] - - previous = input_shape - for d in RangeIterator(self.depth, instance=False): - in_channels = self.input_shape[1] if d == 0 else previous.out_channels - out_channels = self.add_param(f'{self.id}.conv{d}.out_channels', IntScalarParameter(self.params.conv.out_channels.min, - self.params.conv.out_channels.max, - self.params.conv.out_channels.step)) - kernel_size = self.add_param(f'{self.id}.conv{d}.kernel_size', CategoricalParameter(self.params.conv.kernel_size.choices)) - stride = self.add_param(f'{self.id}.conv{d}.stride', CategoricalParameter(self.params.conv.stride.choices)) - - self.strides.append(stride) - - layer = ConvReluBn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, id=f'{self.id}.{d}', inputs=[previous]) - self.mods.append(layer) - previous = layer - - self.last_layer = Choice(self.mods, self.depth - 1) - self.cond(stride_product(self.strides) <= self.input_shape[2]) - - def initialize(self): - for d in RangeIterator(self.depth, instance=False): - self.mods[d].initialize() - - def forward(self, x): - out = x - for d in RangeIterator(self.depth, instance=True): - out = self.mods[d](out) - return out - -@parametrize -class ClassifierHead(nn.Module): - def __init__(self, input, labels) -> None: - super().__init__() - self.labels = labels - in_features = input.get('shape')[1] * input.get('shape')[2] * input.get('shape')[3] - self._linear = self.add_param('linear', - linear("linear", - inputs=[input], - in_features=in_features, - out_features=self.labels)) - - def initialize(self): - self.linear = self._linear.instantiate() - - def forward(self, x): - out = x.view(x.shape[0], -1) - out = self.linear(out) - return out - - -@parametrize -class ResNet(nn.Module): - def __init__(self, name, params, input_shape, labels) -> None: - super().__init__() - self.input_shape = input_shape - self.labels = labels - self.depth = IntScalarParameter(params.depth.min, params.depth.max) - self.num_blocks = IntScalarParameter(params.num_blocks.min, params.num_blocks.max) - self.conv_blocks = nn.ModuleList() - self.residual_blocks = nn.ModuleList() - - next_input = self.input_shape - for n in RangeIterator(self.num_blocks, instance=False): - block = self.add_param(f"conv_block_{n}", - ConvReluBlock(params, - next_input, - f"conv_block_{n}", - self.depth)) - # last = Choice(block.mods, self.depth - 1) - residual_block = self.add_param(f"residual_block_{n}", - ResidualBlock(next_input[1], - block.last_layer.get('shape')[1], - next_input[2], - block.last_layer.get('shape')[2], - f"residual_block_{n}", - next_input)) - next_input = [block.last_layer.get("shape")[0], block.last_layer.get("shape")[1], block.last_layer.get("shape")[2], block.last_layer.get("shape")[3]] - self.conv_blocks.append(block) - self.residual_blocks.append(residual_block) - - last_block = Choice(self.conv_blocks, self.num_blocks - 1) - self.classifier = ClassifierHead(last_block.get("last_layer"), self.labels) - - - def initialize(self): - for n in RangeIterator(self.num_blocks, instance=False): - self.conv_blocks[n].initialize() - self.residual_blocks[n].initialize() - self.classifier.initialize() - - def forward(self, x): - out = x - for n in RangeIterator(self.num_blocks, instance=True): - block_out = self.conv_blocks[n](out) - res_out = self.residual_blocks[n](out) - out = torch.add(block_out, res_out) - out = block_out - - out = self.classifier(out) - return out - - def get_hparams(self): - params = {} - for key, param in self.parametrization(flatten=True).items(): - params[key] = param.current_value.item() - - return params \ No newline at end of file + return out diff --git a/hannah/models/resnet/models_lazy.py b/hannah/models/resnet/models_lazy.py new file mode 100644 index 00000000..9b41f9fd --- /dev/null +++ b/hannah/models/resnet/models_lazy.py @@ -0,0 +1,229 @@ +import torch +import torch.nn as nn +from hannah.nas.expressions.shapes import conv2d_shape, identity_shape +from hannah.nas.parameters.lazy import Lazy +from hannah.nas.parameters.parametrize import parametrize +from hannah.nas.parameters.iterators import RangeIterator +from hannah.nas.parameters.parameters import IntScalarParameter, CategoricalParameter +from hannah.nas.expressions.arithmetic import Ceil +from hannah.nas.expressions.choice import SymbolicAttr, Choice +from hannah.nas.expressions.types import Int + +conv2d = Lazy(nn.Conv2d, shape_func=conv2d_shape) +linear = Lazy(nn.Linear) +batch_norm = Lazy(nn.BatchNorm2d, shape_func=identity_shape) +relu = Lazy(nn.ReLU) +tensor = Lazy(torch.Tensor, shape_func=identity_shape) + + +def padding_expression(kernel_size, stride, dilation = 1): + """Symbolically calculate padding such that for a given kernel_size, stride and dilation + the padding is such that the output dimension is kept the same(stride=1) or halved(stride=2). + Note: If the input dimension is 1 and stride = 2, the calculated padding will result in + an output with also dimension 1. + + Parameters + ---------- + kernel_size : Union[int, Expression] + stride : Union[int, Expression] + dilation : Union[int, Expression], optional + _description_, by default 1 + + Returns + ------- + Expression + """ + p = (dilation * (kernel_size - 1) - stride + 1) / 2 + return Ceil(p) + +def stride_product(expressions: list): + res = None + for expr in expressions: + if res: + res = res * expr + else: + res = expr + return res + + +@parametrize +class ConvReluBn(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride, id, inputs) -> None: + super().__init__() + self.id = id + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + + self.conv = conv2d(self.id + ".conv", + inputs=inputs, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding_expression(kernel_size, stride)) + + self.shape = self.conv.shape + self.bn = batch_norm(self.id + ".bn", num_features=out_channels) + self.relu = relu(self.id + ".relu") + + def initialize(self): + self.tconv = self.conv.instantiate() + self.tbn = self.bn.instantiate() + self.trelu = self.relu.instantiate() + + def forward(self, x): + out = self.tconv(x) + out = self.tbn(out) + out = self.trelu(out) + return out + +@parametrize +class ResidualBlock(nn.Module): + def __init__(self, in_channels, out_channels, in_fmap_size, out_fmap_size, id, inputs) -> None: + super().__init__() + self.id = id + self.in_channels = in_channels + self.out_channels = out_channels + self.in_fmap = in_fmap_size + self.out_fmap = out_fmap_size + self.stride = Int(Ceil(in_fmap_size / out_fmap_size)) + self.conv = conv2d(id=self.id + ".residual_conv", + inputs=inputs, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=self.stride, + padding=0) + self.activation = relu(self.id + '.relu') + + def initialize(self): + self.tconv = self.conv.instantiate() + self.tact = self.activation.instantiate() + + def forward(self, x): + out = self.tconv(x) + out = self.tact(out) + return out + + +@parametrize +class ConvReluBlock(nn.Module): + def __init__(self, params, input_shape, id, depth) -> None: + super().__init__() + self.input_shape = input_shape + self.depth = self.add_param(f'{id}.depth', depth) + self.mods = nn.ModuleList() + self.id = id + self.depth = depth + self.params = params + + self.strides = [] + + previous = input_shape + for d in RangeIterator(self.depth, instance=False): + in_channels = self.input_shape[1] if d == 0 else previous.out_channels + out_channels = self.add_param(f'{self.id}.conv{d}.out_channels', IntScalarParameter(self.params.conv.out_channels.min, + self.params.conv.out_channels.max, + self.params.conv.out_channels.step)) + kernel_size = self.add_param(f'{self.id}.conv{d}.kernel_size', CategoricalParameter(self.params.conv.kernel_size.choices)) + stride = self.add_param(f'{self.id}.conv{d}.stride', CategoricalParameter(self.params.conv.stride.choices)) + + self.strides.append(stride) + + layer = ConvReluBn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, id=f'{self.id}.{d}', inputs=[previous]) + self.mods.append(layer) + previous = layer + + self.last_layer = Choice(self.mods, self.depth - 1) + self.cond(stride_product(self.strides) <= self.input_shape[2]) + + def initialize(self): + for d in RangeIterator(self.depth, instance=False): + self.mods[d].initialize() + + def forward(self, x): + out = x + for d in RangeIterator(self.depth, instance=True): + out = self.mods[d](out) + return out + +@parametrize +class ClassifierHead(nn.Module): + def __init__(self, input, labels) -> None: + super().__init__() + self.labels = labels + in_features = input.get('shape')[1] * input.get('shape')[2] * input.get('shape')[3] + self._linear = self.add_param('linear', + linear("linear", + inputs=[input], + in_features=in_features, + out_features=self.labels)) + + def initialize(self): + self.linear = self._linear.instantiate() + + def forward(self, x): + out = x.view(x.shape[0], -1) + out = self.linear(out) + return out + + +@parametrize +class ResNet(nn.Module): + def __init__(self, name, params, input_shape, labels) -> None: + super().__init__() + self.input_shape = input_shape + self.labels = labels + self.depth = IntScalarParameter(params.depth.min, params.depth.max) + self.num_blocks = IntScalarParameter(params.num_blocks.min, params.num_blocks.max) + self.conv_blocks = nn.ModuleList() + self.residual_blocks = nn.ModuleList() + + next_input = self.input_shape + for n in RangeIterator(self.num_blocks, instance=False): + block = self.add_param(f"conv_block_{n}", + ConvReluBlock(params, + next_input, + f"conv_block_{n}", + self.depth)) + # last = Choice(block.mods, self.depth - 1) + residual_block = self.add_param(f"residual_block_{n}", + ResidualBlock(next_input[1], + block.last_layer.get('shape')[1], + next_input[2], + block.last_layer.get('shape')[2], + f"residual_block_{n}", + next_input)) + next_input = [block.last_layer.get("shape")[0], block.last_layer.get("shape")[1], block.last_layer.get("shape")[2], block.last_layer.get("shape")[3]] + self.conv_blocks.append(block) + self.residual_blocks.append(residual_block) + + last_block = Choice(self.conv_blocks, self.num_blocks - 1) + self.classifier = ClassifierHead(last_block.get("last_layer"), self.labels) + + + def initialize(self): + for n in RangeIterator(self.num_blocks, instance=False): + self.conv_blocks[n].initialize() + self.residual_blocks[n].initialize() + self.classifier.initialize() + + def forward(self, x): + out = x + for n in RangeIterator(self.num_blocks, instance=True): + block_out = self.conv_blocks[n](out) + res_out = self.residual_blocks[n](out) + out = torch.add(block_out, res_out) + out = block_out + + out = self.classifier(out) + return out + + def get_hparams(self): + params = {} + for key, param in self.parametrization(flatten=True).items(): + params[key] = param.current_value.item() + + return params \ No newline at end of file diff --git a/hannah/models/resnet/operators.py b/hannah/models/resnet/operators.py new file mode 100644 index 00000000..678403ce --- /dev/null +++ b/hannah/models/resnet/operators.py @@ -0,0 +1,68 @@ +from hannah.models.resnet.expressions import padding_expression +from hannah.nas.functional_operators.op import Tensor, scope, ChoiceOp +from hannah.nas.functional_operators.operators import AdaptiveAvgPooling, Add, BatchNorm, Conv2d, Linear, Relu, Identity +from hannah.nas.expressions.types import Int + + +def conv2d(input, out_channels, kernel_size=1, stride=1, dilation=1, groups=1, padding=None): + in_channels = input.shape()[1] + weight = Tensor(name='weight', + shape=(out_channels, in_channels, kernel_size, kernel_size), + axis=('O', 'I', 'kH', 'kW'), + grad=True) + + conv = Conv2d(stride=stride, dilation=dilation, groups=groups, padding=padding)(input, weight) + return conv + + +def linear(input, out_features): + input_shape = input.shape() + in_features = input_shape[1] * input_shape[2] * input_shape[3] + weight = Tensor(name='weight', + shape=(in_features, out_features), + axis=('in_features', 'out_features'), + grad=True) + + out = Linear()(input, weight) + return out + + +def add(input, other): + return Add()(input, other) + + +def identity(input): + return Identity()(input) + + +def adaptive_avg_pooling(input): + return AdaptiveAvgPooling()(input) + + +@scope +def batch_norm(input): + # https://stackoverflow.com/questions/44887446/pytorch-nn-functional-batch-norm-for-2d-input + n_chans = input.shape()[1] + running_mu = Tensor(name='running_mean', shape=(n_chans,), axis=('c',)) + running_std = Tensor(name='running_std', shape=(n_chans,), axis=('c',)) + # running_mu.data = torch.zeros(n_chans) # zeros are fine for first training iter + # running_std = torch.ones(n_chans) # ones are fine for first training iter + return BatchNorm()(input, running_mu, running_std) + + +def relu(input): + return Relu()(input) + + +def conv_relu(input, out_channels, kernel_size, stride): + out = conv2d(input, out_channels=out_channels, stride=stride, kernel_size=kernel_size) + out = relu(out) + return out + + +def choice(input, *choices, switch=None): + return ChoiceOp(*choices, switch=switch)(input) + + +def dynamic_depth(*exits, switch): + return ChoiceOp(*exits, switch=switch)() diff --git a/test/test_lazy_resnet.py b/test/test_lazy_resnet.py index 341fbd20..e165f677 100644 --- a/test/test_lazy_resnet.py +++ b/test/test_lazy_resnet.py @@ -4,7 +4,7 @@ import yaml import os -from hannah.models.resnet.models import ResNet +from hannah.models.resnet.models_lazy import ResNet def test_lazy_resnet_init():