This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Introduce manually optimized CUDA block_reduce function and use it to generate a single reduce kernel (#622) (#637)
SunNy820828449 authored Dec 10, 2021
1 parent de53fce commit 70b04f0
Showing 11 changed files with 821 additions and 351 deletions.
16 changes: 10 additions & 6 deletions cinn/frontend/cinn_builder.cc
@@ -216,17 +216,21 @@ Variable CinnBuilder::Reverse(const Variable& operand, const std::vector<int>& a
   return instr.GetOutput(0);
 }

-std::vector<Variable> CinnBuilder::BnMeanVarianceReduce(const Variable& x) {
-  Instruction instr("bn_mean_variance_reduce", {x});
+std::vector<Variable> CinnBuilder::BnMeanVariance(const Variable& x) {
+  Instruction instr("bn_mean_variance", {x});
+  // optimize bn forward reduce computation, set reduce dimension (NCHW support only, to be deprecated).
+  instr.SetAttr("dim", std::vector<int>{0, 2, 3});
+  instr.SetAttr("keep_dim", false);
   InferShape(instr);
   AppendInstruction(instr);
   return instr.GetOutputs();
 }

-std::vector<Variable> CinnBuilder::BnGradBiasScaleReduce(const Variable& x,
-                                                         const Variable& x_mean,
-                                                         const Variable& y_grad) {
-  Instruction instr("bn_grad_bias_scale_reduce", {x, x_mean, y_grad});
+std::vector<Variable> CinnBuilder::BnGradBiasScale(const Variable& x, const Variable& x_mean, const Variable& y_grad) {
+  Instruction instr("bn_grad_bias_scale", {x, x_mean, y_grad});
+  // optimize bn backward reduce computation, set reduce dimension (NCHW support only, to be deprecated).
+  instr.SetAttr("dim", std::vector<int>{0, 2, 3});
+  instr.SetAttr("keep_dim", false);
   InferShape(instr);
   AppendInstruction(instr);
   return instr.GetOutputs();
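For context on the two hand-built ops above: per the decomposer change below, 'bn_mean_variance' reduces an NCHW input over dims {0, 2, 3} and returns the per-channel sum and sum of squares in a single pass, and 'bn_grad_bias_scale' performs the analogous pair of reductions for the backward pass. The following host-side reference sketch shows that assumed forward semantics; the function name is illustrative and this is not the CINN implementation.

#include <vector>

// Assumed semantics of the fused 'bn_mean_variance' reduction: for a
// contiguous NCHW tensor, return {sum(x), sum(x*x)} per channel, i.e. a
// sum reduction over dims {0, 2, 3} with keep_dim = false.
std::vector<std::vector<float>> BnMeanVarianceReference(const float* x, int N, int C, int H, int W) {
  std::vector<float> sum_x(C, 0.0f), sum_x2(C, 0.0f);
  for (int n = 0; n < N; ++n) {
    for (int c = 0; c < C; ++c) {
      for (int hw = 0; hw < H * W; ++hw) {
        const float v = x[(n * C + c) * H * W + hw];
        sum_x[c] += v;       // first output: per-channel sum
        sum_x2[c] += v * v;  // second output: per-channel sum of squares
      }
    }
  }
  return {sum_x, sum_x2};
}

Fusing both sums into one traversal of x is what lets the decomposer below drop its extra Reduce calls.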
4 changes: 2 additions & 2 deletions cinn/frontend/cinn_builder.h
@@ -179,9 +179,9 @@ class CinnBuilder : public BaseBuilder {

   Variable Reverse(const Variable& operand, const std::vector<int>& axis);

-  std::vector<Variable> BnMeanVarianceReduce(const Variable& x);
+  std::vector<Variable> BnMeanVariance(const Variable& x);

-  std::vector<Variable> BnGradBiasScaleReduce(const Variable& x, const Variable& x_mean, const Variable& y_grad);
+  std::vector<Variable> BnGradBiasScale(const Variable& x, const Variable& x_mean, const Variable& y_grad);

  private:
   Variable UnaryOp(const std::string& op_type, const Variable& operand);
16 changes: 6 additions & 10 deletions cinn/frontend/decomposer/batch_norm.cc
@@ -60,15 +60,13 @@ struct BatchNormHelper {
   std::vector<Variable> MeanAndVariance(Variable x) {
 #ifdef CINN_WITH_CUDA
     // To optimize the bn forward pass by merging the reduce computations of mean and variance,
-    // build a fusion op 'BnMeanVarianceReduce' by hand, as the fusion pass does not support this yet.
+    // build a fusion op 'BnMeanVariance' by hand, as the fusion pass does not support this yet.
     // When the fusion pass is rebuilt, this op will be removed.
-    auto vars = builder->BnMeanVarianceReduce(x);
+    auto vars = builder->BnMeanVariance(x);
     auto element_count_1d_0 = GetTensorFromScalar<float>(element_count, "element_count", param_shape);
     auto element_count_1d_1 = GetTensorFromScalar<float>(element_count, "element_count", param_shape);
-    auto mean = builder->Div(builder->Reduce(vars[0], ReduceKind::kSum, std::vector<int>(1, vars[0]->shape.size() - 1)),
-                             element_count_1d_0);
-    auto mean_squre = builder->Div(
-        builder->Reduce(vars[1], ReduceKind::kSum, std::vector<int>(1, vars[1]->shape.size() - 1)), element_count_1d_1);
+    auto mean = builder->Div(vars[0], element_count_1d_0);
+    auto mean_squre = builder->Div(vars[1], element_count_1d_1);

     auto variance = builder->Sub(mean_squre, builder->Mul(mean, builder->Identity(mean)));
 #else
@@ -82,11 +80,9 @@

   std::vector<Variable> GradBiasAndScale(Variable x, Variable x_mean, Variable y_grad) {
 #ifdef CINN_WITH_CUDA
-    // Using fusion op "BnGradBiasScaleReduce" for the same reason as "BnMeanVarianceReduce".
+    // Using fusion op "BnGradBiasScale" for the same reason as "BnMeanVariance".
     // It will also be removed.
-    auto vars = builder->BnGradBiasScaleReduce(x, x_mean, y_grad);
-    return {builder->Reduce(vars[0], ReduceKind::kSum, std::vector<int>(1, vars[0]->shape.size() - 1)),
-            builder->Reduce(vars[1], ReduceKind::kSum, std::vector<int>(1, vars[1]->shape.size() - 1))};
+    return builder->BnGradBiasScale(x, x_mean, y_grad);
 #else
     auto mean_4d = builder->BroadcastTo(x_mean, x->shape, {channel_dim});
     auto x_mean_diff = builder->Sub(x, mean_4d);
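Given those fused outputs, the new MeanAndVariance path only needs to divide by the element count and apply Var[x] = E[x^2] - (E[x])^2, and GradBiasAndScale now returns its two per-channel reductions directly. A minimal sketch of the forward recovery step under those assumptions, with illustrative names (element_count = N * H * W, the number of values reduced per channel):

struct MeanVar {
  float mean;
  float variance;
};

// Recover per-channel statistics from the two fused sums (assumed layout:
// sum_x = sum over N, H, W of x; sum_x2 = sum over N, H, W of x * x).
MeanVar FromFusedSums(float sum_x, float sum_x2, float element_count) {
  const float mean = sum_x / element_count;          // E[x]
  const float mean_square = sum_x2 / element_count;  // E[x^2]
  return {mean, mean_square - mean * mean};          // Var[x] = E[x^2] - (E[x])^2
}

Because both sums come from one fused kernel, mean and variance no longer need separate reduce launches.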
442 changes: 287 additions & 155 deletions cinn/hlir/op/reduction.cc

Large diffs are not rendered by default.
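The bulk of this commit is in cinn/hlir/op/reduction.cc, which introduces the manually optimized CUDA block_reduce and uses it to emit a single reduce kernel. Since that diff is not rendered here, the sketch below shows the standard warp-shuffle block reduction idiom the commit title refers to; it is a generic illustration with assumed names (warp_reduce_sum, block_reduce_sum, reduce_sum_rows) and is not the code added to reduction.cc.

#include <cuda_runtime.h>

// Reduce a value across the 32 lanes of a warp with register shuffles.
__device__ float warp_reduce_sum(float val) {
  for (int offset = 16; offset > 0; offset >>= 1) {
    val += __shfl_down_sync(0xffffffff, val, offset);
  }
  return val;
}

// Reduce a value across a whole thread block; result is valid in thread 0.
// Assumes blockDim.x is a multiple of 32 and at most 1024.
__device__ float block_reduce_sum(float val) {
  __shared__ float partial[32];        // one slot per warp
  const int lane = threadIdx.x & 31;
  const int warp = threadIdx.x >> 5;

  val = warp_reduce_sum(val);          // step 1: reduce within each warp
  if (lane == 0) partial[warp] = val;  // step 2: lane 0 publishes the warp sum
  __syncthreads();

  const int num_warps = blockDim.x >> 5;
  val = (threadIdx.x < num_warps) ? partial[lane] : 0.0f;
  if (warp == 0) val = warp_reduce_sum(val);  // step 3: warp 0 combines the partial sums
  return val;
}

// One block per output element: each block reduces cols values to one sum,
// so the whole reduction finishes in a single kernel launch, e.g.
// reduce_sum_rows<<<rows, 256>>>(in, out, cols).
__global__ void reduce_sum_rows(const float* in, float* out, int cols) {
  float acc = 0.0f;
  for (int i = threadIdx.x; i < cols; i += blockDim.x) {
    acc += in[blockIdx.x * cols + i];  // thread-strided pass over the row
  }
  acc = block_reduce_sum(acc);
  if (threadIdx.x == 0) out[blockIdx.x] = acc;
}

Assigning one block per reduced output and combining partial sums in registers and shared memory is the usual way a reduction that would otherwise need several kernel launches can be generated as a single one.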

