Merge branch 'inference' into cuda_graph
goliaro authored Jan 22, 2024
2 parents 4d79e0d + 57d1883 commit 1043f1d
Showing 97 changed files with 746 additions and 190 deletions.
2 changes: 1 addition & 1 deletion examples/python/keras/seq_cifar10_cnn.py
@@ -56,7 +56,7 @@ def top_level_task():


if __name__ == "__main__":
print("Sequantial model, cifar10 cnn")
print("Sequential model, cifar10 cnn")
configs = ff.get_configs()
ff.init_flexflow_runtime(configs)
top_level_task()
125 changes: 123 additions & 2 deletions include/flexflow/operator.h
@@ -9,6 +9,14 @@
#include "flexflow/utils/dot/record_formatter.h"
#include <vector>

#include <sys/stat.h>
#include <sys/types.h>
#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
#include "flexflow/utils/cuda_helper.h"
#else
#include "flexflow/utils/hip_helper.h"
#endif

namespace FlexFlow {

extern LegionRuntime::Logger::Category log_measure;
@@ -227,13 +235,126 @@ class Op {
assert(false);
};
virtual void print_layer(FFModel const &model) = 0;
template <typename OpMetaType>
static std::string get_op_name_without_uid(OpMetaType *m) {
std::string op_name_without_uid = std::string(m->op_name);
size_t last_underscore = op_name_without_uid.length() - 1;
for (int i = op_name_without_uid.length() - 1; i > 0; i--) {
if (!(std::isdigit(m->op_name[i]) || m->op_name[i] == '_')) {
break;
} else if (m->op_name[i] == '_') {
last_underscore = i;
}
}
op_name_without_uid.erase(last_underscore);
return op_name_without_uid;
}
template <typename OpMetaType>
static void save_inference_tensors_to_file(
OpMeta *m,
OpMetaType *m,
int shard_id,
BatchConfig const *bc,
std::vector<GenericTensorAccessorR> input_tensors,
std::vector<GenericTensorAccessorR> weight_tensors,
std::vector<GenericTensorAccessorW> output_tensors);
std::vector<GenericTensorAccessorR> output_tensors,
bool before_kernel = false) {
// Check if output directory exists, and create it if it does not
char const *folder_path = "./inference_tensors";
struct stat st = {0};
if (stat(folder_path, &st) == -1) {
// Directory does not exist, create it
mkdir(folder_path, 0700);
}
// output base filepath, shared by all tensors from the same operator
std::string op_name_without_uid = get_op_name_without_uid(m);
std::string base_filepath =
"./inference_tensors/model_" + std::to_string(m->layer_guid.model_id) +
"_decoding-step_" + std::to_string(m->decoding_step) + "_layer-num_" +
std::to_string(m->layer_guid.transformer_layer_id) + "_layer-name_" +
op_name_without_uid + "_shard-id_" + std::to_string(shard_id);
if (before_kernel) {
base_filepath += "_pre";
}
// save batch config, if passed
if (bc != nullptr) {
bc->save_to_file(base_filepath + "_batch-config");
}
// save all inputs
for (int i = 0; i < input_tensors.size(); i++) {
std::string filename = base_filepath + "_input_" + std::to_string(i);
if (input_tensors[i].data_type == DT_FLOAT) {
save_tensor(input_tensors[i].get_float_ptr(),
input_tensors[i].domain.get_volume(),
filename.c_str());
} else if (input_tensors[i].data_type == DT_HALF) {
save_tensor(input_tensors[i].get_half_ptr(),
input_tensors[i].domain.get_volume(),
filename.c_str());
} else if (input_tensors[i].data_type == DT_INT32) {
save_tensor(input_tensors[i].get_int32_ptr(),
input_tensors[i].domain.get_volume(),
filename.c_str());
} else if (input_tensors[i].data_type == DT_INT64) {
save_tensor(input_tensors[i].get_int64_ptr(),
input_tensors[i].domain.get_volume(),
filename.c_str());
} else {
assert(false && "Tensor data type not supported");
}
}
// only dump the weights once
if (m->decoding_step == 0) {
for (int i = 0; i < weight_tensors.size(); i++) {
std::string filename = base_filepath + "_weight_" + std::to_string(i);
if (weight_tensors[i].data_type == DT_FLOAT) {
save_tensor(weight_tensors[i].get_float_ptr(),
weight_tensors[i].domain.get_volume(),
filename.c_str());
} else if (weight_tensors[i].data_type == DT_HALF) {
save_tensor(weight_tensors[i].get_half_ptr(),
weight_tensors[i].domain.get_volume(),
filename.c_str());
} else if (weight_tensors[i].data_type == DT_INT32) {
save_tensor(weight_tensors[i].get_int32_ptr(),
weight_tensors[i].domain.get_volume(),
filename.c_str());
} else if (weight_tensors[i].data_type == DT_INT64) {
save_tensor(weight_tensors[i].get_int64_ptr(),
weight_tensors[i].domain.get_volume(),
filename.c_str());
} else {
assert(false && "Tensor data type not supported");
}
}
}
// save all outputs
for (int i = 0; i < output_tensors.size(); i++) {
std::string filename = base_filepath + "_output_" + std::to_string(i);
if (output_tensors[i].data_type == DT_FLOAT) {
save_tensor(output_tensors[i].get_float_ptr(),
output_tensors[i].domain.get_volume(),
filename.c_str());
} else if (output_tensors[i].data_type == DT_HALF) {
save_tensor(output_tensors[i].get_half_ptr(),
output_tensors[i].domain.get_volume(),
filename.c_str());
} else if (output_tensors[i].data_type == DT_INT32) {
save_tensor(output_tensors[i].get_int32_ptr(),
output_tensors[i].domain.get_volume(),
filename.c_str());
} else if (output_tensors[i].data_type == DT_INT64) {
save_tensor(output_tensors[i].get_int64_ptr(),
output_tensors[i].domain.get_volume(),
filename.c_str());
} else {
assert(false && "Tensor data type not supported");
}
}
// increase count of decoding steps
if (!before_kernel) {
m->decoding_step++;
}
}
virtual bool measure_operator_cost(Simulator *sim,
MachineView const &mv,
CostMetrics &cost_metrics) const = 0;
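A minimal sketch of how an operator's inference task might invoke the new helper (not part of this diff; the wrapper function, its parameters, and the debug guard are illustrative assumptions):

#include "flexflow/operator.h"

using namespace FlexFlow;

// Hedged sketch: dump an operator's tensors after its forward kernel runs.
// Only Op::save_inference_tensors_to_file comes from the diff above; the
// wrapper name and its arguments are assumptions made for illustration.
void dump_debug_tensors(OpMeta *m,
                        BatchConfig const *bc,
                        int shard_id,
                        GenericTensorAccessorR const &input,
                        GenericTensorAccessorR const &weight,
                        GenericTensorAccessorR const &output) {
  // Files land under ./inference_tensors, keyed by model id, decoding step,
  // layer number/name, and shard id, as assembled in base_filepath above.
  Op::save_inference_tensors_to_file(
      m, shard_id, bc, {input}, {weight}, {output});
  // Passing before_kernel = true instead tags the filenames with "_pre" and
  // leaves m->decoding_step unchanged.
}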
1 change: 1 addition & 0 deletions include/flexflow/ops/add_bias_residual_layer_norm_params.h
@@ -12,6 +12,7 @@ struct AddBiasResidualLayerNormParams {
bool elementwise_affine;
float eps;
bool use_bias;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};
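The same one-line change repeats across the params headers below: each operator's parameter struct gains a fixed-size name buffer so the user-assigned operator name can travel with its params. A minimal sketch of how such a field might be filled (the helper and its caller are hypothetical, not part of this diff):

#include <cstring>
#include "flexflow/ops/add_bias_residual_layer_norm_params.h"

// Hedged sketch: copy an op's user-visible name into the new params field.
// The helper is hypothetical; MAX_OPNAME is assumed to be the bound of the
// char name[MAX_OPNAME] buffer declared in the params struct.
void copy_op_name_to_params(char const *op_name,
                            FlexFlow::AddBiasResidualLayerNormParams &params) {
  std::strncpy(params.name, op_name, MAX_OPNAME - 1);
  params.name[MAX_OPNAME - 1] = '\0'; // ensure termination on truncation
}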
1 change: 1 addition & 0 deletions include/flexflow/ops/aggregate_params.h
@@ -9,6 +9,7 @@ namespace FlexFlow {
struct AggregateParams {
int n;
float lambda_bal;
char name[MAX_OPNAME];
bool is_valid(std::vector<ParallelTensorShape> const &) const;
};
bool operator==(AggregateParams const &, AggregateParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/aggregate_spec_params.h
@@ -9,6 +9,7 @@ namespace FlexFlow {
struct AggregateSpecParams {
int n;
float lambda_bal;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(AggregateSpecParams const &, AggregateSpecParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/arg_topk_params.h
@@ -12,6 +12,7 @@ struct ArgTopKParams {
int k;
bool sorted;
bool speculative_decoding;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(ArgTopKParams const &, ArgTopKParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/argmax_params.h
@@ -9,6 +9,7 @@ namespace FlexFlow {
struct ArgMaxParams {
bool beam_search;
bool is_valid(ParallelTensorShape const &) const;
char name[MAX_OPNAME];
};
bool operator==(ArgMaxParams const &, ArgMaxParams const &);

1 change: 1 addition & 0 deletions include/flexflow/ops/attention_params.h
@@ -11,6 +11,7 @@ struct MultiHeadAttentionParams {
int embed_dim, num_heads, kdim, vdim;
float dropout;
bool bias, add_bias_kv, add_zero_attn;
char name[MAX_OPNAME];

bool is_valid(std::tuple<ParallelTensorShape,
ParallelTensorShape,
1 change: 1 addition & 0 deletions include/flexflow/ops/batch_matmul_params.h
@@ -6,6 +6,7 @@ namespace FlexFlow {

struct BatchMatmulParams {
int a_seq_length_dim, b_seq_length_dim;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/beam_topk_params.h
@@ -11,6 +11,7 @@ struct BeamTopKParams {
LayerID layer_guid;
bool sorted;
int max_beam_width;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(BeamTopKParams const &, BeamTopKParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/cast_params.h
@@ -8,6 +8,7 @@ namespace FlexFlow {

struct CastParams {
DataType dtype;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(CastParams const &, CastParams const &);
2 changes: 1 addition & 1 deletion include/flexflow/ops/concat_params.h
@@ -7,7 +7,7 @@ namespace FlexFlow {

struct ConcatParams {
int axis;

char name[MAX_OPNAME];
bool is_valid(std::vector<ParallelTensorShape> const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/conv_2d_params.h
@@ -13,6 +13,7 @@ struct Conv2DParams {
padding_w, groups;
ActiMode activation;
bool use_bias;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &input) const;
void solve_dims(ParallelTensorShape const &input,
1 change: 1 addition & 0 deletions include/flexflow/ops/dropout_params.h
@@ -9,6 +9,7 @@ namespace FlexFlow {
struct DropoutParams {
float rate;
unsigned long long seed;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(DropoutParams const &, DropoutParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/element_binary_params.h
@@ -11,6 +11,7 @@ struct ElementBinaryParams {
LayerID layer_guid;
OperatorType type;
bool inplace_a;
char name[MAX_OPNAME];

bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
1 change: 1 addition & 0 deletions include/flexflow/ops/element_unary_params.h
@@ -12,6 +12,7 @@ struct ElementUnaryParams {
bool inplace;
float scalar = 0.0;
LayerID layer_guid;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/embedding_params.h
@@ -12,6 +12,7 @@ struct EmbeddingParams {
LayerID layer_guid;
AggrMode aggr;
DataType data_type;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/experts_params.h
@@ -17,6 +17,7 @@ struct ExpertsParams {
int experts_internal_dim_size;
bool use_bias;
ActiMode activation;
char name[MAX_OPNAME];

bool is_valid(std::vector<ParallelTensorShape> const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/flat_params.h
@@ -7,6 +7,7 @@
namespace FlexFlow {

struct FlatParams {
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
void solve_dims(ParallelTensorShape const &input,
ParallelDim output_dims[MAX_TENSOR_DIM],
1 change: 1 addition & 0 deletions include/flexflow/ops/gather_params.h
@@ -10,6 +10,7 @@ namespace FlexFlow {
struct GatherParams {
int legion_dim;
LayerID layer_guid;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &input) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/groupby_params.h
@@ -9,6 +9,7 @@ namespace FlexFlow {
struct Group_byParams {
int n;
float alpha;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/inc_multihead_self_attention_params.h
@@ -16,6 +16,7 @@ struct IncMultiHeadSelfAttentionParams {
scaling_query, qk_prod_scaling, position_bias;
DataType quantization_type;
bool offload;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/layer_norm_params.h
@@ -12,6 +12,7 @@ struct LayerNormParams {
bool elementwise_affine;
float eps;
bool use_bias;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/linear_params.h
@@ -20,6 +20,7 @@ class LinearParams {
float kernel_reg_lambda;
DataType quantization_type;
bool offload;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &input_shape) const;
void solve_dims(const ParallelTensor input,
1 change: 1 addition & 0 deletions include/flexflow/ops/pool_2d_params.h
@@ -10,6 +10,7 @@ struct Pool2DParams {
int kernel_h, kernel_w, stride_h, stride_w, padding_h, padding_w;
PoolType pool_type;
ActiMode activation;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &input) const;
void solve_dims(ParallelTensorShape const &input,
1 change: 1 addition & 0 deletions include/flexflow/ops/reduce_params.h
@@ -10,6 +10,7 @@ struct ReduceParams {
std::vector<int> axes;
bool keepdims;
LayerID layer_guid;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/reshape_params.h
@@ -10,6 +10,7 @@ namespace FlexFlow {
struct ReshapeParams {
std::vector<int> shape;
LayerID layer_guid;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/residual_layer_norm_params.h
@@ -13,6 +13,7 @@ struct ResidualLayerNormParams {
float eps;
bool use_bias;
bool use_two_residuals;
char name[MAX_OPNAME];
bool is_valid(std::tuple<ParallelTensorShape,
ParallelTensorShape,
ParallelTensorShape> const &) const;
1 change: 1 addition & 0 deletions include/flexflow/ops/residual_rms_norm_params.h
@@ -11,6 +11,7 @@ struct ResidualRMSNormParams {
LayerID layer_guid;
float eps;
int dim;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &input) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/rms_norm_params.h
@@ -11,6 +11,7 @@ struct RMSNormParams {
LayerID layer_guid;
float eps;
int dim;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/sampling_params.h
@@ -8,6 +8,7 @@ namespace FlexFlow {

struct SamplingParams {
float top_p;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};
bool operator==(SamplingParams const &, SamplingParams const &);
1 change: 1 addition & 0 deletions include/flexflow/ops/sigmoid_silu_multi_params.h
@@ -8,6 +8,7 @@ namespace FlexFlow {

struct SigmoidSiluMultiParams {
LayerID layer_guid;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};