fix
goliaro committed Feb 20, 2024
1 parent e82a75f commit 583cb28
Showing 1 changed file with 14 additions and 17 deletions.
31 changes: 14 additions & 17 deletions src/runtime/file_loader.cc
@@ -219,10 +219,10 @@ void load_attention_weights_v2(DT *ptr,
                               int tensor_parallelism_degree) {
   // layers_0_attention_wq_weight
   // layers_0_self_attn_q_proj_weight
-  std::string q_file = layer_name + "_wq_weight";
-  std::string k_file = layer_name + "_wk_weight";
-  std::string v_file = layer_name + "_wv_weight";
-  std::string o_file = layer_name + "_wo_weight";
+  std::string q_file = layer_name + ".q_proj.weight";
+  std::string k_file = layer_name + ".k_proj.weight";
+  std::string v_file = layer_name + ".v_proj.weight";
+  std::string o_file = layer_name + ".o_proj.weight";
   std::vector<std::string> weight_filenames = {q_file, k_file, v_file};
   int file_index = 0;
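Under the renamed convention above, each projection file is simply the layer name with a dotted, HuggingFace-style suffix appended. A minimal standalone sketch of the resulting names (the layer name "layers.0.self_attn" is an illustrative assumption, not taken from the commit):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Assumed example layer name; real values come from the model being loaded.
  std::string layer_name = "layers.0.self_attn";
  std::vector<std::string> weight_filenames = {layer_name + ".q_proj.weight",
                                               layer_name + ".k_proj.weight",
                                               layer_name + ".v_proj.weight",
                                               layer_name + ".o_proj.weight"};
  for (std::string const &f : weight_filenames) {
    std::cout << f << std::endl; // e.g. layers.0.self_attn.q_proj.weight
  }
  return 0;
}

Note that load_attention_weights_v2 keeps o_file out of its weight_filenames vector, while the quantized variant in the next hunk includes it.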

@@ -409,10 +409,10 @@ void load_attention_weights_quantized(char *ptr,
                                      bool use_full_precision) {
   // layers_0_attention_wq_weight
   // layers_0_self_attn_q_proj_weight
-  std::string q_file = layer_name + "_wq_weight";
-  std::string k_file = layer_name + "_wk_weight";
-  std::string v_file = layer_name + "_wv_weight";
-  std::string o_file = layer_name + "_wo_weight";
+  std::string q_file = layer_name + ".q_proj.weight";
+  std::string k_file = layer_name + ".k_proj.weight";
+  std::string v_file = layer_name + ".v_proj.weight";
+  std::string o_file = layer_name + ".o_proj.weight";
   std::vector<std::string> weight_filenames = {q_file, k_file, v_file, o_file};

   int file_index = 0;
@@ -690,7 +690,7 @@ void FileDataLoader::load_quantization_weight(FFModel *ff,
   if (weight_idx > 0) {
     assert(weight_idx == 0 || weight_idx == 1);
     if (weight_filename != "embed_tokens_weight_lm_head") {
-      weight_filename += weight_idx == 0 ? "_weight" : "_bias";
+      weight_filename += weight_idx == 0 ? ".weight" : ".bias";
     }
   }
   load_from_quantized_file(data,
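The same dotted suffixes are used when resolving plain tensors: index 0 picks ".weight", index 1 picks ".bias", and the combined "embed_tokens_weight_lm_head" file keeps its name unchanged. A hedged sketch of just that rule (the helper name and example argument are illustrative, not part of the file):

#include <cassert>
#include <string>

std::string resolve_filename(std::string weight_filename, int weight_idx) {
  assert(weight_idx == 0 || weight_idx == 1);
  // The fused embedding/LM-head tensor is stored under its own fixed name.
  if (weight_filename != "embed_tokens_weight_lm_head") {
    weight_filename += weight_idx == 0 ? ".weight" : ".bias";
  }
  return weight_filename;
}
// resolve_filename("layers.0.mlp.down_proj", 0) -> "layers.0.mlp.down_proj.weight"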
@@ -728,12 +728,9 @@ void FileDataLoader::load_single_weight_tensor(FFModel *ff,
   if (l->op_type == OP_INC_MULTIHEAD_SELF_ATTENTION ||
       l->op_type == OP_SPEC_INC_MULTIHEAD_SELF_ATTENTION ||
       l->op_type == OP_TREE_INC_MULTIHEAD_SELF_ATTENTION) {
-    if (weight_filename.find("self_attention") != std::string::npos) {
-      load_attention_weights_multi_query(
-          data, weight_filename, weights_folder, hidden_dim, num_heads);
-    } else if (weight_filename.find("attention") != std::string::npos &&
-               weight_filename.rfind("attention") ==
-                   weight_filename.length() - strlen("attention")) {
+    if (weight_filename.find("attention") != std::string::npos &&
+        weight_filename.rfind("attention") ==
+            weight_filename.length() - strlen("attention")) {
       if (weight_idx == 0) {
         load_attention_weights_v2(data,
                                   num_heads,
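The condition retained in this hunk selects tensors whose name ends in "attention": the find check guards against names shorter than the suffix, and rfind plus the length comparison pins the match to the end of the string. An equivalent standalone predicate, shown only for illustration:

#include <cstring>
#include <string>

// Illustrative helper (not in the commit): true only when the tensor name
// ends with "attention", mirroring the find/rfind/length check above.
bool ends_with_attention(std::string const &weight_filename) {
  return weight_filename.find("attention") != std::string::npos &&
         weight_filename.rfind("attention") ==
             weight_filename.length() - strlen("attention");
}
// ends_with_attention("layers_0_attention")      -> true
// ends_with_attention("layers_0_attention_norm") -> false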
@@ -765,7 +762,7 @@ void FileDataLoader::load_single_weight_tensor(FFModel *ff,
     assert(weight_idx >= 0 || weight_idx <= 2);
     weight_filename += (weight_idx == 0)
                            ? "_attn_bias"
-                           : ((weight_idx == 1) ? "_weight" : "_bias");
+                           : ((weight_idx == 1) ? ".weight" : ".bias");
     std::cout << "Loading weight file " << weight_filename << std::endl;
     std::string weight_filepath = join_path({weights_folder, weight_filename});
     load_from_file(data, volume, weight_filepath);
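For tensors that reach this branch, the suffix is three-way rather than two-way: index 0 maps to the fused "_attn_bias" file, while the remaining indices fall back to the dotted ".weight"/".bias" pair. A small sketch of just that selection (the function name and example argument are assumptions):

#include <string>

std::string with_suffix(std::string const &base, int weight_idx) {
  // 0 -> fused attention bias file; 1 -> ".weight"; anything else -> ".bias".
  return base + ((weight_idx == 0)
                     ? "_attn_bias"
                     : ((weight_idx == 1) ? ".weight" : ".bias"));
}
// with_suffix("layers.0.self_attn", 0) -> "layers.0.self_attn_attn_bias"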
@@ -774,7 +771,7 @@ void FileDataLoader::load_single_weight_tensor(FFModel *ff,
     assert(weight_idx == 0 || weight_idx == 1);
     // handle exception
     if (weight_filename != "embed_tokens_weight_lm_head") {
-      weight_filename += weight_idx == 0 ? "_weight" : "_bias";
+      weight_filename += weight_idx == 0 ? ".weight" : ".bias";
     }
     std::cout << "Loading weight file " << weight_filename << std::endl;
     std::string weight_filepath = join_path({weights_folder, weight_filename});
