Fix default bias and loading order of key and value weights
Dobiasd committed Dec 30, 2023
1 parent 4184515 commit 291e127
Showing 1 changed file with 3 additions and 3 deletions.
include/fdeep/layers/multi_head_attention_layer.hpp
@@ -25,8 +25,8 @@ class multi_head_attention_layer : public layer
         : layer(name), num_heads_(num_heads), key_dim_(key_dim),
         value_dim_(value_dim), attention_axes_(attention_axes),
         query_dense_(create_dense_layer(weights_and_biases, use_bias, 0, name + "_query_dense")),
-        value_dense_(create_dense_layer(weights_and_biases, use_bias, 1, name + "_value_dense")),
-        key_dense_(create_dense_layer(weights_and_biases, use_bias, 2, name + "_key_dense")),
+        value_dense_(create_dense_layer(weights_and_biases, use_bias, 2, name + "_value_dense")),
+        key_dense_(create_dense_layer(weights_and_biases, use_bias, 1, name + "_key_dense")),
         output_dense_(create_dense_layer(weights_and_biases, use_bias, 3, name + "_output_dense"))
     {
     }
@@ -40,7 +40,7 @@ class multi_head_attention_layer : public layer
         const std::size_t n = weights.shape().width_ * weights.shape().depth_;
         const tensor biases = use_bias ?
             weights_and_biases[index_factor * index + 1] :
-            tensor(tensor_shape(n), 1);
+            tensor(tensor_shape(n), 0);
         return dense_layer(name, n, *weights.as_vector(), *biases.as_vector());
     }
     tensors extract_biases(const tensors& saved_weights, bool use_bias)
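
The first hunk corrects the loading order of the projection weights. The flat weights_and_biases vector holds the four dense sub-layers in the order query, key, value, output (the order in which Keras's MultiHeadAttention lists them), so index 1 belongs to the key projection and index 2 to the value projection; the old code had the two swapped. A minimal sketch of the indexing, using string placeholders instead of fdeep's tensor types (the names W_query, b_query, etc. are illustrative only, not fdeep's API):

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

int main()
{
    // Stand-in for the flat weights-and-biases vector the exporter produces:
    // one kernel and one bias per projection, in query, key, value, output order.
    const std::vector<std::string> weights_and_biases = {
        "W_query",  "b_query",   // index 0
        "W_key",    "b_key",     // index 1
        "W_value",  "b_value",   // index 2
        "W_output", "b_output"}; // index 3

    const std::size_t index_factor = 2; // two entries (kernel, bias) per projection

    const auto kernel = [&](std::size_t index) {
        return weights_and_biases[index_factor * index];
    };

    // The fixed mapping: 0 -> query, 1 -> key, 2 -> value, 3 -> output.
    assert(kernel(1) == "W_key");   // previously handed to value_dense_
    assert(kernel(2) == "W_value"); // previously handed to key_dense_
    assert(kernel(0) == "W_query" && kernel(3) == "W_output");
    return 0;
}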
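
The second hunk fixes the fallback bias used when use_bias is false. A dense layer computes y = W * x + b, so a substitute bias filled with ones shifts every output by one; only an all-zero tensor makes the layer behave as if it had no bias term at all. A small self-contained sketch of the difference, with plain std::vector standing in for fdeep's tensor:

#include <cassert>
#include <cstddef>
#include <vector>

// A dense layer computes y[j] = sum_i(W[j][i] * x[i]) + b[j].
std::vector<double> dense(const std::vector<std::vector<double>>& W,
                          const std::vector<double>& x,
                          const std::vector<double>& b)
{
    std::vector<double> y(W.size(), 0.0);
    for (std::size_t j = 0; j < W.size(); ++j)
    {
        for (std::size_t i = 0; i < x.size(); ++i)
            y[j] += W[j][i] * x[i];
        y[j] += b[j];
    }
    return y;
}

int main()
{
    const std::vector<std::vector<double>> W = {{1.0, 2.0}, {3.0, 4.0}};
    const std::vector<double> x = {1.0, 1.0};

    const auto zeros = dense(W, x, {0.0, 0.0}); // fixed fallback: plain W * x
    const auto ones  = dense(W, x, {1.0, 1.0}); // old fallback: every output off by one

    assert(zeros[0] == 3.0 && zeros[1] == 7.0);
    assert(ones[0] == 4.0 && ones[1] == 8.0);
    return 0;
}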
