@@ -2265,8 +2265,8 @@ static void llm_load_tensors(
22652265 layer.attn_norm = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_NORM, " weight" , i), {n_embd}, backend);
22662266 layer.attn_norm_b = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_NORM, " bias" , i), {n_embd}, backend);
22672267
2268- layer.wqkv = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_QKV, " weight" , i), {n_embd, 3 * n_embd}, backend_split);
2269- layer.bqkv = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_QKV, " bias" , i), {3 * n_embd}, backend_split);
2268+ layer.wqkv = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_QKV, " weight" , i), {n_embd, n_embd + 2 *n_embd_gqa }, backend_split);
2269+ layer.bqkv = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_QKV, " bias" , i), {n_embd + 2 *n_embd_gqa }, backend_split);
22702270
22712271 layer.wo = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_OUT, " weight" , i), {n_embd, n_embd}, backend_split);
22722272 layer.bo = ml.create_tensor (ctx, tn (LLM_TENSOR_ATTN_OUT, " bias" , i), {n_embd}, backend_split);
@@ -3538,8 +3538,8 @@ static struct ggml_cgraph * llm_build_starcoder(
35383538 cur = ggml_add (ctx0, ggml_mul_mat (ctx0, model.layers [il].wqkv , cur), model.layers [il].bqkv );
35393539
35403540 struct ggml_tensor * tmpq = ggml_view_2d (ctx0, cur, n_embd, N, cur->nb [1 ], 0 *sizeof (float )*n_embd);
3541- struct ggml_tensor * tmpk = ggml_view_2d (ctx0, cur, n_embd , N, cur->nb [1 ], 1 * sizeof (float )*n_embd);
3542- struct ggml_tensor * tmpv = ggml_view_2d (ctx0, cur, n_embd , N, cur->nb [1 ], 2 * sizeof (float )*n_embd);
3541+ struct ggml_tensor * tmpk = ggml_view_2d (ctx0, cur, n_embd_gqa , N, cur->nb [1 ], sizeof (float )*n_embd);
3542+ struct ggml_tensor * tmpv = ggml_view_2d (ctx0, cur, n_embd_gqa , N, cur->nb [1 ], sizeof (float )*( n_embd + n_embd_gqa) );
35433543
35443544 struct ggml_tensor * Qcur = tmpq;
35453545 struct ggml_tensor * Kcur = tmpk;
0 commit comments