@@ -31,16 +31,25 @@ llm_build_qwen2::llm_build_qwen2(const llama_model & model, const llm_graph_para
3131 {
3232 // compute Q and K and RoPE them
3333 ggml_tensor * Qcur = build_lora_mm (model.layers [il].wq , cur);
34- Qcur = ggml_add (ctx0, Qcur, model.layers [il].bq );
3534 cb (Qcur, " Qcur" , il);
35+ if (model.layers [il].bq ) {
36+ Qcur = ggml_add (ctx0, Qcur, model.layers [il].bq );
37+ cb (Qcur, " Qcur" , il);
38+ }
3639
3740 ggml_tensor * Kcur = build_lora_mm (model.layers [il].wk , cur);
38- Kcur = ggml_add (ctx0, Kcur, model.layers [il].bk );
3941 cb (Kcur, " Kcur" , il);
42+ if (model.layers [il].bk ) {
43+ Kcur = ggml_add (ctx0, Kcur, model.layers [il].bk );
44+ cb (Kcur, " Kcur" , il);
45+ }
4046
4147 ggml_tensor * Vcur = build_lora_mm (model.layers [il].wv , cur);
42- Vcur = ggml_add (ctx0, Vcur, model.layers [il].bv );
4348 cb (Vcur, " Vcur" , il);
49+ if (model.layers [il].bv ) {
50+ Vcur = ggml_add (ctx0, Vcur, model.layers [il].bv );
51+ cb (Vcur, " Vcur" , il);
52+ }
4453
4554 Qcur = ggml_reshape_3d (ctx0, Qcur, n_embd_head, n_head, n_tokens);
4655 Kcur = ggml_reshape_3d (ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
0 commit comments