Skip to content

Commit a5251ca

Browse files
authored
Optimization: Qwen3 next autoregressive pass (ggml-org#17996)
* It's Qwen3 Next, the lean mean token generation machine!
* Apply patches from thread
* Remove recurrent version, only keep chunked and autoregressive
* Remove unnecessary conts and asserts
* Remove more extra conts and asserts
* Cleanup masking
1 parent fb64424 commit a5251ca

File tree

2 files changed

+86
-271
lines changed

2 files changed

+86
-271
lines changed

src/models/models.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -441,13 +441,14 @@ struct llm_build_qwen3next : public llm_graph_context_mamba {
441441
ggml_tensor * cur,
442442
ggml_tensor * causal_mask,
443443
ggml_tensor * identity,
444+
ggml_tensor * diag_mask,
444445
int il);
445446

446447
ggml_tensor * build_layer_ffn(
447448
ggml_tensor * cur,
448449
int il);
449450

450-
ggml_tensor * build_delta_net_recurrent(
451+
ggml_tensor * build_delta_net_chunking(
451452
ggml_tensor * q,
452453
ggml_tensor * k,
453454
ggml_tensor * v,
@@ -456,18 +457,17 @@ struct llm_build_qwen3next : public llm_graph_context_mamba {
456457
ggml_tensor * state,
457458
ggml_tensor * causal_mask,
458459
ggml_tensor * identity,
460+
ggml_tensor * diag_mask,
459461
int il);
460462

461-
ggml_tensor * build_delta_net_chunking(
463+
ggml_tensor * build_delta_net_autoregressive(
462464
ggml_tensor * q,
463465
ggml_tensor * k,
464466
ggml_tensor * v,
465467
ggml_tensor * g,
466468
ggml_tensor * beta,
467469
ggml_tensor * state,
468-
ggml_tensor * causal_mask,
469-
ggml_tensor * identity,
470-
int il);
470+
int il);
471471

472472
ggml_tensor * build_norm_gated(
473473
ggml_tensor * input,

0 commit comments

Comments
 (0)