Skip to content

Commit 9d52f17

Browse files
authored
model : add KORMo model (#18032)
* vocab: add KORMo Tokenizer
* model: add KORMoForCausalLM
* vocab: change pretokenizer to qwen2
* lint: fix unintended line removal
* model: make qwen2 bias tensor optional
* model: use qwen2 architecture for KORMo
1 parent 4529c66 commit 9d52f17

File tree

6 files changed

+25
-9
lines changed

6 files changed

+25
-9
lines changed

convert_hf_to_gguf.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
12031203
if chkhsh == "f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95":
12041204
# ref: https://huggingface.co/MiniMaxAI/MiniMax-M2
12051205
res = "minimax-m2"
1206+
if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
1207+
# ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
1208+
res = "kormo"
12061209

12071210
if res is None:
12081211
logger.warning("\n")
@@ -3398,7 +3401,7 @@ def set_vocab(self):
33983401
self._set_vocab_qwen()
33993402

34003403

3401-
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration")
3404+
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM")
34023405
class Qwen2Model(TextModel):
34033406
model_arch = gguf.MODEL_ARCH.QWEN2
34043407

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ class TOKENIZER_TYPE(IntEnum):
143143
{"name": "bailingmoe2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-mini-base-2.0", },
144144
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
145145
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
146+
{"name": "kormo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
146147
]
147148

148149
# some models are known to be broken upstream, so we will skip them as exceptions

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,8 @@ class TensorNameMap:
154154
"model.layers.{bid}.operator_norm", # lfm2
155155
"model.transformer.blocks.{bid}.attn_norm", # llada
156156
"layers.{bid}.input_layernorm", # qwen3-embedding
157-
"model.layers.{bid}.attention_layernorm" # apertus
157+
"model.layers.{bid}.attention_layernorm", # apertus
158+
"model.layers.{bid}.pre_attention_layernorm", # kormo
158159
),
159160

160161
# Attention norm 2
@@ -342,6 +343,7 @@ class TensorNameMap:
342343
"model.transformer.blocks.{bid}.ff_norm", # llada
343344
"layers.{bid}.post_attention_layernorm", # qwen3-embedding
344345
"model.layers.{bid}.feedforward_layernorm", # apertus
346+
"model.layers.{bid}.pre_mlp_layernorm", # kormo
345347
),
346348

347349
# Pre feed-forward norm

src/llama-model.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3388,9 +3388,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
33883388
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
33893389

33903390
// optional bias tensors
3391-
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, 0);
3392-
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0);
3393-
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0);
3391+
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
3392+
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
3393+
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
33943394

33953395
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
33963396

src/llama-vocab.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1895,7 +1895,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
18951895
clean_spaces = false;
18961896
} else if (
18971897
tokenizer_pre == "qwen2" ||
1898-
tokenizer_pre == "deepseek-r1-qwen") {
1898+
tokenizer_pre == "deepseek-r1-qwen" ||
1899+
tokenizer_pre == "kormo") {
18991900
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
19001901
clean_spaces = false;
19011902
} else if (

src/models/qwen2.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,25 @@ llm_build_qwen2::llm_build_qwen2(const llama_model & model, const llm_graph_para
3131
{
3232
// compute Q and K and RoPE them
3333
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
34-
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
3534
cb(Qcur, "Qcur", il);
35+
if (model.layers[il].bq) {
36+
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
37+
cb(Qcur, "Qcur", il);
38+
}
3639

3740
ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
38-
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
3941
cb(Kcur, "Kcur", il);
42+
if (model.layers[il].bk) {
43+
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
44+
cb(Kcur, "Kcur", il);
45+
}
4046

4147
ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
42-
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
4348
cb(Vcur, "Vcur", il);
49+
if (model.layers[il].bv) {
50+
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
51+
cb(Vcur, "Vcur", il);
52+
}
4453

4554
Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
4655
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);

0 commit comments

Comments (0)