@@ -5007,7 +5007,7 @@ static void llm_load_hparams(
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 switch (hparams.n_layer) {
-                    case 42: model.type = e_model::MODEL_SMALL ; break;
+                    case 42: model.type = e_model::MODEL_7B ; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
@@ -5525,6 +5525,9 @@ static void llm_load_vocab(
                     tokenizer_pre == "smollm") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMOLLM;
                 vocab.tokenizer_clean_spaces = false;
+            } else if (
+                    tokenizer_pre == "codeshell") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_CODESHELL;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
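
For anyone converting CodeShell checkpoints: the branch above keys off the string stored under the `tokenizer.ggml.pre` GGUF key. A minimal standalone checker for what a converted file actually declares, as a sketch assuming ggml's gguf C API (gguf_init_from_file, gguf_find_key, gguf_get_val_str, gguf_free; declared in ggml.h in trees of this vintage, gguf.h in newer ones):

    // hypothetical checker, not part of this PR: print tokenizer.ggml.pre
    #include <cstdio>
    #include "ggml.h"

    int main(int argc, char ** argv) {
        if (argc < 2) {
            std::fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
            return 1;
        }
        // no_alloc = true: read metadata only, do not load tensor data
        struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
        struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
        if (!ctx) {
            std::fprintf(stderr, "failed to read %s\n", argv[1]);
            return 1;
        }
        const int key = gguf_find_key(ctx, "tokenizer.ggml.pre");
        std::printf("tokenizer.ggml.pre = %s\n",
                    key >= 0 ? gguf_get_val_str(ctx, key) : "(not set)");
        gguf_free(ctx);
        return 0;
    }

A file converted with a codeshell-aware converter should print "codeshell" here and thus hit the new branch instead of the runtime_error.
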
@@ -15548,6 +15551,7 @@ struct llm_tokenizer_bpe {
             case LLAMA_VOCAB_PRE_TYPE_REFACT:
             case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
             case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
+            case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
                 regex_exprs = {
                     "\\p{N}",
                     "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
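
This hunk gives CodeShell the same GPT-2 style pre-tokenizer split that REFACT, COMMAND_R, and SMOLLM already use. To get a feel for what the second pattern does, here is a self-contained sketch with an ASCII-only stand-in (std::regex has no \p{L}/\p{N} support, so [A-Za-z] and [0-9] approximate them; the real tokenizer also applies the "\p{N}" expression first, isolating each digit individually, which this sketch skips):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        // ASCII approximation of the GPT-2 split: contractions, optional
        // space + letters, optional space + digits, optional space + other,
        // then whitespace handling
        const std::regex re(
            "'s|'t|'re|'ve|'m|'ll|'d"
            "| ?[A-Za-z]+| ?[0-9]+| ?[^\\sA-Za-z0-9]+|\\s+(?!\\S)|\\s+");
        const std::string text = "CodeShell's tokenizer splits 2024 tokens!";
        for (auto it = std::sregex_iterator(text.begin(), text.end(), re);
             it != std::sregex_iterator(); ++it) {
            std::cout << '[' << it->str() << ']';
        }
        std::cout << '\n'; // [CodeShell]['s][ tokenizer][ splits][ 2024][ tokens][!]
    }
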
@@ -19447,7 +19451,6 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_BAICHUAN:
         case LLM_ARCH_STARCODER:
         case LLM_ARCH_PLAMO:
-        case LLM_ARCH_CODESHELL:
         case LLM_ARCH_ORION:
         case LLM_ARCH_INTERNLM2:
         case LLM_ARCH_MINICPM:
@@ -19477,6 +19480,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_STARCODER2:
         case LLM_ARCH_OPENELM:
         case LLM_ARCH_GPTNEOX:
+        case LLM_ARCH_CODESHELL:
             return LLAMA_ROPE_TYPE_NEOX;

         // all model arches should be listed explicitly here
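
The net effect of the last two hunks is that CodeShell moves from the LLAMA_ROPE_TYPE_NORM list to LLAMA_ROPE_TYPE_NEOX. The two rope types differ only in which vector elements are paired before rotation: NORM rotates adjacent pairs (the original RoPE layout), NEOX rotates pairs split across the two halves of the head (GPT-NeoX layout). A minimal sketch of the difference, assuming a hypothetical rope() helper rather than the actual ggml kernel:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Rotate one head of dimension n at position pos.
    // norm: pairs (x[2i], x[2i+1])        -- original RoPE layout
    // neox: pairs (x[i],  x[i + n/2])     -- GPT-NeoX layout, CodeShell's now
    static void rope(std::vector<float> & x, int pos, bool neox,
                     float freq_base = 10000.0f) {
        const int n = (int) x.size();
        for (int i = 0; i < n/2; ++i) {
            const float theta = pos * std::pow(freq_base, -2.0f*i/n);
            const int i0 = neox ? i       : 2*i;
            const int i1 = neox ? i + n/2 : 2*i + 1;
            const float a = x[i0], b = x[i1];
            x[i0] = a*std::cos(theta) - b*std::sin(theta);
            x[i1] = a*std::sin(theta) + b*std::cos(theta);
        }
    }

    int main() {
        std::vector<float> q = {1, 0, 0, 1, 1, 0, 0, 1}; // one 8-dim head
        rope(q, /*pos =*/ 3, /*neox =*/ true);
        for (float v : q) std::printf("% .3f", v);
        std::printf("\n");
    }

Using the wrong pairing produces a valid-looking rotation over the wrong element pairs, which is why a model converted with one convention silently degrades when run with the other.
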