Skip to content

Commit 711990c

Browse files
committed
remove unused code
1 parent 4817145 commit 711990c

File tree

1 file changed

+0
-148
lines changed

1 file changed

+0
-148
lines changed

src/main/cpp/server.hpp

Lines changed: 0 additions & 148 deletions
Original file line number | Diff line number | Diff line change
@@ -3269,151 +3269,3 @@ struct server_context {
32693269
};
32703270
}
32713271
};
3272-
3273-
static void common_params_handle_model_default(std::string &model, const std::string &model_url, std::string &hf_repo,
3274-
std::string &hf_file, const std::string &hf_token) {
3275-
if (!hf_repo.empty()) {
3276-
// short-hand to avoid specifying --hf-file -> default it to --model
3277-
if (hf_file.empty()) {
3278-
if (model.empty()) {
3279-
auto auto_detected = common_get_hf_file(hf_repo, hf_token);
3280-
if (auto_detected.first.empty() || auto_detected.second.empty()) {
3281-
exit(1); // built without CURL, error message already printed
3282-
}
3283-
hf_repo = auto_detected.first;
3284-
hf_file = auto_detected.second;
3285-
} else {
3286-
hf_file = model;
3287-
}
3288-
}
3289-
// make sure model path is present (for caching purposes)
3290-
if (model.empty()) {
3291-
// this is to avoid different repo having same file name, or same file name in different subdirs
3292-
std::string filename = hf_repo + "_" + hf_file;
3293-
// to make sure we don't have any slashes in the filename
3294-
string_replace_all(filename, "/", "_");
3295-
model = fs_get_cache_file(filename);
3296-
}
3297-
} else if (!model_url.empty()) {
3298-
if (model.empty()) {
3299-
auto f = string_split<std::string>(model_url, '#').front();
3300-
f = string_split<std::string>(f, '?').front();
3301-
model = fs_get_cache_file(string_split<std::string>(f, '/').back());
3302-
}
3303-
} else if (model.empty()) {
3304-
model = DEFAULT_MODEL_PATH;
3305-
}
3306-
}
3307-
3308-
// parse the given jparams (see de.kherud.llama.args.ModelParameters#toString()) from JSON to the required C++ struct.
3309-
static void server_params_parse(json jparams, common_params &params) {
3310-
common_params default_params;
3311-
3312-
params.sampling.seed = json_value(jparams, "seed", default_params.sampling.seed);
3313-
params.cpuparams.n_threads = json_value(jparams, "n_threads", default_params.cpuparams.n_threads);
3314-
params.speculative.cpuparams.n_threads =
3315-
json_value(jparams, "n_threads_draft", default_params.speculative.cpuparams.n_threads);
3316-
params.cpuparams_batch.n_threads = json_value(jparams, "n_threads_batch", default_params.cpuparams_batch.n_threads);
3317-
params.speculative.cpuparams_batch.n_threads =
3318-
json_value(jparams, "n_threads_batch_draft", default_params.speculative.cpuparams_batch.n_threads);
3319-
params.n_predict = json_value(jparams, "n_predict", default_params.n_predict);
3320-
params.n_ctx = json_value(jparams, "n_ctx", default_params.n_ctx);
3321-
params.n_batch = json_value(jparams, "n_batch", default_params.n_batch);
3322-
params.n_ubatch = json_value(jparams, "n_ubatch", default_params.n_ubatch);
3323-
params.n_keep = json_value(jparams, "n_keep", default_params.n_keep);
3324-
3325-
params.speculative.n_max = json_value(jparams, "n_draft", default_params.speculative.n_max);
3326-
params.speculative.n_min = json_value(jparams, "n_draft_min", default_params.speculative.n_min);
3327-
3328-
params.n_chunks = json_value(jparams, "n_chunks", default_params.n_chunks);
3329-
params.n_parallel = json_value(jparams, "n_parallel", default_params.n_parallel);
3330-
params.n_sequences = json_value(jparams, "n_sequences", default_params.n_sequences);
3331-
params.speculative.p_split = json_value(jparams, "p_split", default_params.speculative.p_split);
3332-
params.grp_attn_n = json_value(jparams, "grp_attn_n", default_params.grp_attn_n);
3333-
params.grp_attn_w = json_value(jparams, "grp_attn_w", default_params.grp_attn_w);
3334-
params.n_print = json_value(jparams, "n_print", default_params.n_print);
3335-
params.rope_freq_base = json_value(jparams, "rope_freq_base", default_params.rope_freq_base);
3336-
params.rope_freq_scale = json_value(jparams, "rope_freq_scale", default_params.rope_freq_scale);
3337-
params.yarn_ext_factor = json_value(jparams, "yarn_ext_factor", default_params.yarn_ext_factor);
3338-
params.yarn_attn_factor = json_value(jparams, "yarn_attn_factor", default_params.yarn_attn_factor);
3339-
params.yarn_beta_fast = json_value(jparams, "yarn_beta_fast", default_params.yarn_beta_fast);
3340-
params.yarn_beta_slow = json_value(jparams, "yarn_beta_slow", default_params.yarn_beta_slow);
3341-
params.yarn_orig_ctx = json_value(jparams, "yarn_orig_ctx", default_params.yarn_orig_ctx);
3342-
params.defrag_thold = json_value(jparams, "defrag_thold", default_params.defrag_thold);
3343-
params.numa = json_value(jparams, "numa", default_params.numa);
3344-
params.rope_scaling_type = json_value(jparams, "rope_scaling_type", default_params.rope_scaling_type);
3345-
params.pooling_type = json_value(jparams, "pooling_type", default_params.pooling_type);
3346-
params.model = json_value(jparams, "model", default_params.model);
3347-
params.speculative.model = json_value(jparams, "model_draft", default_params.speculative.model);
3348-
params.model_alias = json_value(jparams, "model_alias", default_params.model_alias);
3349-
params.model_url = json_value(jparams, "model_url", default_params.model_url);
3350-
params.hf_repo = json_value(jparams, "hf_repo", default_params.hf_repo);
3351-
params.hf_file = json_value(jparams, "hf_file", default_params.hf_file);
3352-
params.prompt = json_value(jparams, "prompt", default_params.prompt);
3353-
params.prompt_file = json_value(jparams, "prompt_file", default_params.prompt_file);
3354-
params.path_prompt_cache = json_value(jparams, "path_prompt_cache", default_params.path_prompt_cache);
3355-
params.input_prefix = json_value(jparams, "input_prefix", default_params.input_prefix);
3356-
params.input_suffix = json_value(jparams, "input_suffix", default_params.input_suffix);
3357-
params.antiprompt = json_value(jparams, "antiprompt", default_params.antiprompt);
3358-
params.lookup_cache_static = json_value(jparams, "lookup_cache_static", default_params.lookup_cache_static);
3359-
params.lookup_cache_dynamic = json_value(jparams, "lookup_cache_dynamic", default_params.lookup_cache_dynamic);
3360-
params.logits_file = json_value(jparams, "logits_file", default_params.logits_file);
3361-
// params.lora_adapters = json_value(jparams, "lora_adapter", default_params.lora_adapters);
3362-
params.embedding = json_value(jparams, "embedding", default_params.embedding);
3363-
params.escape = json_value(jparams, "escape", default_params.escape);
3364-
params.cont_batching = json_value(jparams, "cont_batching", default_params.cont_batching);
3365-
params.flash_attn = json_value(jparams, "flash_attn", default_params.flash_attn);
3366-
params.input_prefix_bos = json_value(jparams, "input_prefix_bos", default_params.input_prefix_bos);
3367-
params.sampling.ignore_eos = json_value(jparams, "ignore_eos", default_params.sampling.ignore_eos);
3368-
params.use_mmap = json_value(jparams, "use_mmap", default_params.use_mmap);
3369-
params.use_mlock = json_value(jparams, "use_mlock", default_params.use_mlock);
3370-
params.no_kv_offload = json_value(jparams, "no_kv_offload", default_params.no_kv_offload);
3371-
params.chat_template = json_value(jparams, "chat_template", default_params.chat_template);
3372-
3373-
if (jparams.contains("n_gpu_layers")) {
3374-
if (llama_supports_gpu_offload()) {
3375-
params.n_gpu_layers = json_value(jparams, "n_gpu_layers", default_params.n_gpu_layers);
3376-
params.speculative.n_gpu_layers =
3377-
json_value(jparams, "n_gpu_layers_draft", default_params.speculative.n_gpu_layers);
3378-
} else {
3379-
SRV_WRN("Not compiled with GPU offload support, --n-gpu-layers option will be ignored. "
3380-
"See main README.md for information on enabling GPU BLAS support: %s = %d",
3381-
"n_gpu_layers", params.n_gpu_layers);
3382-
}
3383-
}
3384-
3385-
if (jparams.contains("split_mode")) {
3386-
params.split_mode = json_value(jparams, "split_mode", default_params.split_mode);
3387-
// todo: the definition checks here currently don't work due to cmake visibility reasons
3388-
#ifndef GGML_USE_CUDA
3389-
fprintf(stderr, "warning: llama.cpp was compiled without CUDA. Setting the split mode has no effect.\n");
3390-
#endif
3391-
}
3392-
3393-
if (jparams.contains("tensor_split")) {
3394-
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
3395-
std::vector<float> tensor_split = jparams["tensor_split"].get<std::vector<float>>();
3396-
GGML_ASSERT(tensor_split.size() <= llama_max_devices());
3397-
3398-
for (size_t i_device = 0; i_device < llama_max_devices(); ++i_device) {
3399-
if (i_device < tensor_split.size()) {
3400-
params.tensor_split[i_device] = tensor_split.at(i_device);
3401-
} else {
3402-
params.tensor_split[i_device] = 0.0f;
3403-
}
3404-
}
3405-
#else
3406-
SRV_WRN("%s", "llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n");
3407-
#endif // GGML_USE_CUDA
3408-
}
3409-
3410-
if (jparams.contains("main_gpu")) {
3411-
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
3412-
params.main_gpu = json_value(jparams, "main_gpu", default_params.main_gpu);
3413-
#else
3414-
SRV_WRN("%s", "llama.cpp was compiled without CUDA. It is not possible to set a main GPU.");
3415-
#endif
3416-
}
3417-
3418-
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
3419-
}

0 commit comments

Comments
 (0)