Commit

examples : fix

ggml-ci

ggerganov committed Dec 22, 2024
1 parent dcbfda1 commit e1ac353

Showing 21 changed files with 88 additions and 175 deletions.
10 changes: 5 additions & 5 deletions common/common.cpp
@@ -922,14 +922,14 @@ struct common_init_result common_init_from_params(common_params & params) {
common_lora_adapter_container loaded_la;
loaded_la.path = la.path;
loaded_la.scale = la.scale;
loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
loaded_la.adapter.reset(llama_lora_adapter_init(model, la.path.c_str()));
if (loaded_la.adapter == nullptr) {
LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
llama_free(lctx);
llama_free_model(model);
return iparams;
}
iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
iparams.lora_adapters.emplace_back(std::move(loaded_la)); // copy to list of loaded adapters
}
if (!params.lora_init_without_apply) {
common_lora_adapters_apply(lctx, iparams.lora_adapters);
@@ -993,8 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {
llama_perf_context_reset(lctx);
}

iparams.model = model;
iparams.context = lctx;
iparams.model.reset(model);
iparams.context.reset(lctx);

return iparams;
}
@@ -1003,7 +1003,7 @@ void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_l
llama_lora_adapter_clear(ctx);
for (auto & la : lora_adapters) {
if (la.scale != 0.0f) {
llama_lora_adapter_set(ctx, la.adapter, la.scale);
llama_lora_adapter_set(ctx, la.adapter.get(), la.scale);
}
}
}
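The changes in common.cpp above all follow one pattern: adapter, model and context are now owning smart pointers, so a raw pointer returned by the C API is handed to its owner with reset(), the raw pointer is borrowed back with get() where the C API still expects it, and the now move-only common_lora_adapter_container has to be moved into iparams.lora_adapters instead of copied. A minimal, self-contained sketch of that pattern, using hypothetical stand-in types rather than the llama.cpp ones:

#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-ins for the llama.cpp types, for illustration only.
struct adapter { };
adapter * adapter_init() { return new adapter(); }   // C-style factory returning a raw pointer
using adapter_ptr = std::unique_ptr<adapter>;         // owning wrapper

struct container {
    adapter_ptr handle;                                // owning, move-only member
};

int main() {
    std::vector<container> loaded;

    container c;
    c.handle.reset(adapter_init());       // hand the raw pointer to the owner
    // loaded.push_back(c);                // would not compile: container is move-only now
    loaded.emplace_back(std::move(c));     // move into the list instead of copying

    adapter * raw = loaded.back().handle.get();   // borrow for C-style calls; ownership stays put
    (void) raw;
    return 0;
}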
17 changes: 4 additions & 13 deletions common/common.h
@@ -2,7 +2,7 @@

#pragma once

#include "llama.h"
#include "llama-cpp.h"

#include <string>
#include <vector>
@@ -30,7 +30,7 @@ struct common_lora_adapter_info {
};

struct common_lora_adapter_container : common_lora_adapter_info {
struct llama_lora_adapter * adapter;
llama_lora_adapter_ptr adapter;
};

using llama_tokens = std::vector<llama_token>;
@@ -479,19 +479,10 @@ std::string fs_get_cache_file(const std::string & filename);
//

struct common_init_result {
struct llama_model * model = nullptr;
struct llama_context * context = nullptr;
llama_model_ptr model;
llama_context_ptr context;

std::vector<common_lora_adapter_container> lora_adapters;

~common_init_result() {
llama_free(context);
llama_free_model(model);

for (auto & lora_adapter : lora_adapters) {
llama_lora_adapter_free(lora_adapter.adapter);
}
}
};

struct common_init_result common_init_from_params(common_params & params);
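The removed destructor above is the point of this hunk: once model, context and each adapter are held in llama_model_ptr, llama_context_ptr and llama_lora_adapter_ptr (brought in by the new llama-cpp.h include), the compiler-generated destructor of common_init_result releases everything, so the manual llama_free / llama_free_model / llama_lora_adapter_free calls are no longer needed here or in the examples below. A plausible shape for those wrapper types, sketched under the assumption that they are std::unique_ptr aliases with custom deleters (the deleter struct names below are invented for the sketch; only the *_ptr aliases appear in the commit):

// Sketch only; the real definitions live in llama-cpp.h.
#include <memory>

#include "llama.h"

struct llama_model_deleter {
    void operator()(llama_model * model) { llama_free_model(model); }
};
struct llama_context_deleter {
    void operator()(llama_context * ctx) { llama_free(ctx); }
};
struct llama_lora_adapter_deleter {
    void operator()(llama_lora_adapter * adapter) { llama_lora_adapter_free(adapter); }
};

typedef std::unique_ptr<llama_model,        llama_model_deleter>        llama_model_ptr;
typedef std::unique_ptr<llama_context,      llama_context_deleter>      llama_context_ptr;
typedef std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter> llama_lora_adapter_ptr;

Since class members are destroyed in reverse declaration order, declaring model before context means the context is still freed before the model, matching the order of the calls in the removed destructor.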
7 changes: 3 additions & 4 deletions examples/cvector-generator/cvector-generator.cpp
@@ -415,12 +415,13 @@ int main(int argc, char ** argv) {
// load the model to get hparams
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

// int n_ctx = llama_n_ctx(ctx);
int n_layers = llama_n_layer(model);
int n_embd = llama_n_embd(model);

// get model hint param (a.k.a model arch name)
char model_hint[128];
llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
@@ -474,8 +475,6 @@ int main(int argc, char ** argv) {

// done with the model, we can now free it to make gain some memory
printf("Done evaluate prompts, unload model...\n");
llama_free(ctx);
llama_free_model(model);

bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;

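The dropped llama_free(ctx) / llama_free_model(model) pair used to release the context and model here, before the dimensionality-reduction step, to reclaim memory early; with ownership now held inside llama_init, that memory is instead released when llama_init goes out of scope. If the early release is still wanted at this point, resetting the owning pointers would have the same effect. A hypothetical sketch, not part of the commit:

// Hypothetical early release, equivalent in effect to the removed calls.
// The raw ctx and model locals dangle afterwards, exactly as they did after
// the old llama_free / llama_free_model pair.
llama_init.context.reset();
llama_init.model.reset();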
7 changes: 3 additions & 4 deletions examples/embedding/embedding.cpp
@@ -97,8 +97,9 @@ int main(int argc, char ** argv) {
// load the model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

if (model == NULL) {
LOG_ERR("%s: unable to load model\n", __func__);
return 1;
@@ -316,8 +317,6 @@ int main(int argc, char ** argv) {

// clean up
llama_batch_free(batch);
llama_free(ctx);
llama_free_model(model);
llama_backend_free();

return 0;
8 changes: 3 additions & 5 deletions examples/eval-callback/eval-callback.cpp
@@ -162,8 +162,9 @@ int main(int argc, char ** argv) {
// init
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

if (model == nullptr || ctx == nullptr) {
LOG_ERR("%s : failed to init\n", __func__);
return 1;
@@ -184,9 +185,6 @@ int main(int argc, char ** argv) {
LOG("\n");
llama_perf_context_print(ctx);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

return 0;
11 changes: 5 additions & 6 deletions examples/imatrix/imatrix.cpp
@@ -430,9 +430,10 @@ static void process_logits(

static bool compute_imatrix(llama_context * ctx, const common_params & params) {
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
const int n_ctx = llama_n_ctx(ctx);

GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));

auto tim1 = std::chrono::high_resolution_clock::now();
LOG_INF("%s: tokenizing the input ..\n", __func__);

@@ -618,8 +619,9 @@ int main(int argc, char ** argv) {
// init
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

if (model == nullptr || ctx == nullptr) {
LOG_ERR("%s : failed to init\n", __func__);
return 1;
@@ -655,9 +657,6 @@ int main(int argc, char ** argv) {
LOG("\n");
llama_perf_context_print(ctx);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

return 0;
7 changes: 2 additions & 5 deletions examples/infill/infill.cpp
@@ -131,8 +131,8 @@ int main(int argc, char ** argv) {
LOG_INF("%s: load the model and apply lora adapter, if any\n", __func__);
common_init_result llama_init = common_init_from_params(params);

model = llama_init.model;
ctx = llama_init.context;
model = llama_init.model.get();
ctx = llama_init.context.get();

if (model == NULL) {
LOG_ERR("%s: unable to load model\n", __func__);
@@ -581,9 +581,6 @@ int main(int argc, char ** argv) {
LOG("\n");
common_perf_print(ctx, smpl);

llama_free(ctx);
llama_free_model(model);

common_sampler_free(smpl);
llama_backend_free();

7 changes: 2 additions & 5 deletions examples/lookahead/lookahead.cpp
@@ -58,8 +58,8 @@ int main(int argc, char ** argv) {
// load the target model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

// Tokenize the prompt
std::vector<llama_token> inp;
@@ -474,9 +474,6 @@ int main(int argc, char ** argv) {

llama_batch_free(batch);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

LOG("\n\n");
13 changes: 4 additions & 9 deletions examples/lookup/lookup-create.cpp
@@ -1,14 +1,9 @@
#include "arg.h"
#include "common.h"
#include "ngram-cache.h"
#include "ggml.h"
#include "llama.h"

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main(int argc, char ** argv){
@@ -25,16 +20,16 @@ int main(int argc, char ** argv){
// load the model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model_ptr & model = llama_init.model;
llama_context_ptr & ctx = llama_init.context;

GGML_ASSERT(model != nullptr);

// tokenize the prompt
std::vector<llama_token> inp;
inp = common_tokenize(ctx, params.prompt, true, true);
inp = common_tokenize(ctx.get(), params.prompt, true, true);
fprintf(stderr, "%s: tokenization done\n", __func__);


common_ngram_cache ngram_cache;
common_ngram_cache_update(ngram_cache, LLAMA_NGRAM_STATIC, LLAMA_NGRAM_STATIC, inp, inp.size(), true);
fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());
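Unlike the other examples, which copy non-owning raw pointers out with get(), this file binds references to the owning pointers themselves and only calls get() at the C-API boundary (common_tokenize(ctx.get(), ...)). A fragment contrasting the two styles, assuming it sits right after the common_init_from_params(params) call as in the hunks above:

// Style used by most examples in this commit: non-owning raw locals.
llama_model   * model = llama_init.model.get();
llama_context * ctx   = llama_init.context.get();
(void) model; (void) ctx;

// Style used by lookup-create.cpp and lookup-stats.cpp: bind a reference to the
// owner and borrow the raw pointer only where the C API needs it.
llama_context_ptr & ctx_ref = llama_init.context;
std::vector<llama_token> inp = common_tokenize(ctx_ref.get(), params.prompt, true, true);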
10 changes: 3 additions & 7 deletions examples/lookup/lookup-stats.cpp
@@ -30,12 +30,11 @@ int main(int argc, char ** argv){
// load the model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_context_ptr & ctx = llama_init.context;

// tokenize the prompt
std::vector<llama_token> inp;
inp = common_tokenize(ctx, params.prompt, true, true);
inp = common_tokenize(ctx.get(), params.prompt, true, true);

common_ngram_cache ngram_cache_context;
common_ngram_cache ngram_cache_dynamic;
@@ -66,7 +65,7 @@ int main(int argc, char ** argv){
}

const int n_input = inp.size();
const int n_ctx = llama_n_ctx(ctx);
const int n_ctx = llama_n_ctx(ctx.get());

int n_drafted = 0;
int n_accept = 0;
@@ -150,9 +149,6 @@ int main(int argc, char ** argv){
LOG_INF("n_accept = %d\n", n_accept);
LOG_INF("accept = %.3f%%\n", 100.0f * n_accept / n_drafted);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

LOG("\n\n");
7 changes: 2 additions & 5 deletions examples/lookup/lookup.cpp
@@ -33,8 +33,8 @@ int main(int argc, char ** argv){
// load the model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

// tokenize the prompt
std::vector<llama_token> inp;
@@ -243,9 +243,6 @@ int main(int argc, char ** argv){

llama_batch_free(batch_tgt);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

LOG("\n\n");
11 changes: 4 additions & 7 deletions examples/main/main.cpp
@@ -145,18 +145,18 @@ int main(int argc, char ** argv) {
llama_context * ctx = nullptr;
common_sampler * smpl = nullptr;

std::vector<common_chat_msg> chat_msgs;

g_model = &model;
g_ctx = &ctx;
g_smpl = &smpl;

std::vector<common_chat_msg> chat_msgs;

// load the model and apply lora adapter, if any
LOG_INF("%s: load the model and apply lora adapter, if any\n", __func__);
common_init_result llama_init = common_init_from_params(params);

model = llama_init.model;
ctx = llama_init.context;
model = llama_init.model.get();
ctx = llama_init.context.get();

if (model == NULL) {
LOG_ERR("%s: error: unable to load model\n", __func__);
@@ -889,9 +889,6 @@ int main(int argc, char ** argv) {

common_sampler_free(smpl);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

ggml_threadpool_free_fn(threadpool);
7 changes: 2 additions & 5 deletions examples/parallel/parallel.cpp
@@ -132,8 +132,8 @@ int main(int argc, char ** argv) {
// load the target model
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

// load the prompts from an external file if there are any
if (params.prompt.empty()) {
@@ -416,9 +416,6 @@ int main(int argc, char ** argv) {

llama_batch_free(batch);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

LOG("\n\n");
8 changes: 3 additions & 5 deletions examples/perplexity/perplexity.cpp
@@ -1987,8 +1987,8 @@ int main(int argc, char ** argv) {
// load the model and apply lora adapter, if any
common_init_result llama_init = common_init_from_params(params);

llama_model * model = llama_init.model;
llama_context * ctx = llama_init.context;
llama_model * model = llama_init.model.get();
llama_context * ctx = llama_init.context.get();

if (model == NULL) {
LOG_ERR("%s: unable to load model\n", __func__);
return 1;
@@ -2023,9 +2024,6 @@ int main(int argc, char ** argv) {
LOG("\n");
llama_perf_context_print(ctx);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

return 0;