alpaca-core · pminev · Feb 13, 2025 · Jan 3, 2025 · Feb 13, 2025 · Feb 13, 2025
diff --git a/code/ac/llama/Instance.cpp b/code/ac/llama/Instance.cpp
@@ -34,9 +34,9 @@ llama_context_params llamaFromInstanceInitParams(const Instance::InitParams& par
 
 Instance::Instance(Model& model, InitParams params)
     : m_model(model)
-    , m_sampler(model, {
+    , m_sampler(new Sampler(model, {
         .grammar = params.grammar,
-    })
+    }))
     , m_lctx(llama_init_from_model(model.lmodel(), llamaFromInstanceInitParams(params)), llama_free)
 {
     if (!m_lctx) {

diff --git a/code/ac/llama/Instance.hpp b/code/ac/llama/Instance.hpp
@@ -40,11 +40,18 @@ class AC_LLAMA_EXPORT Instance {
     void stopSession() noexcept;
 
     const Model& model() const noexcept { return m_model; }
-    Sampler& sampler() noexcept { return m_sampler; }
+
+    Sampler& sampler() noexcept { return *m_sampler; }
+
+    // Replace the current sampler with a new one constructed from params for this instance's model
+    // warning: previous sampler state is lost and references obtained from sampler() dangle afterwards
+    void resetSampler(const Sampler::Params& params) {
+        m_sampler = std::make_unique<Sampler>(m_model, params);
+    }
 
 private:
     Model& m_model;
-    Sampler m_sampler;
+    std::unique_ptr<Sampler> m_sampler;
     astl::c_unique_ptr<llama_context> m_lctx;
     std::optional<Session> m_session;
 };

diff --git a/code/ac/llama/Model.cpp b/code/ac/llama/Model.cpp
@@ -88,7 +88,7 @@ std::shared_ptr<llama_model> ModelRegistry::loadModel(
     }
 
     if (!model) {
-        model = std::shared_ptr<llama_model>(llama_load_model_from_file(gguf.c_str(), llamaFromModelParams(params, pcb)), llama_free_model);
+        model = std::shared_ptr<llama_model>(llama_model_load_from_file(gguf.c_str(), llamaFromModelParams(params, pcb)), llama_model_free);
         m_models.push_back({key, model});
     }
 

diff --git a/test/t-integration.cpp b/test/t-integration.cpp
@@ -67,20 +67,50 @@ TEST_CASE("inference") {
         tokens = model.vocab().tokenize("President George W.", true, true);
         s.setInitialPrompt(tokens);
         {
             auto t = s.getToken();
             REQUIRE(t != ac::llama::Token_Invalid);
             auto text = model.vocab().tokenToString(t);
             CHECK(text == " Bush");
         }
 
-        // add more very suggestive stuff
-        tokens = model.vocab().tokenize(" sent troops to Cleveland which was hit by torrential", false, false);
-        s.pushPrompt(tokens);
-        {
-            auto t = s.getToken();
-            REQUIRE(t != ac::llama::Token_Invalid);
-            auto text = model.vocab().tokenToString(t);
-            CHECK(text.starts_with(" rain")); // could be rains
+        SUBCASE("default sampler") {
+            // push a follow-up prompt that strongly suggests the next token (sampler left as constructed)
+            tokens = model.vocab().tokenize(" sent troops to Cleveland which was hit by torrential", false, false);
+            s.pushPrompt(tokens);
+            {
+                auto t = s.getToken();
+                REQUIRE(t != ac::llama::Token_Invalid);
+                auto text = model.vocab().tokenToString(t);
+                CHECK(text.starts_with(" rain")); // prefix match: the token could also be " rains"
+            }
+        }
+
+        SUBCASE("custom sampler") {
+            ac::llama::Sampler::Params samplerParams = {};
+            samplerParams.rngSeed = 1717;
+            samplerParams.minP = 0.2f;
+            samplerParams.topK = 100;
+            samplerParams.topP = 0.2f;
+            samplerParams.minKeep = 1000; // NOTE(review): larger than topK = 100 - confirm this is intended
+            samplerParams.temp = 10.0f;
+            samplerParams.tempExp = 5.0f;
+            samplerParams.samplerSequence = {
+                ac::llama::Sampler::SamplingType::Min_P,
+                ac::llama::Sampler::SamplingType::Temperature,
+                ac::llama::Sampler::SamplingType::Top_K,
+                ac::llama::Sampler::SamplingType::Top_P,
+            };
+            inst.resetSampler(samplerParams);
+
+            // push the same follow-up prompt as the default-sampler subcase, now sampled with the custom params
+            tokens = model.vocab().tokenize(" sent troops to Cleveland which was hit by torrential", false, false);
+            s.pushPrompt(tokens);
+            {
+                auto t = s.getToken();
+                REQUIRE(t != ac::llama::Token_Invalid);
+                auto text = model.vocab().tokenToString(t);
+                CHECK(text.starts_with(" down")); // NOTE(review): presumably pinned by rngSeed and the params above - verify it is stable
+            }
         }
     }
 }