docs: remove duplicated code snippet
danbev committed Oct 31, 2024
1 parent 1449ed0 commit b3a57f9
Showing 2 changed files with 7 additions and 26 deletions.
notes/llama-kv-cache.md: 7 additions & 0 deletions
@@ -2317,6 +2317,13 @@ k_cache_size = 512 * 32 * 32 * 128 * 2 = 536870912 = 512MB
```
And for other models:
```console
kv-cache size = 2 *                                // both keys and values
                ctx.cparams.n_ctx *
                ctx.model.hparams.n_layer *
                ctx.model.hparams.n_head_kv(0) *
                ctx.model.hparams.n_embd_head_k *
                ctx.kv_self.type_k                 // element size in bytes, 2 for f16

kv-cache size = 2 * 30016 * 32 * 8 * 128 * 2 bytes
= 2 * 30016 * 32 * 8 * 128 * 2
= 3934257152
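To sanity-check the arithmetic above, here is a minimal standalone sketch in C++ (the hyperparameter values are hard-coded from the example rather than read from a real model, so they are illustrative only):

```c++
// Sketch of the kv-cache size formula:
//   2 (K and V) * n_ctx * n_layer * n_head_kv * n_embd_head_k * bytes per element
// Values are taken from the example above, not read from an actual model.
#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t n_ctx         = 30016; // context size
    const uint64_t n_layer       = 32;    // number of layers
    const uint64_t n_head_kv     = 8;     // number of KV heads (GQA)
    const uint64_t n_embd_head_k = 128;   // embedding dimension per head
    const uint64_t type_size     = 2;     // bytes per element, 2 for f16

    const uint64_t kv_cache_size =
        2 * n_ctx * n_layer * n_head_kv * n_embd_head_k * type_size;

    printf("kv-cache size = %llu bytes (%.2f GiB)\n",
           (unsigned long long) kv_cache_size,
           kv_cache_size / (1024.0 * 1024.0 * 1024.0));
    return 0;
}
```

This prints 3934257152 bytes, matching the figure above (about 3.66 GiB).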
notes/llama-self-extend.md: 0 additions & 26 deletions
@@ -873,32 +873,6 @@ By setting `cache.has_shift` to true when `llama_decode_internal` calls
}
}
}
// apply K-shift if needed
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
    {
        ggml_backend_sched_reset(lctx.sched);
        ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
        ggml_backend_sched_alloc_graph(lctx.sched, gf);
        llama_set_k_shift(lctx);
        llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
        need_reserve = true;
    }
    {
        auto & kv_self = lctx.kv_self;
        kv_self.has_shift = false;
        for (uint32_t i = 0; i < kv_self.size; ++i) {
            kv_self.cells[i].delta = 0;
        }
    }
}
```

When the kv-cache `has_shift` is true like in this case where we updated above
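For context on the `has_shift`/`delta` bookkeeping that the snippet above consumes, here is a rough, self-contained sketch (simplified stand-in structs, not llama.cpp's actual `kv_self` types): positions are shifted immediately when a sequence is moved, while the cached K tensors are only rotated later by the batched K-shift graph, after which the deltas are cleared.

```c++
// Simplified sketch with hypothetical types; llama.cpp's real cache is more involved.
#include <cstdint>
#include <vector>

struct kv_cell {
    int32_t pos   = -1; // position currently stored in this cell
    int32_t delta = 0;  // shift not yet applied to the cached K data
};

struct kv_cache {
    bool has_shift = false;
    std::vector<kv_cell> cells;
};

// Record a shift of `d` for every cell whose position lies in [p0, p1).
void kv_cache_shift(kv_cache & cache, int32_t p0, int32_t p1, int32_t d) {
    for (auto & cell : cache.cells) {
        if (cell.pos >= p0 && cell.pos < p1) {
            cell.pos       += d;
            cell.delta     += d;
            cache.has_shift = true;
        }
    }
}

// Mirrors the clean-up in the snippet above: once the K-shift graph has run,
// reset the flag and zero every delta.
void kv_cache_clear_shift(kv_cache & cache) {
    cache.has_shift = false;
    for (auto & cell : cache.cells) {
        cell.delta = 0;
    }
}
```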
