From a5e20d2cefc2080d64482c63a93f0d2f98c03358 Mon Sep 17 00:00:00 2001
From: Guenther Schmuelling
Date: Tue, 30 Apr 2024 15:25:07 -0700
Subject: [PATCH] honor both eos tokens

---
 chat/chat.js | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/chat/chat.js b/chat/chat.js
index 20791f3..391ca5e 100644
--- a/chat/chat.js
+++ b/chat/chat.js
@@ -187,6 +187,7 @@ const MODELS = {
     "tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true },
     "phi2": { name: "phi2", path: "schmuell/phi2-int4" },
     "phi3": { name: "phi3", path: "schmuell/phi3-int4", externaldata: true },
+    "phi3-1": { name: "phi3-1", path: "schmuell/phi3-1", externaldata: true },
     "stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4" },
 }
@@ -199,7 +200,7 @@ function getConfig() {
         verbose: 0,
         threads: 1,
         csv: 0,
-        max_tokens: 512,
+        max_tokens: 9999,
         local: 0,
     }
     let vars = query.split("&");
@@ -399,7 +400,7 @@ class LLM {
            }
        }

-        while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
+        while (last_token != this.eos && last_token != 32007 && seqlen < max_tokens && !this.stop) {
            seqlen = this.output_tokens.length;
            feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
            const outputs = await this.sess.run(feed);
@@ -435,20 +436,12 @@ ort.env.wasm.wasmPaths = document.location.pathname.replace('index.html', '') +
 const llm = new LLM();

 function token_to_text(tokenizer, tokens, startidx) {
-    const txt = tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: true, });
+    const txt = tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: false, });
     return txt;
 }

 async function Query(query, cb) {
-    let prompt;
-
-    if (config.model.name == 'phi2') {
-        prompt = `User:${query}\nAssistant:`;
-    } else if (config.model.name == 'phix') {
-        prompt = query;
-    } else {
-        prompt = `"<|system|>\nYou are a friendly assistant.\n<|user|>\n${query}\n<|assistant|>\n`;
-    }
+    let prompt = `<|system|>\nYou are a friendly assistant.<|end|>\n<|user|>\n${query}<|end|>\n<|assistant|>\n`;

     const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
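
Note on the stop condition: the new loop hardcodes 32007, the token id of
phi-3's <|end|> marker, next to the model's configured eos token. A minimal
sketch of a more general variant -- illustrative only, not part of this
patch; the helper name and the surrounding generate() loop shape are
assumptions -- would collect the stop ids in one place so each model can
declare its own markers:

    // Sketch only: isStopToken is a hypothetical helper, not in chat.js.
    // 32007 is phi-3's <|end|> token id; eos is the model's configured id.
    // Number() normalizes BigInt token ids from int64 tensors for Set.has().
    function isStopToken(token, eos) {
        const stop_tokens = new Set([Number(eos), 32007]);
        return stop_tokens.has(Number(token));
    }

    // In the generate loop, replacing the two hardcoded comparisons:
    // while (!isStopToken(last_token, this.eos) && seqlen < max_tokens && !this.stop) { ... }

This keeps the loop condition unchanged when further stop tokens are added
for new model entries in MODELS.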