From a8c81035c2da5f0a40d557f407adddf06559fb06 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 16 Oct 2023 11:41:00 +0300 Subject: [PATCH 1/2] display data for validation from list of docs --- resin_cli/cli.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/resin_cli/cli.py b/resin_cli/cli.py index a14f31d4..a3d26947 100644 --- a/resin_cli/cli.py +++ b/resin_cli/cli.py @@ -104,7 +104,7 @@ def new(index_name, tokenizer_model): click.echo("Resin is going to create a new index: ", nl=False) click.echo(click.style(f"{kb.index_name}", fg="green")) click.confirm(click.style("Do you want to continue?", fg="red"), abort=True) - Tokenizer.initialize(OpenAITokenizer, tokenizer_model) + Tokenizer.initialize(OpenAITokenizer, model_name=tokenizer_model) with spinner: kb.create_resin_index() click.echo(click.style("Success!", fg="green")) @@ -126,7 +126,7 @@ def upsert(index_name, data_path, tokenizer_model): '`export INDEX_NAME="MY_INDEX_NAME`') click.echo(click.style(msg, fg="red"), err=True) sys.exit(1) - Tokenizer.initialize(OpenAITokenizer, tokenizer_model) + Tokenizer.initialize(OpenAITokenizer, model_name=tokenizer_model) if data_path is None: msg = ("Data path is not provided," + " please provide it with --data-path or set it with env var") @@ -173,8 +173,11 @@ def upsert(index_name, data_path, tokenizer_model): click.echo(click.style(msg, fg="red"), err=True) sys.exit(1) pd.options.display.max_colwidth = 20 - click.echo(data[0].json(exclude_none=True, indent=2)) - click.confirm(click.style("\nDoes this data look right?", fg="red"), abort=True) + + click.echo(pd.DataFrame([doc.dict(exclude_none=True) for doc in data[:5]])) + click.echo(click.style(f"\nTotal records: {len(data)}")) + click.confirm(click.style("\nDoes this data look right?", fg="red"), + abort=True) kb.upsert(data) click.echo(click.style("Success!", fg="green")) From e74c427050b6a1759efbcf893c3079c60a8a5b05 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 16 Oct 2023 15:00:57 +0300 Subject: [PATCH 2/2] small CLI fix --- resin_cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resin_cli/cli.py b/resin_cli/cli.py index a3d26947..04796c6b 100644 --- a/resin_cli/cli.py +++ b/resin_cli/cli.py @@ -100,11 +100,11 @@ def health(host, port, ssl): @click.argument("index-name", nargs=1, envvar="INDEX_NAME", type=str, required=True) @click.option("--tokenizer-model", default="gpt-3.5-turbo", help="Tokenizer model") def new(index_name, tokenizer_model): + Tokenizer.initialize(OpenAITokenizer, model_name=tokenizer_model) kb = KnowledgeBase(index_name=index_name) click.echo("Resin is going to create a new index: ", nl=False) click.echo(click.style(f"{kb.index_name}", fg="green")) click.confirm(click.style("Do you want to continue?", fg="red"), abort=True) - Tokenizer.initialize(OpenAITokenizer, model_name=tokenizer_model) with spinner: kb.create_resin_index() click.echo(click.style("Success!", fg="green"))