This repository was archived by the owner on Jun 10, 2022. It is now read-only.

Frontend start command. #8

Open
wants to merge 55 commits into base: main
Changes from 5 commits
Commits (55)
e00243f
Merge pull request #3 from alexcg1/add-license-3
alexcg1 Jun 11, 2021
d066508
Create LICENSE
alexcg1 Jun 11, 2021
0038d75
frontend: layout, add URL links, sidebar
alexcg1 Jun 13, 2021
36e3437
dockerfile
alexcg1 Jun 14, 2021
8d1e2dc
chore(backend): tidy up code
alexcg1 Jun 14, 2021
191065a
chore(backend): saner variables and config
alexcg1 Jun 14, 2021
2e25f9a
chore(backend): remove debug code
alexcg1 Jun 14, 2021
8460d88
chore(backend): add more user config
alexcg1 Jun 14, 2021
525f289
chore(Dockerfile): bump version
alexcg1 Jun 14, 2021
83efe0e
docs(readme): update to reflect changes
alexcg1 Jun 14, 2021
4c98dc2
fix: endpoint, other user configurable vars
alexcg1 Jun 15, 2021
8ebeb24
fix(frontend): endpoint var name
alexcg1 Jun 15, 2021
f32a34c
fix(frontend): endpoint var name again
alexcg1 Jun 15, 2021
72a0ca4
fix(frontend): fix parameter name for endpoint
alexcg1 Jun 15, 2021
c882013
fix: sanitize strings
alexcg1 Jun 15, 2021
6079ea9
fix(frontend): sanitize escaped strings
alexcg1 Jun 15, 2021
26d85ea
docs(frontend): add note about clicking search button
alexcg1 Jun 15, 2021
a8f6433
chore: powered by jina image
alexcg1 Jun 15, 2021
8b54dae
chore: powered by jina logo display
alexcg1 Jun 15, 2021
a3d032f
frontend: better help text for search box
alexcg1 Jun 15, 2021
690d84e
fun: add oneline version
alexcg1 Jun 15, 2021
075f570
refactor: verbose variable names
alexcg1 Jun 16, 2021
da162dd
fix(frontend): include name
alexcg1 Jun 16, 2021
8ea4efd
fix: filename
alexcg1 Jun 16, 2021
bceea9e
chore(backend) comments, better var names
alexcg1 Jun 16, 2021
5fa19a0
chore(backend) comments
alexcg1 Jun 16, 2021
c75aa1f
chore: rm old files
alexcg1 Jun 16, 2021
2fb7a4b
fix: pretty_errors optional
alexcg1 Jun 16, 2021
c1e8bc5
chore(frontend): rm unused import
alexcg1 Jun 16, 2021
451b466
fix: hardcoded endpoint
alexcg1 Jun 22, 2021
f37d54b
fix: trying docarraymemmap to save memory
alexcg1 Jun 22, 2021
fe5db01
feat(data-getter): initial commit
alexcg1 Jun 22, 2021
2f82fbb
chore: rm joke oneliner
alexcg1 Jun 22, 2021
d32baeb
chore: rm data dir
alexcg1 Jun 22, 2021
7e5b10d
fix: input generator
hanxiao Jun 22, 2021
f2b4622
Merge pull request #4 from hanxiao/patch-1
alexcg1 Jun 22, 2021
a5d89dc
chore: reorder config
alexcg1 Jun 23, 2021
2a72209
chore: bump version
alexcg1 Jun 23, 2021
497cc81
fix(Dockerfile): run data getter
alexcg1 Jun 23, 2021
4760f1a
chore(dockerfile): version bump
alexcg1 Jun 23, 2021
96f688a
fix(dockerfile): install wget
alexcg1 Jun 23, 2021
4ae77b0
fix(dockerfile): disable apt-get prompt
alexcg1 Jun 23, 2021
a9d6185
feat(index): index to disk
alexcg1 Jun 23, 2021
074ccab
fix: everything working again with right workdir
alexcg1 Jun 23, 2021
f71af2a
chore: cleanup
alexcg1 Jun 24, 2021
db78008
chore: cleanup
alexcg1 Jun 24, 2021
87b67d9
Merge pull request #5 from alexcg1/feat-index-to-disk
alexcg1 Jun 24, 2021
c0e4e46
docs(readme): update
alexcg1 Jun 24, 2021
19f32ea
chore: update reqs
alexcg1 Jun 24, 2021
859fa8e
feat: dockerfile query only. run with mounted workspace
alexcg1 Jun 24, 2021
cbd099b
refactor: switch transformers executor for hub version
alexcg1 Jun 25, 2021
0ac1084
Merge pull request #6 from alexcg1/feat-hub-migrate
alexcg1 Jun 25, 2021
19b0e49
fix: ratings
alexcg1 Jun 25, 2021
9abedf6
fix(backend): num_docs
alexcg1 Jun 25, 2021
3b5c65e
Frontend start command.
fissoreg Jun 28, 2021
111 changes: 71 additions & 40 deletions backend/app.py
@@ -1,20 +1,32 @@
__copyright__ = "Copyright (c) 2021 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

import os
import itertools
from pprint import pprint
from jina import Flow, Document, DocumentArray
from jina.types.arrays.memmap import DocumentArrayMemmap
from jina.parsers.helloworld import set_hw_chatbot_parser
import csv
from backend_config import backend_port, backend_workdir, backend_datafile, text_length, max_docs
from executors import MyTransformer, MyIndexer
import shutil
import click
import sys
from backend_config import (
text_length,
max_docs,
backend_datafile,
backend_port,
backend_workdir,
)
from executors import MyTransformer, DiskIndexer

from jina import Flow, Document

try:
__import__("pretty_errors")
except ImportError:
pass


def trim_string(input_string: str, word_count: int = text_length, sep: str = " ") -> str:
def trim_string(
input_string: str, word_count: int = text_length, sep: str = " "
) -> str:
"""
Trim a string to a certain number of words.
:param input_string: string to trim
@@ -46,48 +58,67 @@ def prep_docs(input_file: str, max_docs=max_docs):
yield doc


def run_appstore_flow(inputs, args) -> None:
"""
Execute the app store example. Indexes data and presents REST endpoint
:param inputs: Documents or DocumentArrays to input
:args: arguments like port, workdir, etc
:return: None
"""

# Create Flow and add
# - MyTransformer (an encoder Executor)
# - MyIndexer (a simple indexer Executor)
def index():
flow = (
Flow()
.add(uses=MyTransformer, parallel=args.parallel)
# .add(uses=EmbeddingIndexer)
# .add(uses=KeyValueIndexer)
.add(uses=MyIndexer, workspace=args.workdir)
.add(uses=MyTransformer, parallel=2, name="encoder")
.add(uses=DiskIndexer, workspace=backend_workdir, name="indexer")
)
# flow = Flow.load_config('flows/index.yml')

# Open the Flow
with flow:
# Start index pipeline, taking inputs then printing the processed DocumentArray
flow.post(on="/index", inputs=inputs)
flow.post(
on="/index",
inputs=prep_docs(input_file=backend_datafile, max_docs=max_docs),
request_size=64,
read_mode="r",
)

# Start REST gateway so clients can query via Streamlit or other frontend (like Jina Box)
flow.use_rest_gateway(backend_port)

# Block the process to keep it open. Otherwise it will just close and no-one could connect
flow.block()

def query_restful():
flow = (
Flow()
.add(uses=MyTransformer, name="encoder")
.add(uses=DiskIndexer, workspace=backend_workdir, name="indexer")
)

if __name__ == "__main__":
with flow:
flow.protocol = "http"
flow.port_expose = backend_port
flow.block()

# Get chatbot's default arguments
args = set_hw_chatbot_parser().parse_args()

# Change a few things
args.workdir = backend_workdir
@click.command()
@click.option(
"--task",
"-t",
type=click.Choice(["index", "query_restful"], case_sensitive=False),
)
@click.option("--num_docs", "-n", default=max_docs)
@click.option("--force", "-f", is_flag=True)
def main(task: str, num_docs: int, force: bool):
workspace = backend_workdir
if task == "index":
if os.path.exists(workspace):
if force:
shutil.rmtree(workspace)
else:
print(
f"\n +----------------------------------------------------------------------------------+ \
\n | 🤖🤖🤖 | \
\n | The directory {workspace} already exists. Please remove it before indexing again. | \
\n | 🤖🤖🤖 | \
\n +----------------------------------------------------------------------------------+"
)
sys.exit(1)
index()
if task == "query_restful":
if not os.path.exists(workspace):
print(
f"The directory {workspace} does not exist. Please index first via `python app.py -t index`"
)
sys.exit(1)
query_restful()

# Convert the csv file to a DocumentArray
docs = prep_docs(input_file=backend_datafile)

# Run the Flow
run_appstore_flow(inputs=docs, args=args)
if __name__ == "__main__":
main()
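
The new click-based `main()` replaces the old `run_appstore_flow()` entry point: `python app.py -t index` builds the workspace (with `-f`/`--force` to wipe an existing one and `-n`/`--num_docs` to cap how many apps are indexed), and `python app.py -t query_restful` serves the same encoder + indexer Flow over HTTP on `backend_port`. A rough sketch of how a client could hit that gateway is below; the request path mirrors the `/search` endpoint handled by `DiskIndexer`, but the port value and the JSON payload shape are assumptions about Jina's HTTP gateway, not part of this PR.

```python
import requests  # assumption: any plain HTTP client works here

# Hypothetical query against the gateway started by `python app.py -t query_restful`.
# The port mirrors backend_port (value not shown in this diff, 8080 assumed) and the
# payload shape is an assumption for Jina's HTTP gateway around v2.x.
resp = requests.post(
    "http://localhost:8080/search",
    json={"data": [{"text": "match three puzzle game"}]},
)
resp.raise_for_status()
print(resp.json())  # matches should appear under each returned document
```
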
2 changes: 1 addition & 1 deletion backend/backend_config.py
@@ -7,7 +7,7 @@
backend_workdir = "workspace"
backend_datafile = "./data/appstore_games-shuffled.csv"
text_length = 50 # How many words to index for each app? Longer = more accurate, shorter = quicker
max_docs = 5000 # How many apps to index
max_docs = 3000 # How many apps to index

# dataset
dataset_url = "https://github.com/alexcg1/ml-datasets/blob/master/nlp/strategy_games/appstore_games.csv?raw=true"
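
All of the names imported by `app.py` and `executors.py` (`backend_port`, `backend_workdir`, `backend_datafile`, `backend_model`, `backend_top_k`, `text_length`, `max_docs`, `dataset_url`) live in this one module; dropping `max_docs` from 5000 to 3000 presumably keeps indexing time and memory down. A sketch of what `backend_config.py` looks like after this change; only the values visible in the diff are certain, the rest are placeholders:

```python
# backend_config.py -- sketch; values not shown in the diff are placeholders
backend_port = 8080                # assumption: actual port not shown in this diff
backend_workdir = "workspace"
backend_datafile = "./data/appstore_games-shuffled.csv"
backend_model = "sentence-transformers/paraphrase-mpnet-base-v2"  # placeholder model name
backend_top_k = 10                 # assumption: number of matches DiskIndexer returns

text_length = 50  # How many words to index for each app? Longer = more accurate, shorter = quicker
max_docs = 3000   # How many apps to index

# dataset
dataset_url = "https://github.com/alexcg1/ml-datasets/blob/master/nlp/strategy_games/appstore_games.csv?raw=true"
```
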
90 changes: 54 additions & 36 deletions backend/executors.py
@@ -2,6 +2,7 @@
from backend_config import backend_model, backend_top_k

import numpy as np
import os
import torch
from transformers import AutoModel, AutoTokenizer

@@ -89,17 +90,67 @@ def encode(self, docs: "DocumentArray", *args, **kwargs):
doc.embedding = embed


class MyIndexer(Executor):
def _get_ones(x, y):
return np.ones((x, y))


def _ext_A(A):
nA, dim = A.shape
A_ext = _get_ones(nA, dim * 3)
A_ext[:, dim : 2 * dim] = A
A_ext[:, 2 * dim :] = A ** 2
return A_ext


def _ext_B(B):
nB, dim = B.shape
B_ext = _get_ones(dim * 3, nB)
B_ext[:dim] = (B ** 2).T
B_ext[dim : 2 * dim] = -2.0 * B.T
del B
return B_ext


def _euclidean(A_ext, B_ext):
sqdist = A_ext.dot(B_ext).clip(min=0)
return np.sqrt(sqdist)


def _norm(A):
return A / np.linalg.norm(A, ord=2, axis=1, keepdims=True)


def _cosine(A_norm_ext, B_norm_ext):
return A_norm_ext.dot(B_norm_ext).clip(min=0) / 2



class DiskIndexer(Executor):
"""Simple indexer class """

def __init__(self, **kwargs):
super().__init__(**kwargs)
self._docs = DocumentArray()
self.top_k = backend_top_k
if os.path.exists(self.save_path):
self._docs = DocumentArray.load(self.save_path)
else:
self._docs = DocumentArray()

@property
def save_path(self):
if not os.path.exists(self.workspace):
os.makedirs(self.workspace)
return os.path.join(self.workspace, 'apps.json')

def close(self):
self._docs.save(self.save_path)


@requests(on="/index")
def index(self, docs: "DocumentArray", **kwargs):
self._docs.extend(docs)
return docs

@requests(on="/search")
def search(self, docs: "DocumentArray", **kwargs):
@@ -112,8 +163,9 @@ def search(self, docs: "DocumentArray", **kwargs):
for _q, _ids, _dists in zip(docs, idx, dist):
for _id, _dist in zip(_ids, _dists):
d = Document(self._docs[int(_id)], copy=True)
d.score.value = 1 - _dist
# d.score.value = 1 - _dist
_q.matches.append(d)
return docs

@staticmethod
def _get_sorted_top_k(
@@ -130,37 +182,3 @@ def _get_sorted_top_k(
dist = np.take_along_axis(dist, idx_fs, axis=1)

return idx, dist


def _get_ones(x, y):
return np.ones((x, y))


def _ext_A(A):
nA, dim = A.shape
A_ext = _get_ones(nA, dim * 3)
A_ext[:, dim : 2 * dim] = A
A_ext[:, 2 * dim :] = A ** 2
return A_ext


def _ext_B(B):
nB, dim = B.shape
B_ext = _get_ones(dim * 3, nB)
B_ext[:dim] = (B ** 2).T
B_ext[dim : 2 * dim] = -2.0 * B.T
del B
return B_ext


def _euclidean(A_ext, B_ext):
sqdist = A_ext.dot(B_ext).clip(min=0)
return np.sqrt(sqdist)


def _norm(A):
return A / np.linalg.norm(A, ord=2, axis=1, keepdims=True)


def _cosine(A_norm_ext, B_norm_ext):
return A_norm_ext.dot(B_norm_ext).clip(min=0) / 2
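
The `_ext_A`/`_ext_B`/`_euclidean`/`_cosine` helpers, now hoisted to module level above `DiskIndexer`, implement the usual extended-matrix trick: padding `A` with `[1, A, A**2]` column blocks and `B` with `[B**2, -2*B, 1]` row blocks turns every pairwise squared Euclidean distance into a single matrix product, and the same product on L2-normalised rows yields `2 * (1 - cosine similarity)`, which `_cosine` halves into a cosine distance. Below is a self-contained check of the Euclidean identity; the helpers are re-declared so the snippet runs on its own, and it is not code from this PR.

```python
import numpy as np

def _ext_A(A):
    nA, dim = A.shape
    A_ext = np.ones((nA, dim * 3))
    A_ext[:, dim:2 * dim] = A
    A_ext[:, 2 * dim:] = A ** 2
    return A_ext

def _ext_B(B):
    nB, dim = B.shape
    B_ext = np.ones((dim * 3, nB))
    B_ext[:dim] = (B ** 2).T
    B_ext[dim:2 * dim] = -2.0 * B.T
    return B_ext

rng = np.random.default_rng(0)
A = rng.random((4, 8))  # e.g. 4 query embeddings of dimension 8
B = rng.random((5, 8))  # e.g. 5 indexed embeddings of dimension 8

# One matmul yields every pairwise squared distance at once:
# row i of _ext_A(A) dotted with column j of _ext_B(B)
# = sum_k (B[j,k]^2 - 2*A[i,k]*B[j,k] + A[i,k]^2) = ||A[i] - B[j]||^2
sqdist = _ext_A(A).dot(_ext_B(B))

# Direct reference computation for comparison.
reference = ((A[:, None, :] - B[None, :, :]) ** 2).sum(axis=-1)

assert np.allclose(sqdist, reference)
```
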