Skip to content

Commit

Permalink
Merge pull request #47 from unum-cloud/main-dev
Browse files Browse the repository at this point in the history
Prototyping client-server communication
  • Loading branch information
ashvardanian authored May 22, 2023
2 parents 2d0846b + 07ac124 commit 7be0c86
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 52 deletions.
54 changes: 38 additions & 16 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,7 @@ jobs:
- name: Install CIBuildWheel
run: python -m pip install cibuildwheel

- name: Build wheels for Windows
if: matrix.os == 'windows-2022'
run: python -m cibuildwheel

- name: Build wheels for POSIX
if: matrix.os != 'windows-2022'
- name: Build wheels
run: python -m cibuildwheel

- uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -165,11 +160,8 @@ jobs:
- name: Run tests
run: swift test

deploy_docs:
name: Deploy Docs
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
build_docs:
name: Build Docs
runs-on: ubuntu-22.04
if: ${{ always() }}
needs: [publish_python, publish_javascript, publish_rust, publish_java, publish_swift]
Expand All @@ -178,8 +170,6 @@ jobs:
uses: actions/checkout@v3
with:
ref: 'main'
- name: Setup GitHub Pages
uses: actions/configure-pages@v2
- name: Install dependencies
run: sudo apt update && sudo apt install -y doxygen graphviz dia git && pip install sphinx breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery
- name: Install USearch from PyPi
Expand All @@ -188,11 +178,43 @@ jobs:
run: cd docs && make html
- name: Copy assets
run: cp -r assets build/docs/html/
- name: Compress assets
run: tar -czvf docs.tar.gz build/docs/html/
- name: Upload docs to release
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: docs.tar.gz
asset_name: docs.tar.gz
tag: ${{ github.ref }}

# Publishes the documentation built by `build_docs` to GitHub Pages.
deploy_docs_pages:
  name: Deploy GitHub Pages
  environment:
    name: github-pages
    url: ${{ steps.deployment.outputs.page_url }}
  runs-on: ubuntu-22.04
  needs: build_docs
  steps:
    # Fetch the `docs.tar.gz` asset from the latest release.
    # NOTE(review): the action reference was garbled by e-mail obfuscation in
    # the captured page; `release-downloader@v1.8` is assumed - confirm the tag.
    - uses: robinraju/release-downloader@v1.8
      with:
        latest: true
        fileName: docs.tar.gz
    # The archive is created with `tar -czvf docs.tar.gz build/docs/html/`,
    # so extracting it in place recreates `./build/docs/html/`. Without this
    # step the upload below points at a directory that does not exist.
    - name: Unpack docs
      run: tar -xzf docs.tar.gz
    - name: Setup GitHub Pages
      uses: actions/configure-pages@v2
    - name: Upload artifacts
      uses: actions/upload-pages-artifact@v1
      with:
        # Upload only the generated HTML documentation
        path: "./build/docs/html/"
    - name: Deploy to GitHub Pages
      id: deployment
      uses: actions/deploy-pages@v1

# Sends a single HTTP POST once the documentation has been built.
deploy_docs_vercel:
name: Deploy GitHub Vercel
runs-on: ubuntu-22.04
# Starts only after the `build_docs` job.
needs: build_docs
steps:
- name: Notify Vercel
uses: fjogeleit/http-request-action@v1
with:
# Target URL comes from the `DOCS_VERCEL` repository secret
# (presumably a Vercel deploy hook - confirm).
url: ${{ secrets.DOCS_VERCEL }}
method: 'POST'
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ xfail_strict = true
filterwarnings = ["error"]

[tool.cibuildwheel]
test-requires = "pytest"
test-command = "pytest {project}/python/test.py"
build-verbosity = 0

skip = ["*musllinux*", "*i686*", "pp*"]

[tool.cibuildwheel.linux]
Expand Down
98 changes: 84 additions & 14 deletions python/usearch/client.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,107 @@
from typing import Union, Optional

import numpy as np
from ucall.client import Client


def _vector_to_ascii(vector: np.ndarray) -> Optional[str]:
if vector.dtype != np.int8 and vector.dtype != np.uint8 and vector.dtype != np.byte:
return None
if not np.all((vector >= 0) | (vector <= 100)):
return None

# Let's map [0, 100] to the range from [23, 123],
# poking 60 and replacing with the 124.
vector += 23
vector[vector == 60] = 124
ascii = str(vector)
return ascii


class IndexClient:

def __init__(self, uri: str = '127.0.0.1', port: int = 8545, use_http: bool = True) -> None:
    """Create the underlying `ucall` client and keep it on ``self.client``.

    Args:
        uri: Passed through to ``Client`` as ``uri``.
        port: Passed through to ``Client`` as ``port``.
        use_http: Passed through to ``Client`` as ``use_http``.
    """
    connection = Client(
        uri=uri,
        port=port,
        use_http=use_http,
    )
    self.client = connection

def add(self, labels: np.array, vectors: np.array):
if isinstance(labels, int):
self.client.add_one(label=labels, vectors=vectors)
def add_one(self, label: int, vector: np.ndarray):
    """Send a single labelled vector to the server.

    The vector is flattened first. When `_vector_to_ascii` yields a non-empty
    string it is sent through the remote `add_ascii` call, otherwise the
    flattened array goes through the remote `add_one` call.

    Args:
        label: Integer label of the vector.
        vector: Array holding exactly one vector (any shape; it is flattened).

    Raises:
        AssertionError: If `label` is not an `int` or `vector` is not an
            `np.ndarray`.
    """
    assert isinstance(label, int)
    assert isinstance(vector, np.ndarray)
    flat = vector.flatten()
    encoded = _vector_to_ascii(flat)
    if encoded:
        self.client.add_ascii(label=label, string=encoded)
    else:
        # The server-side `add_one` handler names this parameter `vector`
        # (singular), the same keyword `search_one` already uses.
        self.client.add_one(label=label, vector=flat)

def add_many(self, labels: np.ndarray, vectors: np.ndarray):
    """Send a batch of labelled vectors to the server in one remote call.

    Args:
        labels: 1-D array with one label per row of `vectors`.
        vectors: 2-D array, one vector per row.

    Raises:
        AssertionError: If either argument is not an `np.ndarray`, the
            ranks are wrong, or the number of labels and rows differ.
    """
    # `labels` must be an array: the rank and shape checks below rely on it.
    assert isinstance(labels, np.ndarray)
    assert isinstance(vectors, np.ndarray)
    assert labels.ndim == 1 and vectors.ndim == 2
    assert labels.shape[0] == vectors.shape[0]
    self.client.add_many(labels=labels, vectors=vectors)

def add(self, labels: Union[np.ndarray, int], vectors: np.ndarray):
    """Add one or many vectors, dispatching on the form of `labels`.

    Args:
        labels: A single integer label (Python or NumPy integer), or a
            sequence of labels.
        vectors: The vector(s) matching `labels`.

    Returns:
        Whatever `add_one` or `add_many` returns.
    """
    # NumPy integer scalars are not `int` instances and have no `len()`.
    if isinstance(labels, (int, np.integer)):
        return self.add_one(int(labels), vectors)
    if len(labels) == 1:
        # `add_one` insists on a plain `int`, so unwrap the only element.
        return self.add_one(int(labels[0]), vectors)
    return self.add_many(labels, vectors)

def search_one(self, vector: np.ndarray, count: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Query the server with a single vector.

    The vector is flattened; if `_vector_to_ascii` yields a non-empty string
    the remote `search_ascii` call is used, otherwise the remote
    `search_one` call. The reply is read from the response's `json`
    attribute and is expected to be a list of dicts with `'label'` and
    `'distance'` keys.

    Args:
        vector: Array holding exactly one query vector.
        count: Maximum number of matches to request and return.

    Returns:
        A `(labels, distances, counts)` tuple with shapes `(1, count)`,
        `(1, count)` and `(1,)`. Slots beyond the number of matches stay zero.
    """
    flat = vector.flatten()
    encoded = _vector_to_ascii(flat)
    if encoded:
        response = self.client.search_ascii(string=encoded, count=count)
    else:
        response = self.client.search_one(vector=flat, count=count)

    matches = response.json

    # `np.zeros` allocates the requested shape; `np.array((1, count))` would
    # merely build the two-element array `[1, count]`.
    # NOTE(review): labels are stored as uint32 - confirm that is wide enough.
    labels = np.zeros((1, count), dtype=np.uint32)
    distances = np.zeros((1, count), dtype=np.float32)
    counts = np.zeros(1, dtype=np.uint32)

    # Never write past the allocated columns, even if the server over-delivers.
    found = min(len(matches), count)
    for col, result in enumerate(matches[:found]):
        labels[0, col] = result['label']
        distances[0, col] = result['distance']
    counts[0] = found

    return labels, distances, counts

def search(self, vectors: np.array, count: int) -> tuple[np.array, np.array, np.array]:
matches = []
distances = []
counts = []
# return self.client.search_one(vectors=vectors, count=count)
return matches, distances, counts
def search_many(self, vectors: np.ndarray, count: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Query the server with a batch of vectors in one remote call.

    Args:
        vectors: 2-D array, one query vector per row.
        count: Maximum number of matches per query.

    Returns:
        A `(labels, distances, counts)` tuple with shapes
        `(batch, count)`, `(batch, count)` and `(batch,)`. Slots beyond the
        number of matches found for a row stay zero.
    """
    batch_size: int = vectors.shape[0]
    response = self.client.search_many(vectors=vectors, count=count)
    # `search_one` reads its reply from a `.json` attribute; accept the same
    # wrapper here, and fall back to the object itself when it is a bare list.
    list_of_matches: list[list[dict]] = getattr(response, 'json', response)

    # `np.zeros` allocates the requested shape; `np.array((batch, count))`
    # would merely build the two-element array `[batch, count]`.
    labels = np.zeros((batch_size, count), dtype=np.uint32)
    distances = np.zeros((batch_size, count), dtype=np.float32)
    counts = np.zeros(batch_size, dtype=np.uint32)

    for row, matches in enumerate(list_of_matches):
        # Never write past the allocated columns.
        found = min(len(matches), count)
        for col, result in enumerate(matches[:found]):
            labels[row, col] = result['label']
            distances[row, col] = result['distance']
        counts[row] = found

    return labels, distances, counts

def search(self, vectors: np.ndarray, count: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Search for one or many vectors, dispatching on the shape of `vectors`.

    A 1-D array, or a 2-D array with a single row, goes to `search_one`;
    everything else goes to `search_many`.

    Args:
        vectors: Query vector(s).
        count: Maximum number of matches per query.

    Returns:
        The `(labels, distances, counts)` tuple of the chosen method.
    """
    is_single = vectors.ndim == 1 or (
        vectors.ndim == 2 and vectors.shape[0] == 1)
    handler = self.search_one if is_single else self.search_many
    return handler(vectors, count)

def __len__(self):
return self.client.size()
return self.client.size().json()

@property
def ndim(self):
return self.client.ndim()
return self.client.ndim().json()

def capacity(self):
return self.client.capacity()
return self.client.capacity().json()

def connectivity(self):
return self.client.connectivity()
return self.client.connectivity().json()

def load(self, path: str):
    """Not available on the client.

    Args:
        path: Unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
Expand Down
79 changes: 58 additions & 21 deletions python/usearch/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,42 @@
from usearch.index import Index


def _results_to_json(results: tuple[np.ndarray, np.ndarray, np.ndarray], row: int) -> list[dict]:
count = results[2][row]
labels = results[0][row, :count]
distances = results[1][row, :count]
return [{'label': int(l), 'distance': float(d)} for l, d in zip(labels, distances)]


def _ascii_to_vector(string: str) -> np.ndarray:
"""
WARNING: A dirty performance hack!
Assuming the `f8` vectors in our implementations are just integers,
and generally contain scalars in the [0, 100] range, we can transmit
them as JSON-embedded strings. The only symbols we must avoid are
the double-quote '"' (code 22) and backslash '\' (code 60).
Printable ASCII characters are in [20, 126].
"""
vector = np.array(string, dtype=np.int8)
vector[vector == 124] = 60
vector -= 23
return vector


def serve(
ndim: int, metric: str = 'ip',
ndim_: int, metric: str = 'ip',
port: int = 8545, threads: int = 1,
path: str = 'index.usearch', immutable: bool = False):

server = Server(port=port)
index = Index(ndim=ndim, metric=metric)
index = Index(ndim=ndim_, metric=metric)

if os.path.exists(path):
if immutable:
index.view(path)
else:
index.load(path)

@server
def add_one(label: int, vector: np.array):
labels = np.array([label], dtype=np.longlong)
vectors = vector.reshape(vector.shape[0], 1)
index.add(labels, vectors, copy=True)

@server
def add_many(labels: np.array, vectors: np.array):
labels = labels.astype(np.longlong)
index.add(labels, vectors, threads=threads, copy=True)

@server
def search_one(vector: np.array, count: int) -> np.ndarray:
vectors = vector.reshape(vector.shape[0], 1)
results = index.search(vectors, 3)
return results[0][:results[2][0]]

@server
def size() -> int:
return len(index)
Expand All @@ -56,6 +61,38 @@ def capacity() -> int:
def connectivity() -> int:
return index.connectivity()

@server
def add_one(label: int, vector: np.ndarray):
    """Remote call: insert one labelled vector into the index.

    Args:
        label: Label of the vector; stored as `np.longlong`.
        vector: Array holding exactly one vector.
    """
    labels = np.array([label], dtype=np.longlong)
    # One label needs exactly one row. `reshape(vector.shape[0], 1)` produced
    # as many one-dimensional rows as the vector has scalars.
    vectors = vector.reshape(1, -1)
    index.add(labels, vectors)

@server
def add_many(labels: np.ndarray, vectors: np.ndarray):
    """Remote call: insert a batch of labelled vectors into the index.

    Labels are cast to `np.longlong`; the insert uses the `threads` value
    from the enclosing scope.
    """
    wide_labels = labels.astype(np.longlong)
    index.add(wide_labels, vectors, threads=threads)

@server
def search_one(vector: np.ndarray, count: int) -> list[dict]:
    """Remote call: find up to `count` matches for a single vector.

    Args:
        vector: Array holding exactly one query vector.
        count: Maximum number of matches.

    Returns:
        The first result row as converted by `_results_to_json`.
    """
    # A single query is one row. `reshape(vector.shape[0], 1)` turned it into
    # as many one-dimensional queries as the vector has scalars.
    vectors = vector.reshape(1, -1)
    results = index.search(vectors, count)
    return _results_to_json(results, 0)

@server
def search_many(vectors: np.ndarray, count: int) -> list[list[dict]]:
    """Remote call: find up to `count` matches for every row of `vectors`.

    Returns:
        One `_results_to_json` list per row of `vectors`, in row order.
    """
    results = index.search(vectors, count)
    per_query = []
    for row in range(vectors.shape[0]):
        per_query.append(_results_to_json(results, row))
    return per_query

@server
def add_ascii(label: int, string: str):
    """Remote call: decode `string` with `_ascii_to_vector`, then `add_one`."""
    decoded = _ascii_to_vector(string)
    return add_one(label, decoded)

@server
def search_ascii(string: str, count: int):
    """Remote call: decode `string` with `_ascii_to_vector`, then `search_one`."""
    decoded = _ascii_to_vector(string)
    return search_one(decoded, count)

try:
server.run()
except KeyboardInterrupt:
Expand All @@ -75,7 +112,7 @@ def connectivity() -> int:
help='the index can not be updated')

parser.add_argument(
'--metric', type=str, default='ip', choices=['ip', 'cos', 'l2', 'haversine'],
'--metric', type=str, default='ip', choices=['ip', 'cos', 'l2sq', 'haversine'],
help='distance function to compare vectors')
parser.add_argument(
'-p', '--port', type=int, default=8545,
Expand All @@ -90,6 +127,6 @@ def connectivity() -> int:
args = parser.parse_args()
assert args.ndim is not None, 'Define the number of dimensions!'
serve(
ndim=args.ndim, metric=args.metric,
ndim_=args.ndim, metric=args.metric,
threads=args.threads, port=args.port,
path=args.path, immutable=args.immutable)

0 comments on commit 7be0c86

Please sign in to comment.