From 16211ad5fe54c33b29f4f93ca3982ad168ed4dc0 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:07:50 +0000
Subject: [PATCH 01/39] feat: Add pyproject.toml and pre-commit
---
.github/dependabot.yml | 10 +++++
.github/matchers/pylint.json | 32 ++++++++++++++
.pre-commit-config.yaml | 85 ++++++++++++++++++++++++++++++++++++
pyproject.toml | 53 ++++++++++++++++++++++
4 files changed, 180 insertions(+)
create mode 100644 .github/dependabot.yml
create mode 100644 .github/matchers/pylint.json
create mode 100644 .pre-commit-config.yaml
create mode 100644 pyproject.toml
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..3459d67
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,10 @@
+version: 2
+updates:
+ # Maintain dependencies for GitHub Actions
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ # Raise pull requests for version updates
+ # to pip against the `main` branch
+ target-branch: "main"
\ No newline at end of file
diff --git a/.github/matchers/pylint.json b/.github/matchers/pylint.json
new file mode 100644
index 0000000..ee5a60b
--- /dev/null
+++ b/.github/matchers/pylint.json
@@ -0,0 +1,32 @@
+{
+ "problemMatcher": [
+ {
+ "severity": "warning",
+ "pattern": [
+ {
+ "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+ "file": 1,
+ "line": 2,
+ "column": 3,
+ "code": 4,
+ "message": 5
+ }
+ ],
+ "owner": "pylint-warning"
+ },
+ {
+ "severity": "error",
+ "pattern": [
+ {
+ "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+ "file": 1,
+ "line": 2,
+ "column": 3,
+ "code": 4,
+ "message": 5
+ }
+ ],
+ "owner": "pylint-error"
+ }
+ ]
+ }
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..c0dc4cf
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,85 @@
+exclude: |
+ (?x)^(
+ tests/utils/
+ )
+
+ci:
+ autoupdate_commit_msg: "chore: update pre-commit hooks"
+ autofix_commit_msg: "style: pre-commit fixes"
+
+repos:
+ - repo: https://github.com/psf/black
+ rev: "24.2.0"
+ hooks:
+ - id: black-jupyter
+
+ - repo: https://github.com/asottile/blacken-docs
+ rev: "1.16.0"
+ hooks:
+ - id: blacken-docs
+ additional_dependencies: [black==23.7.0]
+
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: "v4.5.0"
+ hooks:
+ - id: check-added-large-files
+ - id: check-case-conflict
+ - id: check-merge-conflict
+ - id: check-symlinks
+ - id: check-yaml
+ - id: debug-statements
+ - id: end-of-file-fixer
+ - id: mixed-line-ending
+ - id: name-tests-test
+ args: ["--pytest-test-first"]
+ - id: requirements-txt-fixer
+ - id: trailing-whitespace
+
+ - repo: https://github.com/pre-commit/pygrep-hooks
+ rev: "v1.10.0"
+ hooks:
+ - id: rst-backticks
+ - id: rst-directive-colons
+ - id: rst-inline-touching-normal
+
+ - repo: https://github.com/pre-commit/mirrors-prettier
+ rev: "v4.0.0-alpha.8"
+ hooks:
+ - id: prettier
+ types_or: [yaml, markdown, html, css, scss, javascript, json]
+ args: [--prose-wrap=always]
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: "v0.3.2"
+ hooks:
+ - id: ruff
+ args: ["--fix", "--show-fixes"]
+
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: "v1.9.0"
+ hooks:
+ - id: mypy
+ files: src
+ args: ["--ignore-missing-imports"]
+ additional_dependencies:
+ - pytest
+
+ - repo: https://github.com/codespell-project/codespell
+ rev: "v2.2.6"
+ hooks:
+ - id: codespell
+ args: ["--write-changes", "--ignore-words", ".codespell-whitelist"]
+
+ - repo: https://github.com/kynan/nbstripout
+ rev: 0.7.1
+ hooks:
+ - id: nbstripout
+ args: [--extra-keys=metadata.kernelspec metadata.language_info.version]
+
+ - repo: local
+ hooks:
+ - id: disallow-caps
+ name: Disallow improper capitalization
+ language: pygrep
+ entry: PyBind|Numpy|Cmake|CCache|Github|PyTest
+ exclude: .pre-commit-config.yaml
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..06eee31
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,53 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "vectordb2"
+dynamic = ["version"]
+description = "A lightweight Python package for storing and retrieving text using chunking, embedding, and vector search"
+readme = "README.md"
+license = ""
+authors = [
+ { name = "Vladimir Prelovac", email = "vlad@kagi.com" },
+]
+keywords = [
+ "chunking",
+ "embedding",
+ "search",
+ "text",
+ "vector",
+]
+classifiers = [
+ "Development Status :: 3 - Alpha",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+]
+dependencies = [
+ "faiss-cpu",
+ "numpy>=1.21.0",
+ "scikit-learn>=0.24.0",
+ "scipy>=1.7.0",
+ "sentence_transformers",
+ "tensorflow_text",
+ "torch>=1.9.0",
+ "transformers>=4.10.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/kagisearch/vectordb"
+
+[tool.hatch.version]
+path = "vectordb/__init__.py"
+
+[tool.hatch.build.targets.sdist]
+include = [
+ "/vectordb",
+]
From 455e04448f8112a47ade1ed7ebdfc3f0deb60dac Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:10:34 +0000
Subject: [PATCH 02/39] refactor: Update with pre-commit
---
.github/dependabot.yml | 2 +-
.github/matchers/pylint.json | 62 +++++++--------
.github/workflows/pylint.yml | 24 +++---
.gitignore | 2 +-
.pre-commit-config.yaml | 2 +-
README.md | 147 ++++++++++++++++++++---------------
images/.init | 1 -
setup.py | 4 +-
vectordb/__init__.py | 1 -
vectordb/memory.py | 19 ++++-
vectordb/vector_search.py | 4 +-
11 files changed, 151 insertions(+), 117 deletions(-)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 3459d67..500dfaa 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -7,4 +7,4 @@ updates:
interval: "weekly"
# Raise pull requests for version updates
# to pip against the `main` branch
- target-branch: "main"
\ No newline at end of file
+ target-branch: "main"
diff --git a/.github/matchers/pylint.json b/.github/matchers/pylint.json
index ee5a60b..e3a6bd1 100644
--- a/.github/matchers/pylint.json
+++ b/.github/matchers/pylint.json
@@ -1,32 +1,32 @@
{
- "problemMatcher": [
- {
- "severity": "warning",
- "pattern": [
- {
- "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
- "file": 1,
- "line": 2,
- "column": 3,
- "code": 4,
- "message": 5
- }
- ],
- "owner": "pylint-warning"
- },
- {
- "severity": "error",
- "pattern": [
- {
- "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
- "file": 1,
- "line": 2,
- "column": 3,
- "code": 4,
- "message": 5
- }
- ],
- "owner": "pylint-error"
- }
- ]
- }
\ No newline at end of file
+ "problemMatcher": [
+ {
+ "severity": "warning",
+ "pattern": [
+ {
+ "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+ "file": 1,
+ "line": 2,
+ "column": 3,
+ "code": 4,
+ "message": 5
+ }
+ ],
+ "owner": "pylint-warning"
+ },
+ {
+ "severity": "error",
+ "pattern": [
+ {
+ "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+ "file": 1,
+ "line": 2,
+ "column": 3,
+ "code": 4,
+ "message": 5
+ }
+ ],
+ "owner": "pylint-error"
+ }
+ ]
+}
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 383e65c..8713965 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -9,15 +9,15 @@ jobs:
matrix:
python-version: ["3.8", "3.9", "3.10"]
steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v3
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pylint
- - name: Analysing the code with pylint
- run: |
- pylint $(git ls-files '*.py')
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v3
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install pylint
+ - name: Analysing the code with pylint
+ run: |
+ pylint $(git ls-files '*.py')
diff --git a/.gitignore b/.gitignore
index 64fa3de..9bdda1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,4 +160,4 @@ cython_debug/
#.idea/
-*~
\ No newline at end of file
+*~
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c0dc4cf..9217977 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -82,4 +82,4 @@ repos:
name: Disallow improper capitalization
language: pygrep
entry: PyBind|Numpy|Cmake|CCache|Github|PyTest
- exclude: .pre-commit-config.yaml
\ No newline at end of file
+ exclude: .pre-commit-config.yaml
diff --git a/README.md b/README.md
index 7df1263..16ad2f8 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,20 @@
# VectorDB
-[](https://discord.gg/aDNg6E9szy) [](https://twitter.com/KagiHQ) [](https://opensource.org/license/mit/)
+[](https://discord.gg/aDNg6E9szy)
+[](https://twitter.com/KagiHQ)
+[](https://opensource.org/license/mit/)
-VectorDB is a simple, lightweight, fully local, end-to-end solution for using embeddings-based text retrieval.
+VectorDB is a simple, lightweight, fully local, end-to-end solution for using
+embeddings-based text retrieval.
-Thanks to its low latency and small memory footprint, VectorDB is used to power AI features inside [Kagi Search](https://kagi.com).
-
-Check an [example Colab notebook](https://colab.research.google.com/drive/1pecKGCCru_Jvx7v0WRNrW441EBlcS5qS#scrollTo=Eh6o8m7d8eOk) where this is used to filter the content of [Kagi Small Web](https://kagi.com/smallweb) RSS feed based on stated user interests.
+Thanks to its low latency and small memory footprint, VectorDB is used to power
+AI features inside [Kagi Search](https://kagi.com).
+Check an
+[example Colab notebook](https://colab.research.google.com/drive/1pecKGCCru_Jvx7v0WRNrW441EBlcS5qS#scrollTo=Eh6o8m7d8eOk)
+where this is used to filter the content of
+[Kagi Small Web](https://kagi.com/smallweb) RSS feed based on stated user
+interests.
## Installation
@@ -19,7 +26,9 @@ pip install vectordb2
## Usage
-Quick example that loads data into memory, and runs retrieval. All data will be handled locally, including embeddings and vector search, completely trasparent for the user with maximum possible performance.
+Quick example that loads data into memory, and runs retrieval. All data will be
+handled locally, including embeddings and vector search, completely trasparent
+for the user with maximum possible performance.
```python
from vectordb import Memory
@@ -28,24 +37,31 @@ from vectordb import Memory
memory = Memory()
memory.save(
- ["apples are green", "oranges are orange"], # save your text content. for long text we will automatically chunk it
- [{"url": "https://apples.com"}, {"url": "https://oranges.com"}], # associate any kind of metadata with it (optional)
+ [
+ "apples are green",
+ "oranges are orange",
+ ], # save your text content. for long text we will automatically chunk it
+ [
+ {"url": "https://apples.com"},
+ {"url": "https://oranges.com"},
+ ], # associate any kind of metadata with it (optional)
)
# Search for top n relevant results, automatically using embeddings
query = "green"
-results = memory.search(query, top_n = 1)
+results = memory.search(query, top_n=1)
print(results)
```
-This returns the chunks with the added metadata and the vector distance (where 0 is the exact match and higher means further apart)
+This returns the chunks with the added metadata and the vector distance (where 0
+is the exact match and higher means further apart)
```json
[
{
"chunk": "apples are green",
- "metadata": {"url": "https://apples.com"},
+ "metadata": { "url": "https://apples.com" },
"distance": 0.87
}
]
@@ -53,62 +69,72 @@ This returns the chunks with the added metadata and the vector distance (where 0
## Options
-
**Memory(memory_file=None, chunking_strategy={"mode":"sliding_window"},
embeddings="normal")**
+- `memory_file`: _Optional._ Path to the memory file. If provided, memory will
+ be persisted to disk and loaded from/saved to this file.
+- `chunking_strategy`: _Optional._ Dictionary containing the chunking mode.
-- `memory_file`: *Optional.* Path to the memory file. If provided, memory will persist to disk and loaded/saved to this file.
-- `chunking_strategy`: *Optional.* Dictionary containing the chunking mode.
-
- Options:\
- `{'mode':'sliding_window', 'window_size': 240, 'overlap': 8}` (default)\
+ Options:\
+ `{'mode':'sliding_window', 'window_size': 240, 'overlap': 8}` (default)\
`{'mode':'paragraph'}`
-- `embeddings`: *Optional.*
-
- Options:\
+
+- `embeddings`: _Optional._
+
+ Options:\
`fast` - Uses Universal Sentence Encoder 4\
`normal` - Uses "BAAI/bge-small-en-v1.5" (default)\
`best` - Uses "BAAI/bge-base-en-v1.5"\
`multilingual` - Uses Universal Sentence Encoder Multilingual Large 3
-
- You can also specify a custom HuggingFace model by name eg. `TaylorAI/bge-micro-v2`. See also [Pretrained models](https://www.sbert.net/docs/pretrained_models.html) and [MTEB](https://huggingface.co/spaces/mteb/leaderboard).
+ You can also specify a custom HuggingFace model by name, e.g.
+ `TaylorAI/bge-micro-v2`. See also
+ [Pretrained models](https://www.sbert.net/docs/pretrained_models.html) and
+ [MTEB](https://huggingface.co/spaces/mteb/leaderboard).
**Memory.save(texts, metadata, memory_file=None)**
-Save content to memory. Metadata will be automatically optimized to use less resources.
+Save content to memory. Metadata will be automatically optimized to use less
+resources.
-- `texts`: *Required.* Text or list of texts to be saved.
-- `metdata`: *Optional.* Metadata or list of metadata associated with the texts.
-- `memory_file`: *Optional.* Path to persist the memory file. By default
+- `texts`: _Required._ Text or list of texts to be saved.
+- `metdata`: _Optional._ Metadata or list of metadata associated with the texts.
+- `memory_file`: _Optional._ Path to persist the memory file. By default
**Memory.search(query, top_n=5, unique=False, batch_results="flatten")**
Search inside memory.
-- `query`: *Required.* Query text or list of queries (see `batch_results` option below for handling results for a list).
-- `top_n`: *Optional.* Number of most similar chunks to return (default: 5).
-- `unique`: *Optional.* Return only items chunks from unique original texts (additional chunks coming from the same text will be ignored). Note this may return less chhunks than requested (default: False).
-- `batch_results`: *Optional.* When input is a list of queries, output algorithm can be "flatten" or "diverse". Flatten returns true nearest neighbours across all input queries, meaning all results could come from just one query. "diverse" attempts to spread out the results, so that each query's nearest neighbours are equally added (neareast first across all queries, than 2nd nearest and so on). (default: "flatten")
+- `query`: _Required._ Query text or list of queries (see `batch_results` option
+ below for handling results for a list).
+- `top_n`: _Optional._ Number of most similar chunks to return (default: 5).
+- `unique`: _Optional._ Return only chunks from unique original texts
+ (additional chunks coming from the same text will be ignored). Note this may
+ return fewer chunks than requested (default: False).
+- `batch_results`: _Optional._ When input is a list of queries, output algorithm
+ can be "flatten" or "diverse". Flatten returns true nearest neighbours across
+ all input queries, meaning all results could come from just one query.
+ "diverse" attempts to spread out the results, so that each query's nearest
+ neighbours are equally added (nearest first across all queries, then 2nd
+ nearest and so on). (default: "flatten")
**Memory.clear()**
Clears the memory.
-
**Memory.dump()**
Prints the contents of the memory.
-
## Example
```python
from vectordb import Memory
memory = Memory(
- chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16}, embeddings='TaylorAI/bge-micro-v2'
+ chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16},
+ embeddings="TaylorAI/bge-micro-v2",
)
texts = [
@@ -184,6 +210,7 @@ print(results)
```
Output:
+
```json
[
{
@@ -203,48 +230,46 @@ Output:
"distance": 0.83
}
]
-
```
## Embeddings performance analysis
-
-We constantly evaluate embedding models using standardized benchmarks (higher is better). Average latency is measured locally on CPU (lower is better). Benchmark data pulled from [MTEB](https://huggingface.co/spaces/mteb/leaderboard).
-
-
-
-| Model | Latency | Benchmark 1 | Benchmark 2 | Benchmark 3 | Benchmark 4 |
-|-----------------------------------------------|----------|-------------|-------------|-------------|-------------|
-| all-mpnet-base-v2 | 6.12 s | 80.28 | 65.07 | 43.69 | 83.04 |
-| all-MiniLM-L6-v2 | 1.14 s | 78.9 | 63.05 | 42.35 | 82.37 |
-| BAAI/bge-large-en-v1.5 | 20.8 s | 83.11 | 75.97 | 46.08 | 87.12 |
-| BAAI/bge-base-en-v1.5 | 6.48 s | 82.4 | 75.53 | 45.77 | 86.55 |
-| BAAI/bge-small-en-v1.5 | 1.85 s | 81.59 | 74.14 | 43.82 | 84.92 |
-| TaylorAI/bge-micro-v2 | 0.671 s | 78.65 | 68.04 | 39.18 | 82.81 |
-| TaylorAI/gte-tiny | 1.25 s | 80.46 | 70.35 | 42.09 | 82.83 |
-| thenlper/gte-base | 6.28 s | 82.3 | 73.01 | 46.2 | 84.57 |
-| thenlper/gte-small | 2.14 s | 82.07 | 72.31 | 44.89 | 83.54 |
-| universal-sentence-encoder-large/5 | 0.769 s | 74.05 | 67.9 | 37.82 | 79.53 |
-| universal-sentence-encoder-multilingual-large/3| 1.02 s | 75.35 | 65.78 | 35.06 | 79.62 |
-| universal-sentence-encoder-multilingual/3 | 0.162 s | 75.39 | 63.42 | 34.82 | 75.43 |
-| universal-sentence-encoder/4 | 0.019 s | 72.04 | 64.45 | 35.71 | 76.23 |
-
-*Relative embeddings latency on CPU*
+We constantly evaluate embedding models using standardized benchmarks (higher is
+better). Average latency is measured locally on CPU (lower is better). Benchmark
+data pulled from [MTEB](https://huggingface.co/spaces/mteb/leaderboard).
+
+| Model | Latency | Benchmark 1 | Benchmark 2 | Benchmark 3 | Benchmark 4 |
+| ----------------------------------------------- | ------- | ----------- | ----------- | ----------- | ----------- |
+| all-mpnet-base-v2 | 6.12 s | 80.28 | 65.07 | 43.69 | 83.04 |
+| all-MiniLM-L6-v2 | 1.14 s | 78.9 | 63.05 | 42.35 | 82.37 |
+| BAAI/bge-large-en-v1.5 | 20.8 s | 83.11 | 75.97 | 46.08 | 87.12 |
+| BAAI/bge-base-en-v1.5 | 6.48 s | 82.4 | 75.53 | 45.77 | 86.55 |
+| BAAI/bge-small-en-v1.5 | 1.85 s | 81.59 | 74.14 | 43.82 | 84.92 |
+| TaylorAI/bge-micro-v2 | 0.671 s | 78.65 | 68.04 | 39.18 | 82.81 |
+| TaylorAI/gte-tiny | 1.25 s | 80.46 | 70.35 | 42.09 | 82.83 |
+| thenlper/gte-base | 6.28 s | 82.3 | 73.01 | 46.2 | 84.57 |
+| thenlper/gte-small | 2.14 s | 82.07 | 72.31 | 44.89 | 83.54 |
+| universal-sentence-encoder-large/5 | 0.769 s | 74.05 | 67.9 | 37.82 | 79.53 |
+| universal-sentence-encoder-multilingual-large/3 | 1.02 s | 75.35 | 65.78 | 35.06 | 79.62 |
+| universal-sentence-encoder-multilingual/3 | 0.162 s | 75.39 | 63.42 | 34.82 | 75.43 |
+| universal-sentence-encoder/4 | 0.019 s | 72.04 | 64.45 | 35.71 | 76.23 |
+
+_Relative embeddings latency on CPU_

-*Relative embeddings latency on GPU*
+_Relative embeddings latency on GPU_

-


-
-
## Vector search performance analysis
-VectorDB is also optimized for speed of retrieval. We automatically uses [Faiss](https://github.com/facebookresearch/faiss) for low number of chunks (<4000) and [mrpt](https://github.com/vioshyvo/mrpt) for high number of chunks to ensure maximum performance across the spectrum of use cases.
+VectorDB is also optimized for speed of retrieval. We automatically use
+[Faiss](https://github.com/facebookresearch/faiss) for low number of chunks
+(<4000) and [mrpt](https://github.com/vioshyvo/mrpt) for high number of chunks
+to ensure maximum performance across the spectrum of use cases.

diff --git a/images/.init b/images/.init
index 8b13789..e69de29 100644
--- a/images/.init
+++ b/images/.init
@@ -1 +0,0 @@
-
diff --git a/setup.py b/setup.py
index c09b245..abaa9eb 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-#pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
+# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
from setuptools import setup, find_packages
@@ -14,7 +14,7 @@
"scipy>=1.7.0",
"sentence_transformers",
"faiss-cpu",
- "tensorflow_text"
+ "tensorflow_text",
],
author="Vladimir Prelovac",
author_email="vlad@kagi.com",
diff --git a/vectordb/__init__.py b/vectordb/__init__.py
index f1db8dc..6a368e6 100644
--- a/vectordb/__init__.py
+++ b/vectordb/__init__.py
@@ -1,3 +1,2 @@
# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
-from .memory import Memory
diff --git a/vectordb/memory.py b/vectordb/memory.py
index a5a8d47..345988a 100644
--- a/vectordb/memory.py
+++ b/vectordb/memory.py
@@ -3,6 +3,7 @@
for text and associated metadata, with functionality for saving, searching, and
managing memory entries.
"""
+
# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
from typing import List, Dict, Any, Union
@@ -130,10 +131,16 @@ def save(
self.memory.append(entry)
if memory_file is not None:
- Storage(self.memory_file).save_to_disk([{"memory": self.memory, "metadata" :self.metadata_memory}])
+ Storage(self.memory_file).save_to_disk(
+ [{"memory": self.memory, "metadata": self.metadata_memory}]
+ )
def search(
- self, query: str, top_n: int = 5, unique: bool = False, batch_results: str = "flatten"
+ self,
+ query: str,
+ top_n: int = 5,
+ unique: bool = False,
+ batch_results: str = "flatten",
) -> List[Dict[str, Any]]:
"""
Searches for the most similar chunks to the given query in memory.
@@ -154,7 +161,9 @@ def search(
if len(embeddings) == 0:
return []
- indices = self.vector_search.search_vectors(query_embedding, embeddings, top_n, batch_results)
+ indices = self.vector_search.search_vectors(
+ query_embedding, embeddings, top_n, batch_results
+ )
if unique:
unique_indices = []
seen_text_indices = set() # Change the variable name
@@ -192,7 +201,9 @@ def clear(self):
self.text_index_counter = 0
if self.memory_file is not None:
- Storage(self.memory_file).save_to_disk([{"memory": self.memory, "metadata" :self.metadata_memory}])
+ Storage(self.memory_file).save_to_disk(
+ [{"memory": self.memory, "metadata": self.metadata_memory}]
+ )
def dump(self):
"""
diff --git a/vectordb/vector_search.py b/vectordb/vector_search.py
index 04075eb..7ac3763 100644
--- a/vectordb/vector_search.py
+++ b/vectordb/vector_search.py
@@ -51,7 +51,7 @@ def get_unique_k_elements(i, d, k=15, diverse=False):
dd.append(dist)
if len(ii) >= k:
break
-
+
return np.array(ii), np.array(dd)
@staticmethod
@@ -114,7 +114,7 @@ def search_vectors(
:param top_n: the number of most similar vectors to return.
:param batch_results: when input is a list of vectors, output algo can be "flatten" or "diverse"
:return: a list of indices of the top_n most similar vectors in the embeddings.
-
+
"""
if isinstance(query_embedding, list):
query_embedding = np.array(query_embedding).astype(np.float32)
From ddfe3331356cf8595f9e682b0632eace0a06e746 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:11:26 +0000
Subject: [PATCH 03/39] refactor: Remove extra whitespace
---
vectordb/__init__.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/vectordb/__init__.py b/vectordb/__init__.py
index 6a368e6..c7b3add 100644
--- a/vectordb/__init__.py
+++ b/vectordb/__init__.py
@@ -1,2 +1 @@
# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
-
From 1aa706fccab420e7ef08674428f307daed82eae7 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:28:31 +0000
Subject: [PATCH 04/39] feat: Add codespell-whitelist for pre-commit and fix
spellings in README and embedding
---
.codespell-whitelist | 0
.github/workflows/ci.yml | 74 ++++++++++++++++++++++++++++++++++++
.github/workflows/pylint.yml | 23 -----------
README.md | 6 +--
vectordb/embedding.py | 2 +-
5 files changed, 78 insertions(+), 27 deletions(-)
create mode 100644 .codespell-whitelist
create mode 100644 .github/workflows/ci.yml
delete mode 100644 .github/workflows/pylint.yml
diff --git a/.codespell-whitelist b/.codespell-whitelist
new file mode 100644
index 0000000..e69de29
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..c2799e2
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+# This workflow installs Python dependencies and runs the tests across several Python versions and operating systems
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: CI
+
+on:
+ workflow_dispatch:
+ pull_request:
+ push:
+ branches:
+ - main
+ - dev
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+env:
+ FORCE_COLOR: 3
+ PROJECT_NAME: "caustics"
+
+jobs:
+ build:
+ runs-on: ${{matrix.os}}
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.9", "3.10", "3.11"]
+ os: [ubuntu-latest, windows-latest, macOS-latest]
+
+ steps:
+ - name: Checkout vectordb
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ allow-prereleases: true
+
+ - name: Record State
+ run: |
+ pwd
+ echo github.ref is: ${{ github.ref }}
+ echo GITHUB_SHA is: $GITHUB_SHA
+ echo github.event_name is: ${{ github.event_name }}
+ echo github workspace: ${{ github.workspace }}
+ pip --version
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install pytest pytest-cov torch wheel
+
+ # We only want to install this on one run, because otherwise we'll have
+ # duplicate annotations.
+ - name: Install error reporter
+ if: ${{ matrix.python-version == '3.10' }}
+ run: |
+ python -m pip install pytest-github-actions-annotate-failures
+
+ - name: Install vectordb
+ run: |
+ pip install -e ".[dev]"
+ pip show ${{ env.PROJECT_NAME }}
+
+ - name: Test with pytest
+ run: |
+ pytest -vvv --cov=${{ env.PROJECT_NAME }} --cov-report=xml --cov-report=term tests/
+
+ - name: Upload coverage reports to Codecov with GitHub Action
+ uses: codecov/codecov-action@v4
\ No newline at end of file
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
deleted file mode 100644
index 8713965..0000000
--- a/.github/workflows/pylint.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Pylint
-
-on: [push]
-
-jobs:
- build:
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: ["3.8", "3.9", "3.10"]
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v3
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pylint
- - name: Analysing the code with pylint
- run: |
- pylint $(git ls-files '*.py')
diff --git a/README.md b/README.md
index 16ad2f8..201604a 100644
--- a/README.md
+++ b/README.md
@@ -20,14 +20,14 @@ interests.
To install VectorDB, use pip:
-```
+```shell
pip install vectordb2
```
## Usage
Quick example that loads data into memory, and runs retrieval. All data will be
-handled locally, including embeddings and vector search, completely trasparent
+handled locally, including embeddings and vector search, completely transparent
for the user with maximum possible performance.
```python
@@ -99,7 +99,7 @@ Save content to memory. Metadata will be automatically optimized to use less
resources.
- `texts`: _Required._ Text or list of texts to be saved.
-- `metdata`: _Optional._ Metadata or list of metadata associated with the texts.
+- `metadata`: _Optional._ Metadata or list of metadata associated with the texts.
- `memory_file`: _Optional._ Path to persist the memory file. By default
**Memory.search(query, top_n=5, unique=False, batch_results="flatten")**
diff --git a/vectordb/embedding.py b/vectordb/embedding.py
index c3809ef..80b150e 100644
--- a/vectordb/embedding.py
+++ b/vectordb/embedding.py
@@ -33,7 +33,7 @@ def __init__(self, model_name: str = "normal"):
for embeddings.
"""
self.sbert = True
- print("Initiliazing embeddings: ", model_name)
+ print("Initializing embeddings: ", model_name)
if model_name == "fast":
self.model = hub.load(
"https://tfhub.dev/google/universal-sentence-encoder/4"
From 2f6fd08c258db721452ab180741d93171e27475f Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:31:45 +0000
Subject: [PATCH 05/39] feat: Add requirements file
---
requirements.txt | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 requirements.txt
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d5aeebf
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+torch>=1.9.0,
+transformers>=4.10.0,
+numpy>=1.21.0,
+scikit-learn>=0.24.0,
+scipy>=1.7.0,
+sentence_transformers,
+faiss-cpu,
+tensorflow_text
\ No newline at end of file
From 6968b2d6ef69ff8a63652b105a594131b1002f2f Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:34:30 +0000
Subject: [PATCH 06/39] feat: Add CI for main and dev branches
---
.github/workflows/ci.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c2799e2..87d1654 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ concurrency:
env:
FORCE_COLOR: 3
- PROJECT_NAME: "caustics"
+ PROJECT_NAME: "vectordb2"
jobs:
build:
From 6584cbca28966c994ff0e26cdf8f17113217bb70 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 03:36:01 +0000
Subject: [PATCH 07/39] refactor: Fix PEP8 issues with README, requirements,
and ci files
---
.github/workflows/ci.yml | 2 +-
README.md | 3 ++-
requirements.txt | 8 ++++----
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 87d1654..6c417cb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -71,4 +71,4 @@ jobs:
pytest -vvv --cov=${{ env.PROJECT_NAME }} --cov-report=xml --cov-report=term tests/
- name: Upload coverage reports to Codecov with GitHub Action
- uses: codecov/codecov-action@v4
\ No newline at end of file
+ uses: codecov/codecov-action@v4
diff --git a/README.md b/README.md
index 201604a..a9e899d 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,8 @@ Save content to memory. Metadata will be automatically optimized to use less
resources.
- `texts`: _Required._ Text or list of texts to be saved.
-- `metadata`: _Optional._ Metadata or list of metadata associated with the texts.
+- `metadata`: _Optional._ Metadata or list of metadata associated with the
+ texts.
- `memory_file`: _Optional._ Path to persist the memory file. By default
**Memory.search(query, top_n=5, unique=False, batch_results="flatten")**
diff --git a/requirements.txt b/requirements.txt
index d5aeebf..6ff77e6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-torch>=1.9.0,
-transformers>=4.10.0,
+faiss-cpu,
numpy>=1.21.0,
scikit-learn>=0.24.0,
scipy>=1.7.0,
sentence_transformers,
-faiss-cpu,
-tensorflow_text
\ No newline at end of file
+tensorflow_text
+torch>=1.9.0,
+transformers>=4.10.0,
From 4e22f4a3eae3e5687db54dfd4c65cfbeb9a15914 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 04:07:12 +0000
Subject: [PATCH 08/39] refactor: Move vectordb to src and update __init__
---
.codespell-whitelist | 1 +
pyproject.toml | 6 +++---
{vectordb => src/vectordb}/__init__.py | 4 ++++
{vectordb => src/vectordb}/chunking.py | 0
{vectordb => src/vectordb}/embedding.py | 0
{vectordb => src/vectordb}/memory.py | 0
{vectordb => src/vectordb}/storage.py | 0
{vectordb => src/vectordb}/vector_search.py | 0
8 files changed, 8 insertions(+), 3 deletions(-)
rename {vectordb => src/vectordb}/__init__.py (67%)
rename {vectordb => src/vectordb}/chunking.py (100%)
rename {vectordb => src/vectordb}/embedding.py (100%)
rename {vectordb => src/vectordb}/memory.py (100%)
rename {vectordb => src/vectordb}/storage.py (100%)
rename {vectordb => src/vectordb}/vector_search.py (100%)
diff --git a/.codespell-whitelist b/.codespell-whitelist
index e69de29..db71550 100644
--- a/.codespell-whitelist
+++ b/.codespell-whitelist
@@ -0,0 +1 @@
+kagisearch
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 06eee31..67d12ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "vectordb2"
dynamic = ["version"]
description = "A lightweight Python package for storing and retrieving text using chunking, embedding, and vector search"
readme = "README.md"
-license = ""
+license = "LICENSE"
authors = [
{ name = "Vladimir Prelovac", email = "vlad@kagi.com" },
]
@@ -45,9 +45,9 @@ dependencies = [
Homepage = "https://github.com/kagisearch/vectordb"
[tool.hatch.version]
-path = "vectordb/__init__.py"
+path = "/src/vectordb/__init__.py"
[tool.hatch.build.targets.sdist]
include = [
- "/vectordb",
+ "/src/vectordb",
]
diff --git a/vectordb/__init__.py b/src/vectordb/__init__.py
similarity index 67%
rename from vectordb/__init__.py
rename to src/vectordb/__init__.py
index c7b3add..6b2dc16 100644
--- a/vectordb/__init__.py
+++ b/src/vectordb/__init__.py
@@ -1 +1,5 @@
# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
+from ._version import version as VERSION # noqa
+
+__version__ = VERSION
+__author__ = "kagisearch"
\ No newline at end of file
diff --git a/vectordb/chunking.py b/src/vectordb/chunking.py
similarity index 100%
rename from vectordb/chunking.py
rename to src/vectordb/chunking.py
diff --git a/vectordb/embedding.py b/src/vectordb/embedding.py
similarity index 100%
rename from vectordb/embedding.py
rename to src/vectordb/embedding.py
diff --git a/vectordb/memory.py b/src/vectordb/memory.py
similarity index 100%
rename from vectordb/memory.py
rename to src/vectordb/memory.py
diff --git a/vectordb/storage.py b/src/vectordb/storage.py
similarity index 100%
rename from vectordb/storage.py
rename to src/vectordb/storage.py
diff --git a/vectordb/vector_search.py b/src/vectordb/vector_search.py
similarity index 100%
rename from vectordb/vector_search.py
rename to src/vectordb/vector_search.py
From 70b053a1fafd133cb15037dc898f433bfd7d4294 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 06:09:46 +0000
Subject: [PATCH 09/39] refactor: Update the pyproject.toml to dynamically
handle versions
---
pyproject.toml | 50 +++++++++++++++++++++++++++-------------
requirements.txt | 14 +++++------
src/vectordb/_version.py | 16 +++++++++++++
tests/__init__.py | 0
4 files changed, 57 insertions(+), 23 deletions(-)
create mode 100644 src/vectordb/_version.py
create mode 100644 tests/__init__.py
diff --git a/pyproject.toml b/pyproject.toml
index 67d12ec..76dca15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,17 @@
[build-system]
-requires = ["hatchling"]
+requires = ["hatchling", "hatch-requirements-txt", "hatch-vcs"]
build-backend = "hatchling.build"
[project]
name = "vectordb2"
-dynamic = ["version"]
+dynamic = [
+ "dependencies",
+ "version"
+]
description = "A lightweight Python package for storing and retrieving text using chunking, embedding, and vector search"
readme = "README.md"
-license = "LICENSE"
+requires-python = ">=3.8"
+license = {file = "LICENSE"}
authors = [
{ name = "Vladimir Prelovac", email = "vlad@kagi.com" },
]
@@ -30,24 +34,38 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
-dependencies = [
- "faiss-cpu",
- "numpy>=1.21.0",
- "scikit-learn>=0.24.0",
- "scipy>=1.7.0",
- "sentence_transformers",
- "tensorflow_text",
- "torch>=1.9.0",
- "transformers>=4.10.0",
-]
[project.urls]
Homepage = "https://github.com/kagisearch/vectordb"
-[tool.hatch.version]
-path = "/src/vectordb/__init__.py"
+[project.optional-dependencies]
+dev = [
+ "pytest>=8.0,<9",
+ "pytest-cov>=4.1,<5",
+ "pytest-mock>=3.12,<4",
+ "pre-commit>=3.6,<4"
+]
+
+[tool.hatch.metadata]
+allow-direct-references = true
[tool.hatch.build.targets.sdist]
include = [
- "/src/vectordb",
+ "src/vectordb",
]
+
+[tool.hatch.metadata.hooks.requirements_txt]
+files = ["requirements.txt"]
+
+[tool.hatch.version]
+source = "vcs"
+
+[tool.hatch.build.hooks.vcs]
+version-file = "src/vectordb/_version.py"
+
+[tool.hatch.version.raw-options]
+local_scheme = "no-local-version"
+
+[tool.ruff]
+# Same as Black.
+line-length = 100
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 6ff77e6..1417962 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-faiss-cpu,
-numpy>=1.21.0,
-scikit-learn>=0.24.0,
-scipy>=1.7.0,
-sentence_transformers,
+faiss-cpu
+numpy>=1.21.0
+scikit-learn>=0.24.0
+scipy>=1.7.0
+sentence_transformers
tensorflow_text
-torch>=1.9.0,
-transformers>=4.10.0,
+torch>=1.9.0
+transformers>=4.10.0
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
new file mode 100644
index 0000000..656908c
--- /dev/null
+++ b/src/vectordb/_version.py
@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+ from typing import Tuple, Union
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+ VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.1.dev104'
+__version_tuple__ = version_tuple = (0, 1, 'dev104')
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
From eaefab85d0c8ab0e518d28e1d2725884533aebaa Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 06:18:06 +0000
Subject: [PATCH 10/39] docs: Add Makefile and JupyterBook requirements
---
CODE_OF_CONDUCT.md | 119 ++++++++++++++++++++++++++++++++++++++++++
docs/Makefile | 19 +++++++
docs/requirements.txt | 6 +++
3 files changed, 144 insertions(+)
create mode 100644 CODE_OF_CONDUCT.md
create mode 100644 docs/Makefile
create mode 100644 docs/requirements.txt
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..ef04fe7
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,119 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall
+ community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or advances of
+ any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
+ without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official email address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+[connor.stone@mila.quebec](mailto:connor.stone@mila.quebec). All complaints will
+be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the
+[Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html),
+version 2.1.
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..298ea9e
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,19 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..c9cb7bf
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,6 @@
+ipywidgets
+jupyter-book
+matplotlib
+pyro-ppl
+sphinx
+sphinx_rtd_theme
\ No newline at end of file
From 3eca2439d119e809a7359cfaaadd89e1900d0ee6 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 07:00:57 +0000
Subject: [PATCH 11/39] docs: Add community health files
---
.codespell-whitelist | 2 +-
.devcontainer/devcontainer.json | 0
.devcontainer/environment.yml | 0
.devcontainer/postBuild.sh | 0
.pre-commit-config.yaml | 16 ++--
CODE_OF_CONDUCT.md | 6 +-
CONTRIBUTING.md | 11 +++
docs/Makefile | 2 +-
docs/requirements.txt | 2 +-
docs/source/_toc.yml | 17 +++++
docs/source/contributing.rst | 130 ++++++++++++++++++++++++++++++++
docs/source/install.rst | 30 ++++++++
docs/source/license.rst | 24 ++++++
noxfile.py | 53 +++++++++++++
pyproject.toml | 2 +-
src/vectordb/__init__.py | 2 +-
src/vectordb/_version.py | 5 +-
17 files changed, 284 insertions(+), 18 deletions(-)
create mode 100644 .devcontainer/devcontainer.json
create mode 100644 .devcontainer/environment.yml
create mode 100644 .devcontainer/postBuild.sh
create mode 100644 CONTRIBUTING.md
create mode 100644 docs/source/_toc.yml
create mode 100644 docs/source/contributing.rst
create mode 100644 docs/source/install.rst
create mode 100644 docs/source/license.rst
create mode 100644 noxfile.py
diff --git a/.codespell-whitelist b/.codespell-whitelist
index db71550..dd29978 100644
--- a/.codespell-whitelist
+++ b/.codespell-whitelist
@@ -1 +1 @@
-kagisearch
\ No newline at end of file
+kagisearch
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..e69de29
diff --git a/.devcontainer/environment.yml b/.devcontainer/environment.yml
new file mode 100644
index 0000000..e69de29
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
new file mode 100644
index 0000000..e69de29
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9217977..9e5bf1e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -55,14 +55,14 @@ repos:
- id: ruff
args: ["--fix", "--show-fixes"]
- - repo: https://github.com/pre-commit/mirrors-mypy
- rev: "v1.9.0"
- hooks:
- - id: mypy
- files: src
- args: ["--ignore-missing-imports"]
- additional_dependencies:
- - pytest
+ # - repo: https://github.com/pre-commit/mirrors-mypy
+ # rev: "v1.9.0"
+ # hooks:
+ # - id: mypy
+ # files: src
+ # args: ["--ignore-missing-imports"]
+ # additional_dependencies:
+ # - pytest
- repo: https://github.com/codespell-project/codespell
rev: "v2.2.6"
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index ef04fe7..45f341c 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -60,8 +60,8 @@ representative at an online or offline event.
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
-[connor.stone@mila.quebec](mailto:connor.stone@mila.quebec). All complaints will
-be reviewed and investigated promptly and fairly.
+[vlad@kagi.com](mailto:vlad@kagi.com). All complaints will be
+reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
@@ -116,4 +116,4 @@ community.
This Code of Conduct is adapted from the
[Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html),
-version 2.1.
\ No newline at end of file
+version 2.1.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..72f51e5
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+## Contributing to Vectordb
+
+Thank you for your interest in contributing to vectordb! We welcome
+contributions from the community to help improve our project.
+
+To get started, please refer to our
+[online documentation](https://vectordb.readthedocs.io/en/latest/contributing.html)
+for detailed guidelines on how to contribute to vectordb.
+
+We appreciate your contributions and look forward to your involvement in making
+vectordb even better!
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
index 298ea9e..5128596 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -16,4 +16,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/requirements.txt b/docs/requirements.txt
index c9cb7bf..69c8e43 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,4 +3,4 @@ jupyter-book
matplotlib
pyro-ppl
sphinx
-sphinx_rtd_theme
\ No newline at end of file
+sphinx_rtd_theme
diff --git a/docs/source/_toc.yml b/docs/source/_toc.yml
new file mode 100644
index 0000000..b9773cd
--- /dev/null
+++ b/docs/source/_toc.yml
@@ -0,0 +1,17 @@
+# Table of contents
+# Learn more at https://jupyterbook.org/customize/toc.html
+
+format: jb-book
+root: intro
+chapters:
+ - file: getting_started
+ - file: install
+ - file: examples/index
+ sections:
+ - file: examples/Example_ImageFit_LM
+ - file: contributing
+ - file: citation
+ - file: license
+ - file: modules
+ - file: glossary
+ - file: genindex
\ No newline at end of file
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
new file mode 100644
index 0000000..b2eeaed
--- /dev/null
+++ b/docs/source/contributing.rst
@@ -0,0 +1,130 @@
+See the `Scientific Python Developer Guide <https://learn.scientific-python.org/development/>`_ for a detailed
+description of best practices for developing scientific packages.
+
+Quick development
+-----------------
+
+The fastest way to start with development is to use ``nox``. This will set up a
+virtual environment for you to run all the checks and tests. There are 2 ways to
+install ``nox``:
+
+Codespaces
+~~~~~~~~~~
+
+Nox is pre-installed in the Codespaces environment. So, after activating a
+Codespace, you can just open the terminal and run ``nox`` to run all the checks
+and tests.
+
+Local
+~~~~~
+
+If you don't have nox, you can do the following to install ``nox``:
+
+.. code-block:: bash
+
+ pip install nox
+
+If you use macOS, then ``nox`` is in brew:
+
+.. code-block:: bash
+
+ brew install nox
+
+Nox basics
+~~~~~~~~~~
+
+What is it?
+^^^^^^^^^^^
+
+``nox`` is a command-line tool that automates testing in multiple Python
+environments, similar to tox. Unlike tox, Nox uses a standard Python file for
+configuration, you can find this configuration in ``noxfile.py``.
+
+How do I use it?
+^^^^^^^^^^^^^^^^
+
+To use, run ``nox``. This will lint and test using every installed version of
+Python on your system, skipping ones that are not installed. You can also run
+specific jobs:
+
+.. code-block:: bash
+
+ nox -s lint # Lint only
+ nox -s tests # Python tests
+ nox -s build # Make an SDist and wheel
+
+Nox handles everything for you, including setting up a temporary virtual
+environment for each run.
+
+Setting up a development environment manually
+---------------------------------------------
+
+You can set up a development environment by running:
+
+.. code-block:: bash
+
+ python3 -m venv .venv
+ source ./.venv/bin/activate
+ pip install -v -e .[dev]
+
+If you have the
+`Python Launcher for Unix <https://github.com/brettcannon/python-launcher>`_, you
+can instead do:
+
+.. code-block:: bash
+
+ py -m venv .venv
+ py -m pip install -v -e .[dev]
+
+Post setup
+----------
+
+You should prepare pre-commit, which will help you by checking that commits pass
+required checks:
+
+.. code-block:: bash
+
+ pip install pre-commit # or brew install pre-commit on macOS
+ pre-commit install # Will install a pre-commit hook into the git repo
+
+You can also/alternatively run ``pre-commit run`` (changes only) or
+``pre-commit run --all-files`` to check even without installing the hook.
+
+Testing
+-------
+
+Use pytest to run the unit checks:
+
+.. code-block:: bash
+
+ pytest
+
+Coverage
+--------
+
+Use pytest-cov to generate coverage reports:
+
+.. code-block:: bash
+
+ pytest --cov=vectordb
+
+Pre-commit
+----------
+
+This project uses pre-commit for all style checking. While you can run it with
+nox, this is such an important tool that it deserves to be installed on its own.
+Install pre-commit and run:
+
+.. code-block:: bash
+
+ pre-commit run -a
+
+to check all files.
+
+Code of Conduct
+---------------
+
+By contributing to this project, you agree to abide by the Code of Conduct
+(see ``CODE_OF_CONDUCT.md`` in the repository root).
+Please make sure to read and understand the guidelines outlined in the Code
+of Conduct before making any contributions.
\ No newline at end of file
diff --git a/docs/source/install.rst b/docs/source/install.rst
new file mode 100644
index 0000000..96bd85c
--- /dev/null
+++ b/docs/source/install.rst
@@ -0,0 +1,30 @@
+
+Installation
+============
+
+Regular Install
+---------------
+
+The easiest way to install is to make a new virtual environment then run::
+
+ pip install vectordb2
+
+this will install all the required libraries and then install vectordb and you are ready to go! You can check out the tutorials afterwards to see some of vectordb's capabilities.
+
+
+Developer Install
+-----------------
+
+First clone the repo with::
+
+ git clone git@github.com:kagisearch/vectordb.git
+
+this will create a directory ``vectordb`` wherever you ran the command. Next go into the directory and install in developer mode::
+
+ pip install -e ".[dev]"
+
+this will install all relevant libraries and then install vectordb in an editable format so any changes you make to the code will be included next time you import the package. To start making changes you should immediately create a new branch::
+
+ git checkout -b <new_branch_name>
+
+you can edit this branch however you like. If you are happy with the results and want to share with the rest of the community, then follow the contributors guide to create a pull request!
\ No newline at end of file
diff --git a/docs/source/license.rst b/docs/source/license.rst
new file mode 100644
index 0000000..48bbc9f
--- /dev/null
+++ b/docs/source/license.rst
@@ -0,0 +1,24 @@
+License
+=======
+
+MIT License
+
+Copyright (c) [2023] [vectordb authors]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/noxfile.py b/noxfile.py
new file mode 100644
index 0000000..95165cf
--- /dev/null
+++ b/noxfile.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import shutil
+from pathlib import Path
+
+import nox
+
+DIR = Path(__file__).parent.resolve()
+
+nox.options.sessions = ["lint", "pylint", "tests", "build"]
+
+
+@nox.session
+def lint(session: nox.Session) -> None:
+ """
+ Run the linter.
+ """
+ session.install("pre-commit")
+ session.run("pre-commit", "run", "--all-files", *session.posargs)
+
+
+@nox.session
+def pylint(session: nox.Session) -> None:
+ """
+ Run PyLint.
+ """
+ # This needs to be installed into the package environment, and is slower
+ # than a pre-commit check
+ session.install(".", "pylint")
+ session.run("pylint", "src", *session.posargs)
+
+
+@nox.session
+def tests(session: nox.Session) -> None:
+ """
+ Run the unit and regular tests. Use --cov to activate coverage.
+ """
+ session.install(".[dev]")
+ session.run("pytest", *session.posargs)
+
+
+@nox.session
+def build(session: nox.Session) -> None:
+ """
+ Build an SDist and wheel.
+ """
+
+ build_p = DIR.joinpath("build")
+ if build_p.exists():
+ shutil.rmtree(build_p)
+
+ session.install("build")
+ session.run("python", "-m", "build")
diff --git a/pyproject.toml b/pyproject.toml
index 76dca15..c33ce68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,4 +68,4 @@ local_scheme = "no-local-version"
[tool.ruff]
# Same as Black.
-line-length = 100
\ No newline at end of file
+line-length = 100
diff --git a/src/vectordb/__init__.py b/src/vectordb/__init__.py
index 6b2dc16..00985c8 100644
--- a/src/vectordb/__init__.py
+++ b/src/vectordb/__init__.py
@@ -2,4 +2,4 @@
from ._version import version as VERSION # noqa
__version__ = VERSION
-__author__ = "kagisearch"
\ No newline at end of file
+__author__ = "kagisearch"
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 656908c..1e4bd65 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -3,6 +3,7 @@
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
@@ -12,5 +13,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev104'
-__version_tuple__ = version_tuple = (0, 1, 'dev104')
+__version__ = version = "0.1.dev104"
+__version_tuple__ = version_tuple = (0, 1, "dev104")
From 1f89aa90338947fb5104c836eb60826dcd683d83 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 07:30:03 +0000
Subject: [PATCH 12/39] docs: Add getting started/intro documentation
---
.devcontainer/environment.yml | 17 +++++++++++
.devcontainer/postBuild.sh | 4 +++
docs/source/_toc.yml | 5 +--
docs/source/examples/ai_rss_reader.ipynb | 39 ++++++++++++++++++++++++
docs/source/getting_started.rst | 14 +++++++++
5 files changed, 75 insertions(+), 4 deletions(-)
create mode 100644 docs/source/examples/ai_rss_reader.ipynb
create mode 100644 docs/source/getting_started.rst
diff --git a/.devcontainer/environment.yml b/.devcontainer/environment.yml
index e69de29..b4a2ea2 100644
--- a/.devcontainer/environment.yml
+++ b/.devcontainer/environment.yml
@@ -0,0 +1,17 @@
+channels:
+ - conda-forge
+dependencies:
+ - python=3.8
+ - jupyterlab
+ - jupyterlab-git
+ - faiss-cpu
+ - numpy>=1.21.0
+ - scikit-learn>=0.24.0
+ - scipy>=1.7.0
+ - sentence_transformers
+ - tensorflow_text
+ - torch>=1.9.0
+ - transformers>=4.10.0
+ - pre-commit
+ - nox
+ - pip
\ No newline at end of file
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index e69de29..d08d23b 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -0,0 +1,4 @@
+# These commands will be run after the devcontainer is built.
+
+# Install vectordb locally for development
+python3 -m pip install -e .
\ No newline at end of file
diff --git a/docs/source/_toc.yml b/docs/source/_toc.yml
index b9773cd..7a6610e 100644
--- a/docs/source/_toc.yml
+++ b/docs/source/_toc.yml
@@ -8,10 +8,7 @@ chapters:
- file: install
- file: examples/index
sections:
- - file: examples/Example_ImageFit_LM
+ - file: examples/ai_rss_reader
- file: contributing
- - file: citation
- file: license
- - file: modules
- - file: glossary
- file: genindex
\ No newline at end of file
diff --git a/docs/source/examples/ai_rss_reader.ipynb b/docs/source/examples/ai_rss_reader.ipynb
new file mode 100644
index 0000000..b1ad7e5
--- /dev/null
+++ b/docs/source/examples/ai_rss_reader.ipynb
@@ -0,0 +1,39 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install feedparser vectordb2"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
new file mode 100644
index 0000000..ca35bc4
--- /dev/null
+++ b/docs/source/getting_started.rst
@@ -0,0 +1,14 @@
+
+Getting Started
+===============
+
+Install
+-------
+
+Please follow the instructions on the :doc:`install` page. For most users, the basic pip install is all that's needed.
+
+
+Read The Docs
+-------------
+
+Docs for all the main functions in vectordb are available at :doc:`vectordb` at varying degrees of completeness. Further development of the docs is always ongoing.
\ No newline at end of file
From 239a83ceb604d986e4d5cf791110b7d709c1af04 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 07:34:26 +0000
Subject: [PATCH 13/39] feat: Add Codespaces functionality
---
.devcontainer/devcontainer.json | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index e69de29..a34204c 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,14 @@
+// For format details and config options, see https://aka.ms/devcontainer.json.
+{
+ "image":"quay.io/pangeo/base-image:latest",
+
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-toolsai.jupyter",
+ "ms-python.python"
+ ]
+ }
+ },
+ "postCreateCommand": "sh .devcontainer/postBuild.sh"
+}
\ No newline at end of file
From dc2d639604139f4b7abe34027ab48dcdb10c80f6 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 07:43:10 +0000
Subject: [PATCH 14/39] feat: Add apt file to install important pkgs to
Codespace
---
.devcontainer/apt.txt | 4 ++++
1 file changed, 4 insertions(+)
create mode 100644 .devcontainer/apt.txt
diff --git a/.devcontainer/apt.txt b/.devcontainer/apt.txt
new file mode 100644
index 0000000..ba414aa
--- /dev/null
+++ b/.devcontainer/apt.txt
@@ -0,0 +1,4 @@
+git
+ncdu
+wget
+curl
\ No newline at end of file
From 9c63a1f06a462d19598d1b7c7734ab42956f7a93 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 00:47:46 -0700
Subject: [PATCH 15/39] fix: Add shell library installs to postBuild
---
.devcontainer/postBuild.sh | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index d08d23b..fb97747 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -1,4 +1,5 @@
# These commands will be run after the devcontainer is built.
# Install vectordb locally for development
-python3 -m pip install -e .
\ No newline at end of file
+python3 -m pip install -e .
+apt-get install git ncdu wget curl
From 7195b56d8096e64fd879b944c4e5f9740ac8da15 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 00:56:40 -0700
Subject: [PATCH 16/39] fix: Add shell package install to devcontainer
---
.devcontainer/devcontainer.json | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index a34204c..f760360 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,5 +10,13 @@
]
}
},
- "postCreateCommand": "sh .devcontainer/postBuild.sh"
-}
\ No newline at end of file
+ "postCreateCommand": "sh .devcontainer/postBuild.sh",
+ "features": {
+ "ghcr.io/devcontainers-contrib/features/black:2": {},
+ "ghcr.io/devcontainers-contrib/features/pylint:2": {},
+ "ghcr.io/devcontainers/features/git:1": {},
+ "ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
+ "ghcr.io/devcontainers-contrib/features/ncdu:1": {},
+ "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
+ }
+}
From d883e9dafb761407f89b5ad30577e3ea1a2307b8 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 00:58:13 -0700
Subject: [PATCH 17/39] refactor: Delete .devcontainer/apt.txt
---
.devcontainer/apt.txt | 4 ----
1 file changed, 4 deletions(-)
delete mode 100644 .devcontainer/apt.txt
diff --git a/.devcontainer/apt.txt b/.devcontainer/apt.txt
deleted file mode 100644
index ba414aa..0000000
--- a/.devcontainer/apt.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-git
-ncdu
-wget
-curl
\ No newline at end of file
From 69666bf0b788f1209445a1665dab8e96092624a6 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 01:03:28 -0700
Subject: [PATCH 18/39] refactor: Install env pkgs in postBuild
---
.devcontainer/postBuild.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index fb97747..4f40f74 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -1,5 +1,5 @@
# These commands will be run after the devcontainer is built.
# Install vectordb locally for development
-python3 -m pip install -e .
-apt-get install git ncdu wget curl
+python3 -m pip install --user -r requirements.txt # Install required packages
+python3 -m pip install -e . # Install vectordb locally
From a6f31c90aa6e4cf6b25560a445a667b441101dd6 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 01:04:25 -0700
Subject: [PATCH 19/39] refactor: Delete .devcontainer/environment.yml
---
.devcontainer/environment.yml | 17 -----------------
1 file changed, 17 deletions(-)
delete mode 100644 .devcontainer/environment.yml
diff --git a/.devcontainer/environment.yml b/.devcontainer/environment.yml
deleted file mode 100644
index b4a2ea2..0000000
--- a/.devcontainer/environment.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-channels:
- - conda-forge
-dependencies:
- - python=3.8
- - jupyterlab
- - jupyterlab-git
- - faiss-cpu
- - numpy>=1.21.0
- - scikit-learn>=0.24.0
- - scipy>=1.7.0
- - sentence_transformers
- - tensorflow_text
- - torch>=1.9.0
- - transformers>=4.10.0
- - pre-commit
- - nox
- - pip
\ No newline at end of file
From 96f6694c23df797d19961bab60ea155d0c453183 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 01:05:48 -0700
Subject: [PATCH 20/39] refactor: Install dev tools in postBuild
---
.devcontainer/postBuild.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index 4f40f74..7dea045 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -2,4 +2,5 @@
# Install vectordb locally for development
python3 -m pip install --user -r requirements.txt # Install required packages
+python3 -m pip install pre-commit nox # Install development tools
python3 -m pip install -e . # Install vectordb locally
From ce51bcabf21a9150a2f695da7ab094011b3a96d5 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:10:32 +0000
Subject: [PATCH 21/39] chore: Pre-commit fixes
---
.devcontainer/devcontainer.json | 31 +++++++++++-------------
CONTRIBUTING.md | 2 +-
docs/source/_toc.yml | 2 +-
docs/source/contributing.rst | 2 +-
docs/source/examples/ai_rss_reader.ipynb | 8 +-----
docs/source/getting_started.rst | 2 +-
docs/source/install.rst | 2 +-
docs/source/license.rst | 2 +-
src/vectordb/_version.py | 4 +--
9 files changed, 23 insertions(+), 32 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index f760360..dabf871 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,22 +1,19 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
{
- "image":"quay.io/pangeo/base-image:latest",
+ "image": "quay.io/pangeo/base-image:latest",
- "customizations": {
- "vscode": {
- "extensions": [
- "ms-toolsai.jupyter",
- "ms-python.python"
- ]
- }
- },
- "postCreateCommand": "sh .devcontainer/postBuild.sh",
- "features": {
- "ghcr.io/devcontainers-contrib/features/black:2": {},
- "ghcr.io/devcontainers-contrib/features/pylint:2": {},
- "ghcr.io/devcontainers/features/git:1": {},
- "ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
- "ghcr.io/devcontainers-contrib/features/ncdu:1": {},
- "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
+ "customizations": {
+ "vscode": {
+ "extensions": ["ms-toolsai.jupyter", "ms-python.python"]
}
+ },
+ "postCreateCommand": "sh .devcontainer/postBuild.sh",
+ "features": {
+ "ghcr.io/devcontainers-contrib/features/black:2": {},
+ "ghcr.io/devcontainers-contrib/features/pylint:2": {},
+ "ghcr.io/devcontainers/features/git:1": {},
+ "ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
+ "ghcr.io/devcontainers-contrib/features/ncdu:1": {},
+ "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
+ }
}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 72f51e5..6c24175 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,4 +8,4 @@ To get started, please refer to our
for detailed guidelines on how to contribute to vectordb.
We appreciate your contributions and look forward to your involvement in making
-vectordb even better!
\ No newline at end of file
+vectordb even better!
diff --git a/docs/source/_toc.yml b/docs/source/_toc.yml
index 7a6610e..7e968c4 100644
--- a/docs/source/_toc.yml
+++ b/docs/source/_toc.yml
@@ -11,4 +11,4 @@ chapters:
- file: examples/ai_rss_reader
- file: contributing
- file: license
- - file: genindex
\ No newline at end of file
+ - file: genindex
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index b2eeaed..1c40dda 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -127,4 +127,4 @@ Code of Conduct
By contributing to this project, you agree to abide by the `Code of Conduct
`_.
Please make sure to read and understand the guidelines outlined in the Code
-of Conduct before making any contributions.
\ No newline at end of file
+of Conduct before making any contributions.
diff --git a/docs/source/examples/ai_rss_reader.ipynb b/docs/source/examples/ai_rss_reader.ipynb
index b1ad7e5..ca235d9 100644
--- a/docs/source/examples/ai_rss_reader.ipynb
+++ b/docs/source/examples/ai_rss_reader.ipynb
@@ -16,11 +16,6 @@
}
],
"metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -30,8 +25,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.13"
+ "pygments_lexer": "ipython3"
}
},
"nbformat": 4,
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index ca35bc4..3a83d17 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -11,4 +11,4 @@ Please follow the instructions on the :doc:`install` page. For most users, the b
Read The Docs
-------------
-Docs for all the main functions in vectordb are available at :doc:`vectordb` at varying degrees of completeness. Further development of the docs is always ongoing.
\ No newline at end of file
+Docs for all the main functions in vectordb are available at :doc:`vectordb` at varying degrees of completeness. Further development of the docs is always ongoing.
diff --git a/docs/source/install.rst b/docs/source/install.rst
index 96bd85c..a644b95 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -27,4 +27,4 @@ this will install all relevant libraries and then install vectordb in an editabl
git checkout -b
-you can edit this branch however you like. If you are happy with the results and want to share with the rest of the community, then follow the contributors guide to create a pull request!
\ No newline at end of file
+you can edit this branch however you like. If you are happy with the results and want to share with the rest of the community, then follow the contributors guide to create a pull request!
diff --git a/docs/source/license.rst b/docs/source/license.rst
index 48bbc9f..2a5b425 100644
--- a/docs/source/license.rst
+++ b/docs/source/license.rst
@@ -21,4 +21,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 1e4bd65..d3eeb2b 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -13,5 +13,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = "0.1.dev104"
-__version_tuple__ = version_tuple = (0, 1, "dev104")
+__version__ = version = "0.1.dev116"
+__version_tuple__ = version_tuple = (0, 1, "dev116")
From 58ec24c33bb895c209f7f1a40f5ebb3e8f64bbf8 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:25:18 +0000
Subject: [PATCH 22/39] fix: Add modules to __init__
---
src/vectordb/__init__.py | 15 +++++++++++++++
src/vectordb/_version.py | 5 ++---
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/src/vectordb/__init__.py b/src/vectordb/__init__.py
index 00985c8..ac9fd0d 100644
--- a/src/vectordb/__init__.py
+++ b/src/vectordb/__init__.py
@@ -1,5 +1,20 @@
# pylint: disable = line-too-long, trailing-whitespace, trailing-newlines, line-too-long, missing-module-docstring, import-error, too-few-public-methods, too-many-instance-attributes, too-many-locals
from ._version import version as VERSION # noqa
+from .chunking import Chunker
+from .embedding import BaseEmbedder, Embedder
+from .memory import Memory
+from .storage import Storage
+from .vector_search import VectorSearch
+
__version__ = VERSION
__author__ = "kagisearch"
+
+__all__ = [
+ "Chunker",
+ "BaseEmbedder",
+ "Embedder",
+ "Memory",
+ "Storage",
+ "VectorStorage",
+]
\ No newline at end of file
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index d3eeb2b..761eb15 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -3,7 +3,6 @@
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
@@ -13,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = "0.1.dev116"
-__version_tuple__ = version_tuple = (0, 1, "dev116")
+__version__ = version = '0.1.dev117'
+__version_tuple__ = version_tuple = (0, 1, 'dev117')
From 211c4338c8bf6b5e1607ee07e90ab76d3fe18488 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:33:09 +0000
Subject: [PATCH 23/39] fix: Set Python version to 3.8
---
.devcontainer/devcontainer.json | 19 +++++++++++++++----
src/vectordb/_version.py | 4 ++--
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index dabf871..d281fc9 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -9,11 +9,22 @@
},
"postCreateCommand": "sh .devcontainer/postBuild.sh",
"features": {
- "ghcr.io/devcontainers-contrib/features/black:2": {},
- "ghcr.io/devcontainers-contrib/features/pylint:2": {},
- "ghcr.io/devcontainers/features/git:1": {},
+ "ghcr.io/devcontainers-contrib/features/black:2": {
+ "version": "latest"
+ },
+ "ghcr.io/devcontainers-contrib/features/pylint:2": {
+ "version": "latest"
+ },
+ "ghcr.io/devcontainers/features/git:1": {
+ "ppa": true,
+ "version": "latest"
+ },
"ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
"ghcr.io/devcontainers-contrib/features/ncdu:1": {},
- "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
+ "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {},
+ "ghcr.io/devcontainers/features/python:1": {
+ "installTools": true,
+ "version": "3.8"
+ }
}
}
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 761eb15..459fefe 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev117'
-__version_tuple__ = version_tuple = (0, 1, 'dev117')
+__version__ = version = '0.1.dev118'
+__version_tuple__ = version_tuple = (0, 1, 'dev118')
From 77318d382dabeee97c4375969083bc6be0191cf4 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 15:18:45 +0000
Subject: [PATCH 24/39] refactor: Add conda support
---
.devcontainer/devcontainer.json | 6 +++++-
requirements.txt | 4 ++--
src/vectordb/_version.py | 4 ++--
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index d281fc9..eb61009 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -24,7 +24,11 @@
"ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {},
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
- "version": "3.8"
+ "version": "latest"
+ },
+ "ghcr.io/devcontainers/features/conda:1": {
+ "addCondaForge": true,
+ "version": "latest"
}
}
}
diff --git a/requirements.txt b/requirements.txt
index 1417962..0923a54 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-faiss-cpu
+faiss-cpu>=1.8.0
numpy>=1.21.0
scikit-learn>=0.24.0
scipy>=1.7.0
-sentence_transformers
+sentence_transformers>=2.6.1
tensorflow_text
torch>=1.9.0
transformers>=4.10.0
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 459fefe..380ff21 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev118'
-__version_tuple__ = version_tuple = (0, 1, 'dev118')
+__version__ = version = '0.1.dev119'
+__version_tuple__ = version_tuple = (0, 1, 'dev119')
From cafe7a6f624ca7bfb9249c5b4892fcf6230a0103 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 20:46:19 +0000
Subject: [PATCH 25/39] refactor: Add Dockerfile and update devcontainer.json
---
.devcontainer/Dockerfile | 11 +++++++++++
.devcontainer/devcontainer.json | 28 +++++++---------------------
.devcontainer/environment.yml | 16 ++++++++++++++++
.devcontainer/postBuild.sh | 6 ++++++
src/vectordb/_version.py | 4 ++--
5 files changed, 42 insertions(+), 23 deletions(-)
create mode 100644 .devcontainer/Dockerfile
create mode 100644 .devcontainer/environment.yml
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000..2719590
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,11 @@
+# Use the Pangeo base image
+ARG PANGEO_BASE_IMAGE_TAG=master
+FROM pangeo/base-image:${PANGEO_BASE_IMAGE_TAG}
+
+# Copy the environment.yaml file into the Docker image
+COPY environment.yaml /tmp/environment.yaml
+
+# Use the conda command to create a new environment from the environment.yaml file
+RUN conda env create -f ~/.devcontainer/environment.yaml
+RUN conda init bash
+RUN echo "conda activate ssec-scipy2024" >> ~/.bashrc
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index eb61009..be9025d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,34 +1,20 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
{
- "image": "quay.io/pangeo/base-image:latest",
+ "build": { "dockerfile": "Dockerfile" },
"customizations": {
"vscode": {
- "extensions": ["ms-toolsai.jupyter", "ms-python.python"]
+ "extensions": [
+ "ms-toolsai.jupyter",
+ "ms-python.python"
+ ]
}
},
"postCreateCommand": "sh .devcontainer/postBuild.sh",
"features": {
- "ghcr.io/devcontainers-contrib/features/black:2": {
- "version": "latest"
- },
- "ghcr.io/devcontainers-contrib/features/pylint:2": {
- "version": "latest"
- },
- "ghcr.io/devcontainers/features/git:1": {
- "ppa": true,
- "version": "latest"
- },
+ "ghcr.io/devcontainers/features/git:1": {},
"ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
"ghcr.io/devcontainers-contrib/features/ncdu:1": {},
- "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {},
- "ghcr.io/devcontainers/features/python:1": {
- "installTools": true,
- "version": "latest"
- },
- "ghcr.io/devcontainers/features/conda:1": {
- "addCondaForge": true,
- "version": "latest"
+ "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
}
- }
}
diff --git a/.devcontainer/environment.yml b/.devcontainer/environment.yml
new file mode 100644
index 0000000..726b626
--- /dev/null
+++ b/.devcontainer/environment.yml
@@ -0,0 +1,16 @@
+name: vectordb-dev
+channel:
+ - conda-forge
+dependencies:
+ - python==3.8
+ - faiss-cpu>=1.8.0
+ - numpy>=1.21.0
+ - scikit-learn>=0.24.0
+ - scipy>=1.7.0
+ - sentence_transformers>=2.6.1
+ - tensorflow_text
+ - torch>=1.9.0
+ - transformers>=4.10.0
+ - pre-commit
+ - nox
+ - pip
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index 7dea045..ca573f5 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -1,5 +1,11 @@
# These commands will be run after the devcontainer is built.
+# Setup Conda environment
+conda env create -f ~/.devcontainer/SciPy2024/environment.yml # Create environment from environment.yml
+conda init bash # Initialize conda for bash
+source ~/.bashrc # Reload bash
+conda activate scipy2024 # Activate the environment
+
# Install vectordb locally for development
python3 -m pip install --user -r requirements.txt # Install required packages
python3 -m pip install pre-commit nox # Install development tools
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 380ff21..7e2fe9a 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev119'
-__version_tuple__ = version_tuple = (0, 1, 'dev119')
+__version__ = version = '0.1.dev120'
+__version_tuple__ = version_tuple = (0, 1, 'dev120')
From 8b657e5f29fb510b8c91da2301bedc5fe15327f0 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Thu, 28 Mar 2024 20:47:31 +0000
Subject: [PATCH 26/39] refactor: Remove pre-commit and nox from postBuild
---
.devcontainer/postBuild.sh | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index ca573f5..6236346 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -1,12 +1,3 @@
# These commands will be run after the devcontainer is built.
-# Setup Conda environment
-conda env create -f ~/.devcontainer/SciPy2024/environment.yml # Create environment from environment.yml
-conda init bash # Initialize conda for bash
-source ~/.bashrc # Reload bash
-conda activate scipy2024 # Activate the environment
-
-# Install vectordb locally for development
-python3 -m pip install --user -r requirements.txt # Install required packages
-python3 -m pip install pre-commit nox # Install development tools
python3 -m pip install -e . # Install vectordb locally
From 385cc242a67535407d7fc81a2fcdfb9605e66552 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Sun, 7 Apr 2024 23:38:59 -0700
Subject: [PATCH 27/39] fix: Update Dockerfile to build Conda env
---
.devcontainer/Dockerfile | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 2719590..86aaa63 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,11 +1,21 @@
-# Use the Pangeo base image
-ARG PANGEO_BASE_IMAGE_TAG=master
-FROM pangeo/base-image:${PANGEO_BASE_IMAGE_TAG}
+# Use the latest version of the pangeo/base-notebook (includes CUDA support)
+FROM pangeo/base-notebook:latest
-# Copy the environment.yaml file into the Docker image
-COPY environment.yaml /tmp/environment.yaml
+# Set the user to root
+USER root
-# Use the conda command to create a new environment from the environment.yaml file
-RUN conda env create -f ~/.devcontainer/environment.yaml
-RUN conda init bash
-RUN echo "conda activate ssec-scipy2024" >> ~/.bashrc
+# Copy the environment and requirements files into the Docker image
+COPY environment.yml /tmp/environment.yml
+COPY requirements.txt /tmp/requirements.txt
+
+# Create a new Conda environment from the environment.yml file
+RUN conda env create -f /tmp/environment.yml
+
+# Install wget, git, ncdu, and curl
+RUN apt-get update && apt-get install -y \
+ wget \
+ git \
+ ncdu \
+ curl \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
From 20e9d1ad7898cde27ff82ee660a5912f82252e54 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Sun, 7 Apr 2024 23:49:08 -0700
Subject: [PATCH 28/39] chore: Update ci.yml
---
.github/workflows/ci.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6c417cb..fff6303 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
os: [ubuntu-latest, windows-latest, macOS-latest]
steps:
- - name: Checkout caustics
+ - name: Checkout vectordb
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -61,7 +61,7 @@ jobs:
run: |
python -m pip install pytest-github-actions-annotate-failures
- - name: Install Caustics
+ - name: Install vectordb
run: |
pip install -e ".[dev]"
pip show ${{ env.PROJECT_NAME }}
From 508e75329ec9d555502d0e7ab6fbb78175437b52 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 18:51:43 +0000
Subject: [PATCH 29/39] refactor: Update Dockerfile to use pangeo/base-notebook
and install apt deps
---
.devcontainer/Dockerfile | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 86aaa63..10f47da 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -4,9 +4,8 @@ FROM pangeo/base-notebook:latest
# Set the user to root
USER root
-# Copy the environment and requirements files into the Docker image
+# Copy the environment file into the Docker image
COPY environment.yml /tmp/environment.yml
-COPY requirements.txt /tmp/requirements.txt
# Create a new Conda environment from the environment.yml file
RUN conda env create -f /tmp/environment.yml
@@ -18,4 +17,4 @@ RUN apt-get update && apt-get install -y \
ncdu \
curl \
&& apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+ && rm -rf /var/lib/apt/lists/*
\ No newline at end of file
From 275857f45182cc7608def1443a8552baec8278b4 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 21:01:48 +0000
Subject: [PATCH 30/39] refactor: Update requirement specs
---
.devcontainer/environment.yml | 17 +++++++++--------
CONTRIBUTING.md | 2 +-
requirements.txt | 14 +++++++-------
src/vectordb/__init__.py | 2 +-
src/vectordb/_version.py | 4 ++--
5 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/.devcontainer/environment.yml b/.devcontainer/environment.yml
index 726b626..57c1c4e 100644
--- a/.devcontainer/environment.yml
+++ b/.devcontainer/environment.yml
@@ -1,16 +1,17 @@
name: vectordb-dev
channel:
- conda-forge
+ - defaults
dependencies:
- - python==3.8
- - faiss-cpu>=1.8.0
- - numpy>=1.21.0
- - scikit-learn>=0.24.0
- - scipy>=1.7.0
- - sentence_transformers>=2.6.1
+ - python>=3.8,<3.12
+ - faiss-cpu
+ - numpy
+ - scikit-learn
+ - scipy
+ - sentence_transformers
- tensorflow_text
- - torch>=1.9.0
- - transformers>=4.10.0
+ - torch
+ - transformers
- pre-commit
- nox
- pip
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6c24175..effb9b2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-## Contributing to Vectordb
+# Contributing to Vectordb
Thank you for your interest in contributing to vectordb! We welcome
contributions from the community to help improve our project.
diff --git a/requirements.txt b/requirements.txt
index 0923a54..5ace5dc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-faiss-cpu>=1.8.0
-numpy>=1.21.0
-scikit-learn>=0.24.0
-scipy>=1.7.0
-sentence_transformers>=2.6.1
+faiss-cpu
+numpy
+scikit-learn
+scipy
+sentence_transformers
tensorflow_text
-torch>=1.9.0
-transformers>=4.10.0
+torch
+transformers
diff --git a/src/vectordb/__init__.py b/src/vectordb/__init__.py
index ac9fd0d..1902912 100644
--- a/src/vectordb/__init__.py
+++ b/src/vectordb/__init__.py
@@ -16,5 +16,5 @@
"Embedder",
"Memory",
"Storage",
- "VectorStorage",
+ "VectorSearch",
]
\ No newline at end of file
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 7e2fe9a..007a556 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev120'
-__version_tuple__ = version_tuple = (0, 1, 'dev120')
+__version__ = version = '0.1.dev125'
+__version_tuple__ = version_tuple = (0, 1, 'dev125')
From c93a9df7a9540dfb14de88bc2f33967e7664dafb Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 21:18:20 +0000
Subject: [PATCH 31/39] feat: Create auto-build docker image
---
.github/workflows/build.yml | 89 +++++++++++++++++++++++
{.devcontainer => docker}/Dockerfile | 0
{.devcontainer => docker}/environment.yml | 0
requirements.txt | 2 +
4 files changed, 91 insertions(+)
create mode 100644 .github/workflows/build.yml
rename {.devcontainer => docker}/Dockerfile (100%)
rename {.devcontainer => docker}/environment.yml (100%)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..359b224
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,89 @@
+# Any push to the branches listed below (or a manual dispatch) re-builds the image, re-runs the image tests, and pushes `latest` and date tags to the GitHub Container Registry (ghcr.io)
+name: Build
+on:
+ push:
+ branches:
+ - main
+ - pre-commit
+ paths-ignore:
+ - 'LICENSE'
+ - 'README.md'
+ workflow_dispatch:
+
+env:
+ DOCKER_REGISTRY: ghcr.io
+ DOCKER_ORG: ${{ github.repository_owner }}
+ GITHUB_SHA: ${{ github.sha }}
+ GITHUB_REF: ${{ github.ref }}
+
+jobs:
+ build-images:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ packages: write
+ strategy:
+ fail-fast: false
+ matrix:
+ IMAGE: [docker]
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v3
+
+ - name: Get date tag
+ id: get_date
+ run: |
+ DATE_TAG="$( date -u '+%Y.%m.%d' )"
+ echo "date_tag=$DATE_TAG" >> $GITHUB_OUTPUT
+
+ - name: Get registry and org
+ id: registry_org
+ run: |
+ ORG=$(echo "${{ env.DOCKER_ORG }}" | tr '[:upper:]' '[:lower:]')
+ echo "image_base=${{ env.DOCKER_REGISTRY }}/${ORG}" >> $GITHUB_OUTPUT
+
+ # https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ - name: Free up disk space
+ run: |
+ df -h
+ docker image ls
+ sudo apt clean
+ sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc
+ df -h
+
+ - name: Extract metadata (tags, labels) for Docker
+ id: meta
+ uses: docker/metadata-action@v4
+ with:
+ images: ${{ steps.registry_org.outputs.image_base }}/${{ matrix.IMAGE }}
+ tags: |
+ # set latest tag for default branch
+ type=raw,value=latest
+ type=raw,value=${{ steps.get_date.outputs.date_tag }}
+
+ - name: Log in to registry
+ uses: docker/login-action@v2
+ with:
+ registry: ${{ env.DOCKER_REGISTRY }}
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Build and push Docker image
+ uses: docker/build-push-action@v4
+ with:
+ context: ${{ matrix.IMAGE }}
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
+ push: true
+
+ - name: Inspect Image
+ run: |
+ docker run ${{ steps.registry_org.outputs.image_base }}/${{ matrix.IMAGE }}:latest conda list
+ docker image ls  # list all local images (`docker images ls` would only match a repository named "ls")
+
+ - name: Test Image
+ run: |
+ docker run -u 1000 -w /srv/test -v $PWD:/srv/test ${{ steps.registry_org.outputs.image_base }}/${{ matrix.IMAGE }}:latest
\ No newline at end of file
diff --git a/.devcontainer/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from .devcontainer/Dockerfile
rename to docker/Dockerfile
diff --git a/.devcontainer/environment.yml b/docker/environment.yml
similarity index 100%
rename from .devcontainer/environment.yml
rename to docker/environment.yml
diff --git a/requirements.txt b/requirements.txt
index 5ace5dc..f4a74ed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
faiss-cpu
+nox
numpy
+pre-commit
scikit-learn
scipy
sentence_transformers
From 1fd079cba2fb3ab62a736192c59ff6135f8c22e7 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 21:38:45 +0000
Subject: [PATCH 32/39] fix: Check pkgs available on conda-forge
---
.github/workflows/build.yml | 1 +
docker/environment.yml | 8 +++++---
requirements.txt | 3 ++-
3 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 359b224..fcd3662 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -4,6 +4,7 @@ on:
push:
branches:
- main
+ - dev
- pre-commit
paths-ignore:
- 'LICENSE'
diff --git a/docker/environment.yml b/docker/environment.yml
index 57c1c4e..34723bc 100644
--- a/docker/environment.yml
+++ b/docker/environment.yml
@@ -8,10 +8,12 @@ dependencies:
- numpy
- scikit-learn
- scipy
- - sentence_transformers
- - tensorflow_text
- - torch
+ - sentence-transformers
+ - tensorflow-hub
+ - pytorch
- transformers
- pre-commit
- nox
- pip
+ - pip:
+ - tensorflow_text
diff --git a/requirements.txt b/requirements.txt
index f4a74ed..a85c1d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,8 @@ numpy
pre-commit
scikit-learn
scipy
-sentence_transformers
+sentence-transformers
+tensorflow-hub
tensorflow_text
torch
transformers
From a1f7e9257cdced6cd1e7d2d574e2e15ee24dbea9 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 21:53:26 +0000
Subject: [PATCH 33/39] refactor: Use docker image instead of Dockerfile in
devcontainer.json
---
.devcontainer/devcontainer.json | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index be9025d..bec02b3 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,6 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
{
- "build": { "dockerfile": "Dockerfile" },
+ "image": "ghcr.io/swarm-io-internal/docker:latest",
"customizations": {
"vscode": {
@@ -10,11 +10,5 @@
]
}
},
- "postCreateCommand": "sh .devcontainer/postBuild.sh",
- "features": {
- "ghcr.io/devcontainers/features/git:1": {},
- "ghcr.io/devcontainers-contrib/features/curl-apt-get:1": {},
- "ghcr.io/devcontainers-contrib/features/ncdu:1": {},
- "ghcr.io/devcontainers-contrib/features/wget-apt-get:1": {}
- }
+ "postCreateCommand": "sh .devcontainer/postBuild.sh"
}
From f45431213a0cfaa04901025f4757678ac1bcadc0 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 22:43:27 +0000
Subject: [PATCH 34/39] feat: Install support for GitHub repo install
---
docker/environment.yml | 1 +
pyproject.toml | 2 +-
src/vectordb/_version.py | 4 ++--
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/docker/environment.yml b/docker/environment.yml
index 34723bc..214b731 100644
--- a/docker/environment.yml
+++ b/docker/environment.yml
@@ -17,3 +17,4 @@ dependencies:
- pip
- pip:
- tensorflow_text
+ - git+https://github.com/vioshyvo/mrpt/
diff --git a/pyproject.toml b/pyproject.toml
index c33ce68..c26bac1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["hatchling", "hatch-requirements-txt", "hatch-vcs"]
build-backend = "hatchling.build"
[project]
-name = "vectordb2"
+name = "vectordb"
dynamic = [
"dependencies",
"version"
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 007a556..9838fb0 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev125'
-__version_tuple__ = version_tuple = (0, 1, 'dev125')
+__version__ = version = '0.1.dev129'
+__version_tuple__ = version_tuple = (0, 1, 'dev129')
From 4255800e2c4998d5e6b41617ff449a8742420726 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 23:04:16 +0000
Subject: [PATCH 35/39] fix: Remove git installation
---
docker/environment.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/docker/environment.yml b/docker/environment.yml
index 214b731..34723bc 100644
--- a/docker/environment.yml
+++ b/docker/environment.yml
@@ -17,4 +17,3 @@ dependencies:
- pip
- pip:
- tensorflow_text
- - git+https://github.com/vioshyvo/mrpt/
From 84ecb1cbf54115c1e6f5110d06b4ff0e34534559 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Sat, 20 Apr 2024 05:37:34 +0000
Subject: [PATCH 36/39] fix: Update project name in ci.yml and add
build-essentials to Dockerfile
---
.github/workflows/ci.yml | 4 ++--
docker/Dockerfile | 1 +
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fff6303..cb91d62 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ concurrency:
env:
FORCE_COLOR: 3
- PROJECT_NAME: "vectordb2"
+ PROJECT_NAME: "vectordb"
jobs:
build:
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.9", "3.10", "3.11"]
+ python-version: ["3.8","3.9", "3.10", "3.11"]
os: [ubuntu-latest, windows-latest, macOS-latest]
steps:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 10f47da..2234d5b 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -16,5 +16,6 @@ RUN apt-get update && apt-get install -y \
git \
ncdu \
curl \
+ build-essential \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
\ No newline at end of file
From e4c32a1fc4c4f6f6de1fd32d9b11f6bec7ae48f3 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Sat, 20 Apr 2024 06:28:11 +0000
Subject: [PATCH 37/39] docs: Update docs to include codespaces, docker, and
conda instructions
---
.devcontainer/postBuild.sh | 1 +
README.md | 82 ++++++++++++++++++++++++++++++++++++++
src/vectordb/_version.py | 4 +-
3 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/.devcontainer/postBuild.sh b/.devcontainer/postBuild.sh
index 6236346..3e63d21 100644
--- a/.devcontainer/postBuild.sh
+++ b/.devcontainer/postBuild.sh
@@ -1,3 +1,4 @@
# These commands will be run after the devcontainer is built.
+python3 -m pip install git+https://github.com/vioshyvo/mrpt/
python3 -m pip install -e . # Install vectordb locally
diff --git a/README.md b/README.md
index a9e899d..64af681 100644
--- a/README.md
+++ b/README.md
@@ -277,3 +277,85 @@ to ensure maximum performance across the spectrum of use cases.
## License
MIT License.
+
+## Contributing
+
+We welcome contributions to VectorDB! Here are the instructions to set up your development environment.
+
+### Activating a Codespace
+
+1. On the main page of the repository, click the `Code` button.
+2. Click on `Open with Codespaces`.
+3. Click on `New Codespace`.
+
+### Using a Docker Container
+
+1. Ensure [Docker](https://www.docker.com/products/docker-desktop/) is installed on your machine.
+2. Pull the Docker image using the following command:
+
+```bash
+docker pull ghcr.io/swarm-io-internal/docker:latest --platform linux/x86_64
+```
+
+> Note: On Apple Silicon systems, omitting the `--platform` flag causes the error "no matching manifest for linux/arm64/v8 in the manifest list entries".
+
+3. Run the Docker container:
+
+```bash
+docker run -p 8888:8888 --platform linux/x86_64 -it ghcr.io/swarm-io-internal/docker:latest bash
+```
+
+4. Clone the repository:
+
+```bash
+git clone https://github.com/kagisearch/vectordb.git
+```
+
+5. Navigate to the cloned repository:
+
+```bash
+cd vectordb
+```
+
+### Using a Conda Environment
+
+1. Ensure [Conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) is installed on your machine.
+2. Clone the repository:
+
+```bash
+git clone https://github.com/kagisearch/vectordb.git
+```
+
+3. Navigate to the cloned repository:
+
+```bash
+cd vectordb
+```
+
+4. Create the Conda environment:
+
+```bash
+conda env create -f ./docker/environment.yml
+```
+
+5. Initialize Conda:
+
+```bash
+conda init
+```
+
+6. Source the bashrc file:
+
+```bash
+. ~/.bashrc
+```
+
+7. Activate the Conda environment:
+
+```bash
+conda activate vectordb-dev
+```
+
+Please make sure to update tests as appropriate when making changes, and update the documentation to reflect the changes you made.
+
+Happy coding!
\ No newline at end of file
diff --git a/src/vectordb/_version.py b/src/vectordb/_version.py
index 9838fb0..c6c1682 100644
--- a/src/vectordb/_version.py
+++ b/src/vectordb/_version.py
@@ -12,5 +12,5 @@
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.dev129'
-__version_tuple__ = version_tuple = (0, 1, 'dev129')
+__version__ = version = '0.1.dev132'
+__version_tuple__ = version_tuple = (0, 1, 'dev132')
From d46357afe6ecc568719ab3f723823255890a8c2d Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Tue, 14 May 2024 14:56:45 -0700
Subject: [PATCH 38/39] fix: Update install.rst organization
---
docs/source/install.rst | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/docs/source/install.rst b/docs/source/install.rst
index a644b95..a94b6a6 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -1,4 +1,3 @@
-
Installation
============
@@ -17,7 +16,7 @@ Developer Install
First clone the repo with::
- git clone git@github.com:Ciela-Institute/vectordb.git
+ git clone git@github.com:kagisearch/vectordb.git
this will create a directory ``vectordb`` wherever you ran the command. Next go into the directory and install in developer mode::
From b5b65462d1d5b4a03a4d6ea131460f23224b5963 Mon Sep 17 00:00:00 2001
From: Cordero Core <127983572+uwcdc@users.noreply.github.com>
Date: Fri, 17 May 2024 15:13:05 -0700
Subject: [PATCH 39/39] feat: Update docker image in devcontainer.json
---
.devcontainer/devcontainer.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index bec02b3..84fe1fb 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,6 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
{
- "image": "ghcr.io/swarm-io-internal/docker:latest",
+ "image": "ghcr.io/swarm-io-internal/data-science-lite:latest",
"customizations": {
"vscode": {