-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RocksDB Python API #991
RocksDB Python API #991
Changes from all commits
a7298d5
f94c2ff
8f1a894
2b657ba
00d2cf3
aaafea3
4d3082c
c8e6655
59376e3
bcb21e3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -20,6 +20,8 @@ | |||||
#include <rocksdb/options.h> | ||||||
#include <rocksdb/utilities/options_util.h> | ||||||
#include <rocksdb/slice.h> | ||||||
#include <rocksdb/filter_policy.h> | ||||||
#include <rocksdb/table.h> | ||||||
|
||||||
namespace arcticdb::storage::rocksdb { | ||||||
|
||||||
|
@@ -68,6 +70,13 @@ RocksDBStorage::RocksDBStorage(const LibraryPath &library_path, OpenMode mode, c | |||||
fs::create_directories(lib_dir); | ||||||
db_options.create_if_missing = true; | ||||||
db_options.create_missing_column_families = true; | ||||||
db_options.IncreaseParallelism(); // TODO: add a method to task_scheduler.hpp to return the number of IOThreads configured there, and use that rather than the default 16. | ||||||
for (auto& desc: column_families) { | ||||||
desc.options.OptimizeLevelStyleCompaction(); | ||||||
::rocksdb::BlockBasedTableOptions table_options; | ||||||
table_options.filter_policy.reset(::rocksdb::NewBloomFilterPolicy(10)); | ||||||
desc.options.table_factory.reset(::rocksdb::NewBlockBasedTableFactory(table_options)); | ||||||
} | ||||||
} else { | ||||||
util::raise_rte(DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
} | ||||||
|
@@ -85,14 +94,18 @@ RocksDBStorage::RocksDBStorage(const LibraryPath &library_path, OpenMode mode, c | |||||
} | ||||||
|
||||||
RocksDBStorage::~RocksDBStorage() { | ||||||
for (const auto& [key_type_name, handle]: handles_by_key_type_) { | ||||||
auto s = db_->DestroyColumnFamilyHandle(handle); | ||||||
if (db_ == nullptr) { | ||||||
util::check(handles_by_key_type_.empty(), "Handles not empty but db_ is nullptr"); | ||||||
} else { | ||||||
for (const auto &[key_type_name, handle]: handles_by_key_type_) { | ||||||
auto s = db_->DestroyColumnFamilyHandle(handle); | ||||||
util::check(s.ok(), DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
} | ||||||
handles_by_key_type_.clear(); | ||||||
auto s = db_->Close(); | ||||||
util::check(s.ok(), DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
delete db_; | ||||||
} | ||||||
handles_by_key_type_.clear(); | ||||||
auto s = db_->Close(); | ||||||
util::check(s.ok(), DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
delete db_; | ||||||
} | ||||||
|
||||||
void RocksDBStorage::do_write(Composite<KeySegmentPair>&& kvs) { | ||||||
|
@@ -118,17 +131,26 @@ void RocksDBStorage::do_read(Composite<VariantKey>&& ks, const ReadVisitor& visi | |||||
ARCTICDB_SAMPLE(RocksDBStorageRead, 0) | ||||||
auto grouper = [](auto &&k) { return variant_key_type(k); }; | ||||||
|
||||||
std::vector<VariantKey> failed_reads; | ||||||
(fg::from(ks.as_range()) | fg::move | fg::groupBy(grouper)).foreach([&](auto &&group) { | ||||||
auto key_type_name = fmt::format("{}", group.key()); | ||||||
auto handle = handles_by_key_type_.at(key_type_name); | ||||||
for (const auto &k : group.values()) { | ||||||
std::string k_str = to_serialized_key(k); | ||||||
std::string value; | ||||||
// TODO: Once PR: 975 has been merged we can use ::rocksdb::PinnableSlice to avoid the copy in | ||||||
// the construction of the segment | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
auto s = db_->Get(::rocksdb::ReadOptions(), handle, ::rocksdb::Slice(k_str), &value); | ||||||
util::check(s.ok(), DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
visitor(k, Segment::from_bytes(reinterpret_cast<uint8_t*>(value.data()), value.size())); | ||||||
if (s.IsNotFound()) { | ||||||
failed_reads.push_back(k); | ||||||
} else { | ||||||
util::check(s.ok(), DEFAULT_ROCKSDB_NOT_OK_ERROR + s.ToString()); | ||||||
visitor(k, Segment::from_bytes(reinterpret_cast<uint8_t*>(value.data()), value.size(), true)); | ||||||
} | ||||||
} | ||||||
}); | ||||||
if(!failed_reads.empty()) | ||||||
throw KeyNotFoundException(Composite<VariantKey>(std::move(failed_reads))); | ||||||
} | ||||||
|
||||||
bool RocksDBStorage::do_key_exists(const VariantKey& key) { | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,9 @@ | |
#include <arcticdb/storage/storage_factory.hpp> | ||
#include <arcticdb/storage/lmdb/lmdb_storage.hpp> | ||
#include <arcticdb/storage/memory/memory_storage.hpp> | ||
#ifdef ARCTICDB_INCLUDE_ROCKSDB | ||
#include <arcticdb/storage/rocksdb/rocksdb_storage.hpp> | ||
#endif | ||
#include <arcticdb/storage/mongo/mongo_storage.hpp> | ||
#include <arcticdb/storage/azure/azure_storage.hpp> | ||
#include <arcticdb/storage/s3/s3_storage.hpp> | ||
|
@@ -27,39 +30,35 @@ std::unique_ptr<Storage> create_storage( | |
if (type_name == s3::S3Storage::Config::descriptor()->full_name()) { | ||
s3::S3Storage::Config s3_config; | ||
storage_descriptor.config().UnpackTo(&s3_config); | ||
storage = std::make_unique<s3::S3Storage>( | ||
s3::S3Storage(library_path, mode, s3_config) | ||
); | ||
storage = std::make_unique<s3::S3Storage>(library_path, mode, s3_config); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah wow, nice cleanup, much better. |
||
} else if (type_name == lmdb::LmdbStorage::Config::descriptor()->full_name()) { | ||
lmdb::LmdbStorage::Config lmbd_config; | ||
storage_descriptor.config().UnpackTo(&lmbd_config); | ||
storage = std::make_unique<lmdb::LmdbStorage>( | ||
lmdb::LmdbStorage(library_path, mode, lmbd_config) | ||
); | ||
storage = std::make_unique<lmdb::LmdbStorage>(library_path, mode, lmbd_config); | ||
} else if (type_name == mongo::MongoStorage::Config::descriptor()->full_name()) { | ||
mongo::MongoStorage::Config mongo_config; | ||
storage_descriptor.config().UnpackTo(&mongo_config); | ||
storage = std::make_unique<mongo::MongoStorage>( | ||
mongo::MongoStorage(library_path, mode, mongo_config) | ||
); | ||
storage = std::make_unique<mongo::MongoStorage>(library_path, mode, mongo_config); | ||
} else if (type_name == memory::MemoryStorage::Config::descriptor()->full_name()) { | ||
memory::MemoryStorage::Config memory_config; | ||
storage_descriptor.config().UnpackTo(&memory_config); | ||
storage = std::make_unique<memory::MemoryStorage>( | ||
memory::MemoryStorage(library_path, mode, memory_config) | ||
); | ||
} else if (type_name == nfs_backed::NfsBackedStorage::Config::descriptor()->full_name()) { | ||
storage = std::make_unique<memory::MemoryStorage>(library_path, mode, memory_config); | ||
} | ||
#ifdef ARCTICDB_INCLUDE_ROCKSDB | ||
else if (type_name == rocksdb::RocksDBStorage::Config::descriptor()->full_name()) { | ||
rocksdb::RocksDBStorage::Config rocksdb_config; | ||
storage_descriptor.config().UnpackTo(&rocksdb_config); | ||
storage = std::make_unique<rocksdb::RocksDBStorage>(library_path, mode, rocksdb_config); | ||
} | ||
#endif | ||
else if (type_name == nfs_backed::NfsBackedStorage::Config::descriptor()->full_name()) { | ||
nfs_backed::NfsBackedStorage::Config nfs_backed_config; | ||
storage_descriptor.config().UnpackTo(&nfs_backed_config); | ||
storage = std::make_unique<nfs_backed::NfsBackedStorage>( | ||
nfs_backed::NfsBackedStorage(library_path, mode, nfs_backed_config) | ||
); | ||
storage = std::make_unique<nfs_backed::NfsBackedStorage>(library_path, mode, nfs_backed_config); | ||
} else if (type_name == azure::AzureStorage::Config::descriptor()->full_name()) { | ||
azure::AzureStorage::Config azure_config; | ||
storage_descriptor.config().UnpackTo(&azure_config); | ||
storage = std::make_unique<azure::AzureStorage >( | ||
azure::AzureStorage(library_path, mode, azure_config) | ||
); | ||
storage = std::make_unique<azure::AzureStorage>(library_path, mode, azure_config); | ||
} else | ||
throw std::runtime_error(fmt::format("Unknown config type {}", type_name)); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
""" | ||
Copyright 2023 Man Group Operations Limited | ||
|
||
Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. | ||
|
||
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. | ||
""" | ||
import re | ||
import os | ||
|
||
# from dataclasses import dataclass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rm |
||
|
||
|
||
from arcticdb.options import LibraryOptions | ||
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap, LibraryConfig | ||
from arcticdb.version_store.helper import add_rocksdb_library_to_env | ||
from arcticdb.config import _DEFAULT_ENV | ||
from arcticdb.version_store._store import NativeVersionStore | ||
from arcticdb.adapters.arctic_library_adapter import ArcticLibraryAdapter, set_library_options | ||
from arcticdb.encoding_version import EncodingVersion | ||
from arcticdb_ext.storage import CONFIG_LIBRARY_NAME | ||
|
||
|
||
class RocksDBLibraryAdapter(ArcticLibraryAdapter): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you document in here that this is a work in progress, beta API and should not be relied upon? |
||
Connect to a RocksDB backend. | ||
|
||
Only supports the URI ``"rocksdb://"``. TODO: Complete this | ||
""" | ||
|
||
REGEX = r"rocksdb://(?P<path>[^?]*)$" | ||
|
||
@staticmethod | ||
def supports_uri(uri: str) -> bool: | ||
return uri.startswith("rocksdb://") | ||
|
||
def __init__(self, uri: str, encoding_version: EncodingVersion, *args, **kwargs): | ||
match = re.match(self.REGEX, uri) | ||
match_groups = match.groupdict() | ||
|
||
self._path = os.path.abspath(match_groups["path"]) | ||
self._encoding_version = encoding_version | ||
|
||
os.makedirs(self._path, exist_ok=True) | ||
|
||
super().__init__(uri, self._encoding_version) | ||
|
||
def __repr__(self): | ||
return "ROCKSDB()" | ||
|
||
@property | ||
def config_library(self): | ||
env_cfg = EnvironmentConfigsMap() | ||
|
||
add_rocksdb_library_to_env(env_cfg, lib_name=CONFIG_LIBRARY_NAME, env_name=_DEFAULT_ENV, db_dir=self._path) | ||
|
||
lib = NativeVersionStore.create_store_from_config( | ||
env_cfg, _DEFAULT_ENV, CONFIG_LIBRARY_NAME, encoding_version=self._encoding_version | ||
) | ||
|
||
return lib._library | ||
|
||
def get_library_config(self, name, library_options: LibraryOptions): | ||
env_cfg = EnvironmentConfigsMap() | ||
|
||
add_rocksdb_library_to_env(env_cfg, lib_name=name, env_name=_DEFAULT_ENV, db_dir=self._path) | ||
|
||
library_options.encoding_version = ( | ||
library_options.encoding_version if library_options.encoding_version is not None else self._encoding_version | ||
) | ||
set_library_options(env_cfg.env_by_id[_DEFAULT_ENV].lib_by_path[name], library_options) | ||
|
||
return NativeVersionStore.create_library_config( | ||
env_cfg, _DEFAULT_ENV, name, encoding_version=library_options.encoding_version | ||
) | ||
|
||
# TODO: def cleanup_library should do something similar to LMDB | ||
# See PR: 918 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can just hang thing off the module like here
ArcticDB/cpp/arcticdb/storage/python_bindings.cpp
Line 38 in 263e843
library_manager
?