@@ -624,10 +624,11 @@ static bool SaveError(char** errptr, const Status& s) {
624
624
return true ;
625
625
}
626
626
627
- // Copies str to a new malloc()-ed buffer. The buffer is not NUL terminated.
628
- static char * CopyString (const std::string& str) {
629
- char * result = reinterpret_cast <char *>(malloc (sizeof (char ) * str.size ()));
630
- memcpy (result, str.data (), sizeof (char ) * str.size ());
627
+ // Helper function to copy string data to a malloc'd buffer
628
+ // Works with std::string, Slice, and PinnableSlice through implicit conversion
629
+ static inline char * CopyString (const Slice& slice) {
630
+ char * result = reinterpret_cast <char *>(malloc (slice.size ()));
631
+ memcpy (result, slice.data (), slice.size ());
631
632
return result;
632
633
}
633
634
@@ -1393,11 +1394,14 @@ char* rocksdb_get(rocksdb_t* db, const rocksdb_readoptions_t* options,
1393
1394
const char * key, size_t keylen, size_t * vallen,
1394
1395
char ** errptr) {
1395
1396
char * result = nullptr ;
1396
- std::string tmp;
1397
- Status s = db->rep ->Get (options->rep , Slice (key, keylen), &tmp);
1397
+ // Use PinnableSlice to avoid unnecessary copy
1398
+ PinnableSlice pinnable_val;
1399
+ Status s = db->rep ->Get (options->rep , db->rep ->DefaultColumnFamily (),
1400
+ Slice (key, keylen), &pinnable_val);
1398
1401
if (s.ok ()) {
1399
- *vallen = tmp.size ();
1400
- result = CopyString (tmp);
1402
+ *vallen = pinnable_val.size ();
1403
+ // Only one copy: from PinnableSlice to malloc'd buffer
1404
+ result = CopyString (pinnable_val);
1401
1405
} else {
1402
1406
*vallen = 0 ;
1403
1407
if (!s.IsNotFound ()) {
@@ -1412,12 +1416,14 @@ char* rocksdb_get_cf(rocksdb_t* db, const rocksdb_readoptions_t* options,
1412
1416
const char * key, size_t keylen, size_t * vallen,
1413
1417
char ** errptr) {
1414
1418
char * result = nullptr ;
1415
- std::string tmp;
1416
- Status s =
1417
- db->rep ->Get (options->rep , column_family->rep , Slice (key, keylen), &tmp);
1419
+ // Use PinnableSlice to avoid unnecessary copy
1420
+ PinnableSlice pinnable_val;
1421
+ Status s = db->rep ->Get (options->rep , column_family->rep , Slice (key, keylen),
1422
+ &pinnable_val);
1418
1423
if (s.ok ()) {
1419
- *vallen = tmp.size ();
1420
- result = CopyString (tmp);
1424
+ *vallen = pinnable_val.size ();
1425
+ // Only one copy: from PinnableSlice to malloc'd buffer
1426
+ result = CopyString (pinnable_val);
1421
1427
} else {
1422
1428
*vallen = 0 ;
1423
1429
if (!s.IsNotFound ()) {
@@ -1498,8 +1504,8 @@ void rocksdb_multi_get(rocksdb_t* db, const rocksdb_readoptions_t* options,
1498
1504
std::vector<Status> statuses = db->rep ->MultiGet (options->rep , keys, &values);
1499
1505
for (size_t i = 0 ; i < num_keys; i++) {
1500
1506
if (statuses[i].ok ()) {
1501
- values_list[i] = CopyString (values[i]);
1502
1507
values_list_sizes[i] = values[i].size ();
1508
+ values_list[i] = CopyString (values[i]);
1503
1509
errs[i] = nullptr ;
1504
1510
} else {
1505
1511
values_list[i] = nullptr ;
@@ -1530,10 +1536,10 @@ void rocksdb_multi_get_with_ts(rocksdb_t* db,
1530
1536
db->rep ->MultiGet (options->rep , keys, &values, ×tamps);
1531
1537
for (size_t i = 0 ; i < num_keys; i++) {
1532
1538
if (statuses[i].ok ()) {
1533
- values_list[i] = CopyString (values[i]);
1534
1539
values_list_sizes[i] = values[i].size ();
1535
- timestamp_list [i] = CopyString (timestamps [i]);
1540
+ values_list [i] = CopyString (values [i]);
1536
1541
timestamp_list_sizes[i] = timestamps[i].size ();
1542
+ timestamp_list[i] = CopyString (timestamps[i]);
1537
1543
errs[i] = nullptr ;
1538
1544
} else {
1539
1545
values_list[i] = nullptr ;
@@ -1566,8 +1572,8 @@ void rocksdb_multi_get_cf(
1566
1572
db->rep ->MultiGet (options->rep , cfs, keys, &values);
1567
1573
for (size_t i = 0 ; i < num_keys; i++) {
1568
1574
if (statuses[i].ok ()) {
1569
- values_list[i] = CopyString (values[i]);
1570
1575
values_list_sizes[i] = values[i].size ();
1576
+ values_list[i] = CopyString (values[i]);
1571
1577
errs[i] = nullptr ;
1572
1578
} else {
1573
1579
values_list[i] = nullptr ;
@@ -1600,10 +1606,10 @@ void rocksdb_multi_get_cf_with_ts(
1600
1606
db->rep ->MultiGet (options->rep , cfs, keys, &values, ×tamps);
1601
1607
for (size_t i = 0 ; i < num_keys; i++) {
1602
1608
if (statuses[i].ok ()) {
1603
- values_list[i] = CopyString (values[i]);
1604
1609
values_list_sizes[i] = values[i].size ();
1605
- timestamps_list [i] = CopyString (timestamps [i]);
1610
+ values_list [i] = CopyString (values [i]);
1606
1611
timestamps_list_sizes[i] = timestamps[i].size ();
1612
+ timestamps_list[i] = CopyString (timestamps[i]);
1607
1613
errs[i] = nullptr ;
1608
1614
} else {
1609
1615
values_list[i] = nullptr ;
@@ -6888,8 +6894,8 @@ void rocksdb_transaction_multi_get(rocksdb_transaction_t* txn,
6888
6894
txn->rep ->MultiGet (options->rep , keys, &values);
6889
6895
for (size_t i = 0 ; i < num_keys; i++) {
6890
6896
if (statuses[i].ok ()) {
6891
- values_list[i] = CopyString (values[i]);
6892
6897
values_list_sizes[i] = values[i].size ();
6898
+ values_list[i] = CopyString (values[i]);
6893
6899
errs[i] = nullptr ;
6894
6900
} else {
6895
6901
values_list[i] = nullptr ;
@@ -6917,8 +6923,8 @@ void rocksdb_transaction_multi_get_for_update(
6917
6923
txn->rep ->MultiGetForUpdate (options->rep , keys, &values);
6918
6924
for (size_t i = 0 ; i < num_keys; i++) {
6919
6925
if (statuses[i].ok ()) {
6920
- values_list[i] = CopyString (values[i]);
6921
6926
values_list_sizes[i] = values[i].size ();
6927
+ values_list[i] = CopyString (values[i]);
6922
6928
errs[i] = nullptr ;
6923
6929
} else {
6924
6930
values_list[i] = nullptr ;
@@ -6949,8 +6955,8 @@ void rocksdb_transaction_multi_get_cf(
6949
6955
txn->rep ->MultiGet (options->rep , cfs, keys, &values);
6950
6956
for (size_t i = 0 ; i < num_keys; i++) {
6951
6957
if (statuses[i].ok ()) {
6952
- values_list[i] = CopyString (values[i]);
6953
6958
values_list_sizes[i] = values[i].size ();
6959
+ values_list[i] = CopyString (values[i]);
6954
6960
errs[i] = nullptr ;
6955
6961
} else {
6956
6962
values_list[i] = nullptr ;
@@ -6981,8 +6987,8 @@ void rocksdb_transaction_multi_get_for_update_cf(
6981
6987
txn->rep ->MultiGetForUpdate (options->rep , cfs, keys, &values);
6982
6988
for (size_t i = 0 ; i < num_keys; i++) {
6983
6989
if (statuses[i].ok ()) {
6984
- values_list[i] = CopyString (values[i]);
6985
6990
values_list_sizes[i] = values[i].size ();
6991
+ values_list[i] = CopyString (values[i]);
6986
6992
errs[i] = nullptr ;
6987
6993
} else {
6988
6994
values_list[i] = nullptr ;
@@ -7085,8 +7091,8 @@ void rocksdb_transactiondb_multi_get(rocksdb_transactiondb_t* txn_db,
7085
7091
txn_db->rep ->MultiGet (options->rep , keys, &values);
7086
7092
for (size_t i = 0 ; i < num_keys; i++) {
7087
7093
if (statuses[i].ok ()) {
7088
- values_list[i] = CopyString (values[i]);
7089
7094
values_list_sizes[i] = values[i].size ();
7095
+ values_list[i] = CopyString (values[i]);
7090
7096
errs[i] = nullptr ;
7091
7097
} else {
7092
7098
values_list[i] = nullptr ;
@@ -7117,8 +7123,8 @@ void rocksdb_transactiondb_multi_get_cf(
7117
7123
txn_db->rep ->MultiGet (options->rep , cfs, keys, &values);
7118
7124
for (size_t i = 0 ; i < num_keys; i++) {
7119
7125
if (statuses[i].ok ()) {
7120
- values_list[i] = CopyString (values[i]);
7121
7126
values_list_sizes[i] = values[i].size ();
7127
+ values_list[i] = CopyString (values[i]);
7122
7128
errs[i] = nullptr ;
7123
7129
} else {
7124
7130
values_list[i] = nullptr ;
@@ -7707,4 +7713,110 @@ uint64_t rocksdb_wait_for_compact_options_get_timeout(
7707
7713
return opt->rep .timeout .count ();
7708
7714
}
7709
7715
7716
+ /* Get variants that avoid extra copies: pinned (zero-copy) handles and single-copy reads into a caller-supplied buffer */
7717
+
7718
+ struct rocksdb_pinnable_handle_t {
7719
+ PinnableSlice rep;
7720
+ };
7721
+
7722
+ rocksdb_pinnable_handle_t * rocksdb_get_pinned_v2 (
7723
+ rocksdb_t * db, const rocksdb_readoptions_t * options, const char * key,
7724
+ size_t keylen, char ** errptr) {
7725
+ rocksdb_pinnable_handle_t * handle = new rocksdb_pinnable_handle_t ;
7726
+ Status s = db->rep ->Get (options->rep , db->rep ->DefaultColumnFamily (),
7727
+ Slice (key, keylen), &handle->rep );
7728
+ if (!s.ok ()) {
7729
+ delete handle;
7730
+ if (!s.IsNotFound ()) {
7731
+ SaveError (errptr, s);
7732
+ }
7733
+ return nullptr ;
7734
+ }
7735
+ return handle;
7736
+ }
7737
+
7738
+ rocksdb_pinnable_handle_t * rocksdb_get_pinned_cf_v2 (
7739
+ rocksdb_t * db, const rocksdb_readoptions_t * options,
7740
+ rocksdb_column_family_handle_t * column_family, const char * key,
7741
+ size_t keylen, char ** errptr) {
7742
+ rocksdb_pinnable_handle_t * handle = new rocksdb_pinnable_handle_t ;
7743
+ Status s = db->rep ->Get (options->rep , column_family->rep , Slice (key, keylen),
7744
+ &handle->rep );
7745
+ if (!s.ok ()) {
7746
+ delete handle;
7747
+ if (!s.IsNotFound ()) {
7748
+ SaveError (errptr, s);
7749
+ }
7750
+ return nullptr ;
7751
+ }
7752
+ return handle;
7753
+ }
7754
+
7755
+ const char * rocksdb_pinnable_handle_get_value (
7756
+ const rocksdb_pinnable_handle_t * handle, size_t * vallen) {
7757
+ if (!handle) {
7758
+ *vallen = 0 ;
7759
+ return nullptr ;
7760
+ }
7761
+ *vallen = handle->rep .size ();
7762
+ return handle->rep .data ();
7763
+ }
7764
+
7765
+ void rocksdb_pinnable_handle_destroy (rocksdb_pinnable_handle_t * handle) {
7766
+ delete handle;
7767
+ }
7768
+
7769
+ unsigned char rocksdb_get_into_buffer (rocksdb_t * db,
7770
+ const rocksdb_readoptions_t * options,
7771
+ const char * key, size_t keylen,
7772
+ char * buffer, size_t buffer_size,
7773
+ size_t * vallen, unsigned char * found,
7774
+ char ** errptr) {
7775
+ PinnableSlice pinnable_val;
7776
+ Status s = db->rep ->Get (options->rep , db->rep ->DefaultColumnFamily (),
7777
+ Slice (key, keylen), &pinnable_val);
7778
+ if (s.ok ()) {
7779
+ *found = 1 ;
7780
+ *vallen = pinnable_val.size ();
7781
+ if (buffer_size >= pinnable_val.size ()) {
7782
+ memcpy (buffer, pinnable_val.data (), pinnable_val.size ());
7783
+ return 1 ; // Success - data copied
7784
+ }
7785
+ return 0 ; // Buffer too small
7786
+ } else {
7787
+ *found = 0 ;
7788
+ *vallen = 0 ;
7789
+ if (!s.IsNotFound ()) {
7790
+ SaveError (errptr, s);
7791
+ }
7792
+ return 0 ;
7793
+ }
7794
+ }
7795
+
7796
+ unsigned char rocksdb_get_into_buffer_cf (
7797
+ rocksdb_t * db, const rocksdb_readoptions_t * options,
7798
+ rocksdb_column_family_handle_t * column_family, const char * key,
7799
+ size_t keylen, char * buffer, size_t buffer_size, size_t * vallen,
7800
+ unsigned char * found, char ** errptr) {
7801
+ PinnableSlice pinnable_val;
7802
+ Status s = db->rep ->Get (options->rep , column_family->rep , Slice (key, keylen),
7803
+ &pinnable_val);
7804
+ if (s.ok ()) {
7805
+ *found = 1 ;
7806
+ *vallen = pinnable_val.size ();
7807
+ if (buffer_size >= pinnable_val.size ()) {
7808
+ memcpy (buffer, pinnable_val.data (), pinnable_val.size ());
7809
+ return 1 ; // Success - data copied
7810
+ }
7811
+ return 0 ; // Buffer too small
7812
+ } else {
7813
+ *found = 0 ;
7814
+ *vallen = 0 ;
7815
+ if (!s.IsNotFound ()) {
7816
+ SaveError (errptr, s);
7817
+ }
7818
+ return 0 ;
7819
+ }
7820
+ }
7821
+
7710
7822
} // end extern "C"
0 commit comments