Skip to content

Commit d763ffe

Browse files
committed
Refactor
1 parent 0227abd commit d763ffe

File tree

4 files changed

+43
-209
lines changed

4 files changed

+43
-209
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ set(SPARROW_IPC_HEADERS
126126
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_array_impl.hpp
127127
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp
128128
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_interval_array.hpp
129+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_null_array.hpp
129130
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp
130131
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_time_related_arrays.hpp
131-
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_null_array.hpp
132132
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
133133
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
134134
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize.hpp

include/sparrow_ipc/deserialize_array_impl.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#pragma once
22

33
#include <optional>
4+
#include <span>
5+
#include <string>
6+
#include <string_view>
47
#include <unordered_set>
58
#include <vector>
69

@@ -38,12 +41,13 @@ namespace sparrow_ipc::detail
3841
std::string_view name,
3942
const std::optional<std::vector<sparrow::metadata_pair>>& metadata,
4043
bool nullable,
41-
size_t& buffer_index
44+
size_t& buffer_index,
45+
std::optional<std::string> format_override = std::nullopt
4246
)
4347
{
44-
const std::string_view format = data_type_to_format(
45-
sparrow::detail::get_data_type_from_array<ArrayType<T>>::get()
46-
);
48+
const std::string_view format = format_override.has_value()
49+
? *format_override
50+
: data_type_to_format(sparrow::detail::get_data_type_from_array<ArrayType<T>>::get());
4751

4852
// Set up flags based on nullable
4953
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
include/sparrow_ipc/deserialize_time_related_arrays.hpp

Lines changed: 32 additions & 204 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#pragma once
22

33
#include <optional>
4-
#include <unordered_set>
4+
#include <span>
5+
#include <string>
6+
#include <string_view>
57
#include <vector>
68

79
#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
@@ -11,9 +13,7 @@
1113
#include <sparrow/timestamp_without_timezone_array.hpp>
1214

1315
#include "Message_generated.h"
14-
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
15-
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
16-
#include "sparrow_ipc/deserialize_utils.hpp"
16+
#include "sparrow_ipc/deserialize_array_impl.hpp"
1717

1818
namespace sparrow_ipc
1919
{
@@ -27,59 +27,15 @@ namespace sparrow_ipc
2727
size_t& buffer_index
2828
)
2929
{
30-
const std::string_view format = data_type_to_format(
31-
sparrow::detail::get_data_type_from_array<sparrow::date_array<T>>::get()
32-
);
33-
34-
// Set up flags based on nullable
35-
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
36-
if (nullable)
37-
{
38-
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
39-
}
40-
41-
ArrowSchema schema = make_non_owning_arrow_schema(
42-
format,
43-
name.data(),
30+
return detail::deserialize_non_owning_simple_array<sparrow::date_array, T>(
31+
record_batch,
32+
body,
33+
name,
4434
metadata,
45-
flags,
46-
0,
47-
nullptr,
48-
nullptr
49-
);
50-
51-
const auto compression = record_batch.compression();
52-
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
53-
54-
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
55-
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
56-
57-
if (compression)
58-
{
59-
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
60-
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
61-
}
62-
else
63-
{
64-
buffers.push_back(validity_buffer_span);
65-
buffers.push_back(data_buffer_span);
66-
}
67-
68-
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
69-
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
70-
71-
ArrowArray array = make_arrow_array<arrow_array_private_data>(
72-
record_batch.length(),
73-
null_count,
74-
0,
75-
0,
76-
nullptr,
77-
nullptr,
78-
std::move(buffers)
35+
nullable,
36+
buffer_index,
37+
std::nullopt
7938
);
80-
81-
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
82-
return sparrow::date_array<T>{std::move(ap)};
8339
}
8440

8541
template <typename T>
@@ -97,55 +53,15 @@ namespace sparrow_ipc
9753
sparrow::detail::get_data_type_from_array<sparrow::timestamp_array<T>>::get()
9854
)) + timezone;
9955

100-
// Set up flags based on nullable
101-
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
102-
if (nullable)
103-
{
104-
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
105-
}
106-
107-
ArrowSchema schema = make_non_owning_arrow_schema(
108-
format.c_str(),
109-
name.data(),
56+
return detail::deserialize_non_owning_simple_array<sparrow::timestamp_array, T>(
57+
record_batch,
58+
body,
59+
name,
11060
metadata,
111-
flags,
112-
0,
113-
nullptr,
114-
nullptr
61+
nullable,
62+
buffer_index,
63+
std::move(format)
11564
);
116-
117-
const auto compression = record_batch.compression();
118-
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
119-
120-
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
121-
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
122-
123-
if (compression)
124-
{
125-
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
126-
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
127-
}
128-
else
129-
{
130-
buffers.push_back(validity_buffer_span);
131-
buffers.push_back(data_buffer_span);
132-
}
133-
134-
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
135-
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
136-
137-
ArrowArray array = make_arrow_array<arrow_array_private_data>(
138-
record_batch.length(),
139-
null_count,
140-
0,
141-
0,
142-
nullptr,
143-
nullptr,
144-
std::move(buffers)
145-
);
146-
147-
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
148-
return sparrow::timestamp_array<T>{std::move(ap)};
14965
}
15066

15167
template <typename T>
@@ -158,59 +74,15 @@ namespace sparrow_ipc
15874
size_t& buffer_index
15975
)
16076
{
161-
const std::string_view format = data_type_to_format(
162-
sparrow::detail::get_data_type_from_array<sparrow::timestamp_without_timezone_array<T>>::get()
163-
);
164-
165-
// Set up flags based on nullable
166-
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
167-
if (nullable)
168-
{
169-
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
170-
}
171-
172-
ArrowSchema schema = make_non_owning_arrow_schema(
173-
format,
174-
name.data(),
77+
return detail::deserialize_non_owning_simple_array<sparrow::timestamp_without_timezone_array, T>(
78+
record_batch,
79+
body,
80+
name,
17581
metadata,
176-
flags,
177-
0,
178-
nullptr,
179-
nullptr
180-
);
181-
182-
const auto compression = record_batch.compression();
183-
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
184-
185-
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
186-
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
187-
188-
if (compression)
189-
{
190-
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
191-
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
192-
}
193-
else
194-
{
195-
buffers.push_back(validity_buffer_span);
196-
buffers.push_back(data_buffer_span);
197-
}
198-
199-
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
200-
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
201-
202-
ArrowArray array = make_arrow_array<arrow_array_private_data>(
203-
record_batch.length(),
204-
null_count,
205-
0,
206-
0,
207-
nullptr,
208-
nullptr,
209-
std::move(buffers)
82+
nullable,
83+
buffer_index,
84+
std::nullopt
21085
);
211-
212-
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
213-
return sparrow::timestamp_without_timezone_array<T>{std::move(ap)};
21486
}
21587

21688
template <typename T>
@@ -223,58 +95,14 @@ namespace sparrow_ipc
22395
size_t& buffer_index
22496
)
22597
{
226-
const std::string_view format = data_type_to_format(
227-
sparrow::detail::get_data_type_from_array<sparrow::time_array<T>>::get()
228-
);
229-
230-
// Set up flags based on nullable
231-
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
232-
if (nullable)
233-
{
234-
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
235-
}
236-
237-
ArrowSchema schema = make_non_owning_arrow_schema(
238-
format,
239-
name.data(),
98+
return detail::deserialize_non_owning_simple_array<sparrow::time_array, T>(
99+
record_batch,
100+
body,
101+
name,
240102
metadata,
241-
flags,
242-
0,
243-
nullptr,
244-
nullptr
103+
nullable,
104+
buffer_index,
105+
std::nullopt
245106
);
246-
247-
const auto compression = record_batch.compression();
248-
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
249-
250-
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
251-
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
252-
253-
if (compression)
254-
{
255-
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
256-
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
257-
}
258-
else
259-
{
260-
buffers.push_back(validity_buffer_span);
261-
buffers.push_back(data_buffer_span);
262-
}
263-
264-
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
265-
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
266-
267-
ArrowArray array = make_arrow_array<arrow_array_private_data>(
268-
record_batch.length(),
269-
null_count,
270-
0,
271-
0,
272-
nullptr,
273-
nullptr,
274-
std::move(buffers)
275-
);
276-
277-
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
278-
return sparrow::time_array<T>{std::move(ap)};
279107
}
280108
}

src/deserialize.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,8 @@ namespace sparrow_ipc
402402
arrays.emplace_back(deserialize_non_owning_time_array_lambda.template operator()<sparrow::chrono::time_nanoseconds>());
403403
break;
404404
}
405+
break;
406+
}
405407
case org::apache::arrow::flatbuf::Type::Null:
406408
arrays.emplace_back(deserialize_non_owning_null(
407409
record_batch,

0 commit comments

Comments
 (0)