Skip to content
Merged
Show file tree
Hide file tree
Changes from 58 commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
3175516
feat: Add LOAD PARQUET clause
as51340 Oct 1, 2025
cd9946c
testing: Fix clang-tidy, add unit tests and fix previously failing tests
as51340 Oct 1, 2025
90be6ef
feat: Compile with arrow, needs glueing with operator
as51340 Oct 1, 2025
cfc728d
feat: Use ReadTable and TableBatchReader
as51340 Oct 1, 2025
9d4ea5e
feat: Use RecordBatchReader
as51340 Oct 2, 2025
3ef9445
feat: Optimize memory allocation patterns
as51340 Oct 2, 2025
80335d0
feat: Operate on batches instead of rows
as51340 Oct 3, 2025
725e1af
feat: Use arrow compute cast when converting to string
as51340 Oct 3, 2025
6ffb18d
feat: Move arrow under query. Optimize int, double and string allocat…
as51340 Oct 3, 2025
c44e54a
feat: Queue-based batching
as51340 Oct 3, 2025
4751352
feat: Optimize indices usage, use local data queue and use swap seman…
as51340 Oct 6, 2025
255289a
feat: Delete 10m parquet
as51340 Oct 6, 2025
d0b3530
feat: Delete parquet files
as51340 Oct 6, 2025
ac09535
feat: Support other types in the parquet file
as51340 Oct 7, 2025
d441070
feat: Optimize WalFile::FinalizeWal
as51340 Oct 7, 2025
9317367
testing: Add e2e tests for LOAD PARQUET clause
as51340 Oct 7, 2025
bd47d5b
testing: Add unit tests
as51340 Oct 7, 2025
67d5d92
refactor: Remove some unnecessary includes
as51340 Oct 8, 2025
e41ffc1
feat: Add arrow as Conan dependency
as51340 Oct 8, 2025
7c0a110
docs: Explain arrow
as51340 Oct 8, 2025
d943ac2
refactor: Fix clang-tidy, refactor Parquet reader
as51340 Oct 9, 2025
3b60776
feat: Add support for numeric types
as51340 Oct 9, 2025
9e7925b
feat: Add direct support for Date32 Parquet type
as51340 Oct 9, 2025
6cdadfc
feat: Support Date32, Date64 and fix half_float Parquet type
as51340 Oct 9, 2025
af557f2
feat: Add support for Parquet type Time32
as51340 Oct 9, 2025
7ade8c7
feat: Add support for Parquet type TIME64
as51340 Oct 9, 2025
4170b98
feat: Add support for Parquet type TIME64ns
as51340 Oct 9, 2025
6dad28f
feat: Add support for Parquet type TIMESTAMP
as51340 Oct 9, 2025
49e81c3
refactor: Parquet temporal types
as51340 Oct 9, 2025
bb860cb
feat: Add support for Parquet type DURATION
as51340 Oct 9, 2025
1448ba8
feat: Add support for Parquet binary types
as51340 Oct 9, 2025
f7471d2
feat: Add support for Parquet STRING_VIEW and LARGE_STRING types
as51340 Oct 9, 2025
fd56ec4
feat: Add support for Parquet DECIMAL types
as51340 Oct 10, 2025
816fa1a
feat: Use converters and support recursive type instantiation for li…
as51340 Oct 10, 2025
61ac554
docs: Remove TODOs
as51340 Oct 10, 2025
04f988d
feat: Integrate thread safe allocator with Parquet
as51340 Oct 10, 2025
9f9f85b
feat: Optimize usage of thread safe allocator
as51340 Oct 20, 2025
f986e76
fix: Avoid TypedValue copies
as51340 Oct 22, 2025
c15938c
fix: Use FrameWriter::Modify
as51340 Oct 22, 2025
52772fe
fix: Restore WAL handling, use chrono for tag dispatch
as51340 Oct 22, 2025
c6bb114
fix: Clang-tidy and build of unit tests
as51340 Oct 22, 2025
790f5e8
fix: Pretty printer
as51340 Oct 22, 2025
833eee1
fix: clang-tidy and tests/unit/typed_value
as51340 Oct 23, 2025
cec2597
testing: Add mgbench code
as51340 Oct 23, 2025
d17511d
fix: Remove logging code
as51340 Oct 23, 2025
38db7a5
fix: Includes in reader.cpp
as51340 Oct 23, 2025
99efdfd
feat: Restore to cpp20
as51340 Oct 23, 2025
b60ba20
feat: Restore to cpp20 in CMakeLists.txt
as51340 Oct 23, 2025
7ae57a5
feat: Restore to cpp23 in CMakeLists.txt
as51340 Oct 23, 2025
0d630ef
feat: Restore to cpp23 in memgraph_template_profile
as51340 Oct 23, 2025
30ce29a
feat: Build directly arrow
as51340 Oct 23, 2025
48cbb16
fix: Single CMake arrow target
as51340 Oct 23, 2025
4fe0037
fix: Use CMAKE_INSTALL_LIBDIR for libraries
as51340 Oct 23, 2025
0dd5566
fix: Force usage of lib instead of lib64
as51340 Oct 23, 2025
3f7de83
Handle arrow debug suffix (#3374)
mattkjames7 Oct 24, 2025
0d525f8
feat: Use cv::wait, add comments, optimize binary usage
as51340 Oct 25, 2025
730a55f
refactor: Remove nodes_binary.parquet file
as51340 Oct 25, 2025
31ec177
fix: Mutable mutex and LARGE_BINARY handling
as51340 Oct 26, 2025
7fcc99c
feat: Use PrimitiveLiteralExpressionEvaluator
as51340 Oct 27, 2025
91a5ffb
refactor: Remove unused variables
as51340 Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions libs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,41 @@ set_property(TARGET usearch PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/usearch/include
${CMAKE_CURRENT_SOURCE_DIR}/usearch/fp16/include
${CMAKE_CURRENT_SOURCE_DIR}/usearch/stringzilla/include)

# build_arrow()
#
# Builds Apache Arrow (with Parquet enabled) as static libraries from the
# sources under ${CMAKE_CURRENT_SOURCE_DIR}/arrow/cpp, installs them into
# ${CMAKE_CURRENT_SOURCE_DIR}/arrow, and exposes the result through the
# imported interface target `arrow`.
#
# This is a macro, so ARROW_ROOT, ARROW_LIBDIR, _d and _arrow_libs stay set
# in the calling scope after the call.
macro(build_arrow)
  set(ARROW_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/arrow")
  set(ARROW_LIBDIR "${ARROW_ROOT}/lib")

  # Create the include directory up front; presumably needed because the
  # headers are only installed at build time while the imported target below
  # already refers to this path.
  file(MAKE_DIRECTORY "${ARROW_ROOT}/include")

  # NOTE(review): add_external_project is a helper defined outside this
  # block; it is expected to create the `arrow-proj` target used further down.
  add_external_project(arrow
    SOURCE_DIR "${ARROW_ROOT}/cpp"
    CMAKE_ARGS
      "-DCMAKE_INSTALL_PREFIX=${ARROW_ROOT}"
      # Install libraries into `lib` so that they end up in ARROW_LIBDIR.
      -DCMAKE_INSTALL_LIBDIR=lib
      -DARROW_PARQUET=ON
      -DARROW_WITH_SNAPPY=ON
      -DARROW_FILESYSTEM=ON
      -DARROW_BUILD_STATIC=ON
      -DARROW_BUILD_SHARED=OFF
      -DARROW_MIMALLOC=OFF
      -DARROW_WITH_RE2=OFF
      -DARROW_WITH_UTF8PROC=OFF
      -DARROW_DEPENDENCY_SOURCE=BUNDLED
    # Both the plain and the `d`-suffixed archive names are declared.
    BUILD_BYPRODUCTS
      "${ARROW_LIBDIR}/libarrow.a"
      "${ARROW_LIBDIR}/libarrowd.a"
      "${ARROW_LIBDIR}/libparquet.a"
      "${ARROW_LIBDIR}/libparquetd.a"
      "${ARROW_LIBDIR}/libarrow_bundled_dependencies.a"
  )

  # Debug builds use a `d` suffix on the arrow and parquet archives; the
  # generator expression expands to "d" for the Debug configuration and to
  # nothing otherwise. The bundled-dependencies archive has no suffix.
  set(_d "$<$<CONFIG:Debug>:d>")
  set(_arrow_libs
    "${ARROW_LIBDIR}/libparquet${_d}.a"
    "${ARROW_LIBDIR}/libarrow${_d}.a"
    "${ARROW_LIBDIR}/libarrow_bundled_dependencies.a"
  )

  # Single imported target carrying the include path and all three archives.
  add_library(arrow INTERFACE IMPORTED GLOBAL)
  set_property(TARGET arrow PROPERTY
    INTERFACE_INCLUDE_DIRECTORIES "${ARROW_ROOT}/include")
  set_property(TARGET arrow PROPERTY
    INTERFACE_LINK_LIBRARIES "${_arrow_libs}")

  # Make the `arrow` target depend on the external build target.
  add_dependencies(arrow arrow-proj)
endmacro()

build_arrow()
6 changes: 6 additions & 0 deletions libs/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ declare -A primary_urls=(
["nuraft"]="http://$local_cache_host/git/NuRaft.git"
["mgcxx"]="http://$local_cache_host/git/mgcxx.git"
["usearch"]="http://$local_cache_host/git/usearch.git"
["arrow"]="http://$local_cache_host/git/arrow.git"
)

# The goal of secondary urls is to have links to the "source of truth" of
Expand All @@ -175,6 +176,7 @@ declare -A secondary_urls=(
["nuraft"]="https://github.com/eBay/NuRaft.git"
["mgcxx"]="https://github.com/memgraph/mgcxx.git"
["usearch"]="https://github.com/unum-cloud/usearch.git"
["arrow"]="https://github.com/apache/arrow"
)

# Skip download if we are under the latest toolchains (>= 6).
Expand Down Expand Up @@ -268,3 +270,7 @@ repo_clone_try_double "${primary_urls[usearch]}" "${secondary_urls[usearch]}" "u
pushd usearch
git submodule update --init --recursive
popd

# arrow (load parquet): fetch the Apache Arrow sources at the pinned ref
# below, trying the primary URL and the secondary URL registered for "arrow".
# NOTE(review): the meaning of the trailing `true` argument is defined by
# repo_clone_try_double, which is not visible here -- confirm before changing.
arrow_ref="apache-arrow-21.0.0"
repo_clone_try_double "${primary_urls[arrow]}" "${secondary_urls[arrow]}" "arrow" "$arrow_ref" true
2 changes: 1 addition & 1 deletion src/audit/log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ inline nlohmann::json BoltValueToJson(const communication::bolt::Value &value) {
}
case Date: {
std::stringstream ss;
ss << utils::Date(value.ValueDate().MicrosecondsSinceEpoch());
ss << utils::Date(std::chrono::microseconds{value.ValueDate().MicrosecondsSinceEpoch()});
ret = ss.str();
break;
}
Expand Down
2 changes: 1 addition & 1 deletion src/glue/communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ Value ToBoltValue(const storage::PropertyValue &value, const storage::Storage &s
const auto &type = value.ValueTemporalData();
switch (type.type) {
case storage::TemporalType::Date:
return Value(utils::Date(type.microseconds));
return {utils::Date{std::chrono::microseconds{type.microseconds}}};
case storage::TemporalType::LocalTime:
return Value(utils::LocalTime(type.microseconds));
case storage::TemporalType::LocalDateTime:
Expand Down
4 changes: 3 additions & 1 deletion src/query/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ add_library(mg-query STATIC)
target_sources(mg-query
PRIVATE
auth_query_handler.cpp
arrow_parquet/reader.cpp
common.cpp
context.cpp
cypher_query_interpreter.cpp
Expand Down Expand Up @@ -86,6 +87,7 @@ target_sources(mg-query
PUBLIC
FILE_SET HEADERS
FILES
arrow_parquet/reader.hpp
dependant_symbol_visitor.hpp
edge_accessor.hpp
frontend/ast/ast.hpp
Expand All @@ -111,13 +113,13 @@ target_sources(mg-query
vertex_accessor.hpp
)


target_link_libraries(mg-query
PUBLIC
dl
cppitertools::cppitertools
range-v3::range-v3
Python3::Python
arrow
mg-integrations-pulsar
mg-integrations-kafka
mg::storage
Expand Down
Loading
Loading