Skip to content

Commit e79d60d

Browse files
authored
GH-45486: [GLib] Add GArrowArrayStatistics (#45490)
### Rationale for this change

GLib should be able to use `arrow::ArrayStatistics`.

### What changes are included in this PR?

Add `GArrowArrayStatistics` with minimal features.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #45486

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent e9ec383 commit e79d60d

File tree

4 files changed

+205
-0
lines changed

4 files changed

+205
-0
lines changed

Diff for: c_glib/arrow-glib/basic-array.cpp

+130
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ G_BEGIN_DECLS
3737
* @title: Basic array classes
3838
* @include: arrow-glib/arrow-glib.h
3939
*
40+
* #GArrowArrayStatistics is a class for statistics of an array.
41+
*
4042
* #GArrowArray is a base class for all array classes such as
4143
* #GArrowBooleanArray.
4244
*
@@ -364,6 +366,106 @@ garrow_equal_options_is_approx(GArrowEqualOptions *options)
364366
return priv->approx;
365367
}
366368

369+
struct GArrowArrayStatisticsPrivate
370+
{
371+
arrow::ArrayStatistics statistics;
372+
};
373+
374+
/* GObject property IDs of GArrowArrayStatistics. */
enum {
  PROP_STATISTICS = 1,
};
377+
378+
/* Registers GArrowArrayStatistics as a GObject subclass that carries
 * instance-private data. */
G_DEFINE_TYPE_WITH_PRIVATE(GArrowArrayStatistics, garrow_array_statistics, G_TYPE_OBJECT)

/* Returns the GArrowArrayStatisticsPrivate of the given instance. */
#define GARROW_ARRAY_STATISTICS_GET_PRIVATE(obj)                                         \
  static_cast<GArrowArrayStatisticsPrivate *>(                                           \
    garrow_array_statistics_get_instance_private(GARROW_ARRAY_STATISTICS(obj)))
383+
384+
static void
385+
garrow_array_statistics_finalize(GObject *object)
386+
{
387+
auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object);
388+
priv->statistics.~ArrayStatistics();
389+
G_OBJECT_CLASS(garrow_array_statistics_parent_class)->finalize(object);
390+
}
391+
392+
static void
393+
garrow_array_statistics_set_property(GObject *object,
394+
guint prop_id,
395+
const GValue *value,
396+
GParamSpec *pspec)
397+
{
398+
auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object);
399+
400+
switch (prop_id) {
401+
case PROP_STATISTICS:
402+
priv->statistics = *static_cast<arrow::ArrayStatistics *>(g_value_get_pointer(value));
403+
break;
404+
default:
405+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
406+
break;
407+
}
408+
}
409+
410+
static void
411+
garrow_array_statistics_init(GArrowArrayStatistics *object)
412+
{
413+
auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object);
414+
new (&priv->statistics) arrow::ArrayStatistics;
415+
}
416+
417+
static void
418+
garrow_array_statistics_class_init(GArrowArrayStatisticsClass *klass)
419+
{
420+
auto gobject_class = G_OBJECT_CLASS(klass);
421+
gobject_class->finalize = garrow_array_statistics_finalize;
422+
gobject_class->set_property = garrow_array_statistics_set_property;
423+
424+
auto spec = g_param_spec_pointer(
425+
"statistics",
426+
"Statistics",
427+
"The raw arrow::ArrayStatistics *",
428+
static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
429+
g_object_class_install_property(gobject_class, PROP_STATISTICS, spec);
430+
}
431+
432+
/**
433+
* garrow_array_statistics_has_null_count:
434+
* @statistics: A #GArrowArrayStatistics.
435+
*
436+
* Returns: %TRUE if @statistics has a valid null count value,
437+
* %FALSE otherwise.
438+
*
439+
* Since: 20.0.0
440+
*/
441+
gboolean
442+
garrow_array_statistics_has_null_count(GArrowArrayStatistics *statistics)
443+
{
444+
auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(statistics);
445+
return priv->statistics.null_count.has_value();
446+
}
447+
448+
/**
449+
* garrow_array_statistics_get_null_count:
450+
* @statistics: A #GArrowArrayStatistics.
451+
*
452+
* Returns: 0 or larger value if @statistics has a valid null count value,
453+
* -1 otherwise.
454+
*
455+
* Since: 20.0.0
456+
*/
457+
gint64
458+
garrow_array_statistics_get_null_count(GArrowArrayStatistics *statistics)
459+
{
460+
auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(statistics);
461+
const auto &null_count = priv->statistics.null_count;
462+
if (null_count) {
463+
return null_count.value();
464+
} else {
465+
return -1;
466+
}
467+
}
468+
367469
typedef struct GArrowArrayPrivate_
368470
{
369471
std::shared_ptr<arrow::Array> array;
@@ -1049,6 +1151,27 @@ garrow_array_validate_full(GArrowArray *array, GError **error)
10491151
return garrow::check(error, arrow_array->ValidateFull(), "[array][validate-full]");
10501152
}
10511153

1154+
/**
1155+
* garrow_array_get_statistics:
1156+
* @array: A #GArrowArray.
1157+
*
1158+
* Returns: (transfer full): The associated #GArrowArrayStatistics of @array,
1159+
* %NULL if @array doesn't have any associated statistics.
1160+
*
1161+
* Since: 20.0.0
1162+
*/
1163+
GArrowArrayStatistics *
1164+
garrow_array_get_statistics(GArrowArray *array)
1165+
{
1166+
const auto arrow_array = garrow_array_get_raw(array);
1167+
const auto &statistics = arrow_array->statistics();
1168+
if (statistics) {
1169+
return garrow_array_statistics_new_raw(statistics.get());
1170+
} else {
1171+
return nullptr;
1172+
}
1173+
}
1174+
10521175
G_DEFINE_TYPE(GArrowNullArray, garrow_null_array, GARROW_TYPE_ARRAY)
10531176

10541177
static void
@@ -3468,6 +3591,13 @@ garrow_equal_options_get_raw(GArrowEqualOptions *equal_options)
34683591
return &(priv->options);
34693592
}
34703593

3594+
/* Creates a GArrowArrayStatistics from a raw arrow::ArrayStatistics
 * pointer by passing it as the construct-only "statistics" property. */
GArrowArrayStatistics *
garrow_array_statistics_new_raw(arrow::ArrayStatistics *arrow_statistics)
{
  auto object =
    g_object_new(GARROW_TYPE_ARRAY_STATISTICS, "statistics", arrow_statistics, nullptr);
  return GARROW_ARRAY_STATISTICS(object);
}
3600+
34713601
GArrowArray *
34723602
garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
34733603
{

Diff for: c_glib/arrow-glib/basic-array.h

+20
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,22 @@ GARROW_AVAILABLE_IN_5_0
4242
gboolean
4343
garrow_equal_options_is_approx(GArrowEqualOptions *options);
4444

45+
#define GARROW_TYPE_ARRAY_STATISTICS (garrow_array_statistics_get_type())
46+
GARROW_AVAILABLE_IN_20_0
47+
G_DECLARE_DERIVABLE_TYPE(
48+
GArrowArrayStatistics, garrow_array_statistics, GARROW, ARRAY_STATISTICS, GObject)
49+
struct _GArrowArrayStatisticsClass
50+
{
51+
GObjectClass parent_class;
52+
};
53+
54+
GARROW_AVAILABLE_IN_20_0
55+
gboolean
56+
garrow_array_statistics_has_null_count(GArrowArrayStatistics *statistics);
57+
GARROW_AVAILABLE_IN_20_0
58+
gint64
59+
garrow_array_statistics_get_null_count(GArrowArrayStatistics *statistics);
60+
4561
GARROW_AVAILABLE_IN_6_0
4662
GArrowArray *
4763
garrow_array_import(gpointer c_abi_array, GArrowDataType *data_type, GError **error);
@@ -134,6 +150,10 @@ GARROW_AVAILABLE_IN_20_0
134150
gboolean
135151
garrow_array_validate_full(GArrowArray *array, GError **error);
136152

153+
GARROW_AVAILABLE_IN_20_0
154+
GArrowArrayStatistics *
155+
garrow_array_get_statistics(GArrowArray *array);
156+
137157
#define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type())
138158
GARROW_AVAILABLE_IN_ALL
139159
G_DECLARE_DERIVABLE_TYPE(

Diff for: c_glib/arrow-glib/basic-array.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ GARROW_EXTERN
2727
arrow::EqualOptions *
2828
garrow_equal_options_get_raw(GArrowEqualOptions *equal_options);
2929

30+
GARROW_EXTERN
31+
GArrowArrayStatistics *
32+
garrow_array_statistics_new_raw(arrow::ArrayStatistics *arrow_statistics);
33+
3034
GARROW_EXTERN
3135
GArrowArray *
3236
garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array);

Diff for: c_glib/test/test-array-statistics.rb

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Tests for the array statistics object obtained from an array that was
# read back from a Parquet file.
class TestArrayStatistics < Test::Unit::TestCase
  include Helper::Buildable

  # Writes a one-column int64 table (one null, two values) to a temporary
  # Parquet file, reads it back and keeps the statistics of the first
  # chunk of the first column in @statistics. The test body runs at the
  # `yield`, i.e. while the temporary file and the reader still exist.
  def setup
    omit("Parquet is required") unless defined?(::Parquet)

    Tempfile.create(["data", ".parquet"]) do |file|
      @file = file
      values = [nil, -(2 ** 32), 2 ** 32]
      @table = build_table("int64" => build_int64_array(values))

      writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
      writer.write_table(@table, 1024)
      writer.close

      reader = Parquet::ArrowFileReader.new(@file.path)
      begin
        first_chunk = reader.read_table.get_column_data(0).get_chunk(0)
        @statistics = first_chunk.statistics
        yield
      ensure
        # Runs inside the Tempfile.create block, so the reader is released
        # before the temporary file is cleaned up.
        reader.unref
      end
    end
  end

  test("#has_null_count?") do
    assert do
      @statistics.has_null_count?
    end
  end

  test("#null_count") do
    # The written array contains exactly one nil.
    assert_equal(1, @statistics.null_count)
  end
end

0 commit comments

Comments
 (0)