Skip to content

Commit

Permalink
Use arrow1 datatype in reflection (#8596)
Browse files Browse the repository at this point in the history
* Part of #3741
  • Loading branch information
emilk authored Jan 7, 2025
1 parent cef0ffe commit 287b8d5
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 126 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ fn generate_component_reflection(
ComponentReflection {
docstring_md: #docstring_md,
custom_placeholder: #custom_placeholder,
datatype: #type_name::arrow2_datatype(),
datatype: #type_name::arrow_datatype(),
}
};
quoted_pairs.push(quote! { (#quoted_name, #quoted_reflection) });
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_chunk_store/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ impl ChunkStore {
.filter_map(|(entity_path, component_descr)| {
let metadata =
self.lookup_column_metadata(entity_path, &component_descr.component_name)?;
let datatype = self.lookup_datatype(&component_descr.component_name)?;
let datatype = self.lookup_datatype_arrow2(&component_descr.component_name)?;

Some(((entity_path, component_descr), (metadata, datatype)))
})
Expand Down Expand Up @@ -833,7 +833,7 @@ impl ChunkStore {
});

let datatype = self
.lookup_datatype(&component_name)
.lookup_datatype_arrow2(&component_name)
.cloned()
.unwrap_or(Arrow2Datatype::Null);

Expand Down
14 changes: 13 additions & 1 deletion crates/store/re_chunk_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, BTreeSet};
use std::sync::atomic::AtomicU64;
use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use arrow2::datatypes::DataType as Arrow2DataType;
use nohash_hasher::IntMap;

Expand Down Expand Up @@ -632,9 +633,20 @@ impl ChunkStore {
self.chunks_per_chunk_id.len()
}

/// Lookup the _latest_ arrow [`ArrowDataType`] used by a specific [`re_types_core::Component`].
#[inline]
pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<ArrowDataType> {
self.type_registry
.get(component_name)
.map(|dt| dt.clone().into())
}

/// Lookup the _latest_ arrow [`Arrow2DataType`] used by a specific [`re_types_core::Component`].
#[inline]
pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<&Arrow2DataType> {
pub fn lookup_datatype_arrow2(
&self,
component_name: &ComponentName,
) -> Option<&Arrow2DataType> {
self.type_registry.get(component_name)
}

Expand Down
218 changes: 109 additions & 109 deletions crates/store/re_types/src/reflection/mod.rs

Large diffs are not rendered by default.

18 changes: 12 additions & 6 deletions crates/store/re_types_core/src/reflection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ impl Reflection {
///
/// See also [`ComponentReflection::custom_placeholder`].
pub fn generic_placeholder_for_datatype(
datatype: &arrow::datatypes::DataType,
) -> arrow::array::ArrayRef {
generic_placeholder_for_datatype_arrow2(&datatype.clone().into()).into()
}

fn generic_placeholder_for_datatype_arrow2(
datatype: &arrow2::datatypes::DataType,
) -> Box<dyn arrow2::array::Array> {
use arrow2::{
Expand Down Expand Up @@ -121,7 +127,7 @@ pub fn generic_placeholder_for_datatype(
DataType::Utf8 => Box::new(array::Utf8Array::<i32>::from_slice([""])),
DataType::LargeUtf8 => Box::new(array::Utf8Array::<i64>::from_slice([""])),
DataType::List(field) => {
let inner = generic_placeholder_for_datatype(field.data_type());
let inner = generic_placeholder_for_datatype_arrow2(field.data_type());
let offsets = arrow2::offset::Offsets::try_from_lengths(std::iter::once(inner.len()))
.expect("failed to create offsets buffer");
Box::new(array::ListArray::<i32>::new(
Expand All @@ -140,7 +146,7 @@ pub fn generic_placeholder_for_datatype(
}

DataType::LargeList(field) => {
let inner = generic_placeholder_for_datatype(field.data_type());
let inner = generic_placeholder_for_datatype_arrow2(field.data_type());
let offsets = arrow2::offset::Offsets::try_from_lengths(std::iter::once(inner.len()))
.expect("failed to create offsets buffer");
Box::new(array::ListArray::<i64>::new(
Expand All @@ -153,7 +159,7 @@ pub fn generic_placeholder_for_datatype(
DataType::Struct(fields) => {
let inners = fields
.iter()
.map(|field| generic_placeholder_for_datatype(field.data_type()));
.map(|field| generic_placeholder_for_datatype_arrow2(field.data_type()));
Box::new(array::StructArray::new(
datatype.clone(),
inners.collect(),
Expand All @@ -162,7 +168,7 @@ pub fn generic_placeholder_for_datatype(
}
DataType::Union(fields, _types, _union_mode) => {
if let Some(first_field) = fields.first() {
let first_field = generic_placeholder_for_datatype(first_field.data_type());
let first_field = generic_placeholder_for_datatype_arrow2(first_field.data_type());
let first_field_len = first_field.len(); // Should be 1, but let's play this safe!
let other_fields = fields
.iter()
Expand Down Expand Up @@ -199,7 +205,7 @@ pub fn generic_placeholder_for_datatype(
)]))
}

DataType::Extension(_, datatype, _) => generic_placeholder_for_datatype(datatype),
DataType::Extension(_, datatype, _) => generic_placeholder_for_datatype_arrow2(datatype),
}
}

Expand All @@ -223,7 +229,7 @@ pub struct ComponentReflection {
pub custom_placeholder: Option<ArrayRef>,

/// Datatype of the component.
pub datatype: arrow2::datatypes::DataType,
pub datatype: arrow::datatypes::DataType,
}

/// Runtime reflection about archetypes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,5 @@ fn placeholder_for_component(
None
};

datatype
.map(|datatype| re_types::reflection::generic_placeholder_for_datatype(&datatype).into())
datatype.map(|datatype| re_types::reflection::generic_placeholder_for_datatype(&datatype))
}
2 changes: 1 addition & 1 deletion crates/viewer/re_viewer/src/blueprint/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use re_types_core::Component;
pub(crate) fn validate_component<C: Component>(blueprint: &EntityDb) -> bool {
let engine = blueprint.storage_engine();
if let Some(data_type) = engine.store().lookup_datatype(&C::name()) {
if data_type != &C::arrow2_datatype() {
if data_type != C::arrow_datatype() {
// If the schemas don't match, we definitely have a problem
re_log::debug!(
"Unexpected datatype for component {:?}.\nFound: {:#?}\nExpected: {:#?}",
Expand Down
7 changes: 3 additions & 4 deletions crates/viewer/re_viewer_context/src/viewer_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,17 +239,16 @@ impl ViewerContext<'_> {
self.recording_engine()
.store()
.lookup_datatype(&component)
.cloned()
.or_else(|| self.blueprint_engine().store().lookup_datatype(&component).cloned())
.or_else(|| self.blueprint_engine().store().lookup_datatype(&component))
.unwrap_or_else(|| {
re_log::error_once!("Could not find datatype for component {component}. Using null array as placeholder.");
re_chunk::external::arrow2::datatypes::DataType::Null
arrow::datatypes::DataType::Null
})
};

// TODO(andreas): Is this operation common enough to cache the result? If so, here or in the reflection data?
// The nice thing about this would be that we could always give out references (but updating said cache wouldn't be easy in that case).
re_types::reflection::generic_placeholder_for_datatype(&datatype).into()
re_types::reflection::generic_placeholder_for_datatype(&datatype)
}
}

Expand Down

0 comments on commit 287b8d5

Please sign in to comment.