Skip to content

Commit

Permalink
Add function to test Cells to identify groups of adjacent equal keys (#…
Browse files Browse the repository at this point in the history
…171)

* Config common option methods use reference

* Add Cells::identify_groups
  • Loading branch information
rroelke authored Oct 1, 2024
1 parent 89dee14 commit c395070
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 4 deletions.
4 changes: 2 additions & 2 deletions tiledb/api/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1507,7 +1507,7 @@ pub mod tests {
let uri = {
let context = {
let mut config = Config::new()?;
config.set_common_option(key_config.clone())?;
config.set_common_option(&key_config)?;

Context::from_config(&config)
}?;
Expand Down Expand Up @@ -1541,7 +1541,7 @@ pub mod tests {
{
let context = Context::new()?;
let array_config =
Config::new()?.with_common_option(key_config.clone())?;
Config::new()?.with_common_option(&key_config)?;

let _ = ArrayOpener::new(&context, &uri, Mode::Read)?
.config(&array_config)?
Expand Down
7 changes: 5 additions & 2 deletions tiledb/api/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,14 @@ impl Config {
}
}

pub fn set_common_option(&mut self, opt: CommonOption) -> TileDBResult<()> {
/// Applies the common option `opt` to this configuration in place.
///
/// The option is taken by reference, so the caller keeps ownership and
/// can apply the same option to several configurations without cloning.
///
/// Returns whatever `opt.apply` returns; this method adds no error
/// handling of its own.
pub fn set_common_option(
&mut self,
opt: &CommonOption,
) -> TileDBResult<()> {
opt.apply(self)
}

pub fn with_common_option(self, opt: CommonOption) -> TileDBResult<Self> {
pub fn with_common_option(self, opt: &CommonOption) -> TileDBResult<Self> {
let mut s = self;
s.set_common_option(opt)?;
Ok(s)
Expand Down
76 changes: 76 additions & 0 deletions tiledb/api/src/query/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,35 @@ impl Cells {
sorted
}

/// Finds the boundaries of each group, i.e. each run of contiguous cells
/// which compare equal on every field named in `keys`.
///
/// The result is a list of offsets: the first is always `0`, the last is
/// always `self.len()`, and each adjacent pair `(start, end)` delimits one
/// group `start..end`. Every cell inside a group is equal to the group's
/// first cell on all of `keys`, and the cell just past a group (if any)
/// differs from it on at least one key.
///
/// The cells do not have to be sorted, though sorted input is where this is
/// most useful. If `keys` is empty then no two cells can differ, so all cells
/// land in a single group.
///
/// Returns `None` if there are no cells.
///
/// # Panics
///
/// Panics if a comparison reaches a key in `keys` which does not name a
/// field of these cells.
pub fn identify_groups(&self, keys: &[String]) -> Option<Vec<usize>> {
    if self.is_empty() {
        return None;
    }

    // True if the cells at `lhs` and `rhs` differ on at least one key field.
    let differs = |lhs: usize, rhs: usize| -> bool {
        keys.iter().any(|key| {
            let field = self.fields().get(key).unwrap();
            typed_field_data_go!(
                field,
                ref values,
                values[lhs].bits_ne(&values[rhs])
            )
        })
    };

    let ncells = self.len();
    let mut offsets = vec![0];

    // Each candidate is compared against the first cell of the current group;
    // a mismatch closes that group and opens a new one at the candidate.
    let mut group_start = 0;
    for candidate in 1..ncells {
        if differs(candidate, group_start) {
            offsets.push(candidate);
            group_start = candidate;
        }
    }

    // Terminating offset so that every group is a `(start, end)` pair.
    offsets.push(ncells);
    Some(offsets)
}

/// Returns the number of distinct values grouped on `keys`
pub fn count_distinct(&self, keys: &[String]) -> usize {
if self.len() <= 1 {
Expand Down Expand Up @@ -1889,6 +1918,43 @@ mod tests {
}
}

/// Assert that [Cells::identify_groups] produces correct output
/// for the given `keys`.
///
/// The following properties are checked:
/// - `None` is only returned when `cells` is empty;
/// - the offsets start at `0` and end at `cells.len()`, so that the groups
///   cover every cell;
/// - the offsets are strictly increasing, i.e. no group is empty;
/// - within a group, every cell compares equal (via `bits_eq`) to the first
///   cell of the group on every key;
/// - the cell immediately after a group (if there is one) differs from the
///   first cell of the group on at least one key.
fn do_cells_identify_groups(cells: Cells, keys: &[String]) {
    let Some(actual) = cells.identify_groups(keys) else {
        assert!(cells.is_empty());
        return;
    };

    // Without the check on the first offset, an output which silently
    // dropped the leading cells would satisfy every other assertion.
    assert_eq!(Some(0), actual.first().copied());
    assert_eq!(Some(cells.len()), actual.last().copied());

    for w in actual.windows(2) {
        let (start, end) = (w[0], w[1]);
        assert!(start < end);

        // all cells of the group agree with its first cell on every key
        for k in keys.iter() {
            let f = cells.fields().get(k).unwrap();
            typed_field_data_go!(f, ref field_cells, {
                for i in start..end {
                    assert!(field_cells[start].bits_eq(&field_cells[i]));
                }
            })
        }

        // the group is maximal: the next cell must differ on some key
        if end < cells.len() {
            let some_ne = keys.iter().any(|k| {
                let f = cells.fields().get(k).unwrap();
                typed_field_data_go!(f, ref field_cells, {
                    field_cells[start].bits_ne(&field_cells[end])
                })
            });
            assert!(some_ne);
        }
    }
}

fn do_cells_count_distinct_1d(cells: Cells) {
for (key, field_cells) in cells.fields().iter() {
let expect_count =
Expand Down Expand Up @@ -2107,6 +2173,16 @@ mod tests {
do_cells_slice_3d(cells, d1, d2, d3, s1, s2, s3)
}

#[test]
fn cells_identify_groups((cells, keys) in any::<Cells>().prop_flat_map(|generated| {
    // The keys to group on are sampled from the generated cells' own field
    // names, anywhere from none of them up to all of them.
    let field_names: Vec<String> = generated.fields().keys().cloned().collect();
    let max_keys = field_names.len();
    (Just(generated), proptest::sample::subsequence(field_names, 0..=max_keys))
}))
{
    do_cells_identify_groups(cells, &keys)
}

#[test]
fn cells_count_distinct_1d(cells in any::<Cells>()) {
do_cells_count_distinct_1d(cells)
Expand Down

0 comments on commit c395070

Please sign in to comment.