Skip to content

Commit

Permalink
splitting ArrowEnumerator into sub-classes
Browse files Browse the repository at this point in the history
  • Loading branch information
asolimando committed Feb 25, 2024
1 parent 5bb4d6c commit c50e955
Show file tree
Hide file tree
Showing 6 changed files with 266 additions and 191 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.VectorUnloader;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Base class for enumerators that read rows from the record batches of an
 * Arrow file.
 *
 * <p>This class keeps the value vectors of the current batch for the selected
 * {@link #fields} and materializes the row at {@link #currRowIndex} in
 * {@link #current()}. Subclasses decide how {@link #currRowIndex} advances
 * (they provide {@code moveNext()} and {@code close()}) and which operator is
 * applied to each batch via {@link #evaluateOperator(ArrowRecordBatch)}.
 */
abstract class AbstractArrowEnumerator implements Enumerator<Object> {
  /** Reader supplying the record batches. */
  protected final ArrowFileReader arrowFileReader;
  /** Index, within the current batch, of the row returned by {@link #current()}. */
  protected int currRowIndex = -1;
  /** Number of rows in the batch most recently loaded by {@link #loadNextArrowBatch()}. */
  protected int rowCount;
  /** Vectors of the current batch, one per entry of {@link #fields}, in the same order. */
  protected final List<ValueVector> valueVectors;
  /** Ordinals of the vectors (within the reader's schema root) to expose. */
  protected final List<Integer> fields;

  /**
   * Creates an enumerator over the given reader.
   *
   * @param arrowFileReader reader supplying the record batches
   * @param fields ordinals of the vectors to read from each batch
   */
  AbstractArrowEnumerator(ArrowFileReader arrowFileReader, ImmutableIntList fields) {
    this.arrowFileReader = arrowFileReader;
    this.fields = fields;
    this.valueVectors = new ArrayList<>(fields.size());
  }

  /**
   * Applies the subclass-specific operator to a record batch.
   *
   * <p>The batch is closed by {@link #loadNextArrowBatch()} as soon as this
   * method returns, so implementations must not keep a reference to it.
   *
   * @param arrowRecordBatch batch unloaded from the reader's current schema root
   */
  abstract void evaluateOperator(ArrowRecordBatch arrowRecordBatch);

  /**
   * Captures the selected vectors and the row count of the reader's current
   * schema root, then hands the corresponding record batch to
   * {@link #evaluateOperator(ArrowRecordBatch)}.
   *
   * <p>This method does not advance the reader itself.
   *
   * @throws RuntimeException wrapping any {@link IOException} raised while
   *     obtaining the schema root
   */
  protected void loadNextArrowBatch() {
    try {
      final VectorSchemaRoot vsr = arrowFileReader.getVectorSchemaRoot();
      // Drop the vectors of the previous batch so that the list always has
      // exactly one entry per selected field, even if the caller did not clear it.
      this.valueVectors.clear();
      for (int i : fields) {
        this.valueVectors.add(vsr.getVector(i));
      }
      this.rowCount = vsr.getRowCount();
      final VectorUnloader vectorUnloader = new VectorUnloader(vsr);
      // The record batch retains the root's buffers; closing it releases those
      // extra references once the operator has been evaluated.
      try (ArrowRecordBatch arrowRecordBatch = vectorUnloader.getRecordBatch()) {
        evaluateOperator(arrowRecordBatch);
      }
    } catch (IOException e) {
      throw Util.toUnchecked(e);
    }
  }

  /**
   * Returns the row at {@link #currRowIndex} of the current batch.
   *
   * @return the single value when exactly one field is selected, otherwise an
   *     {@code Object[]} holding one value per loaded vector
   */
  @Override public Object current() {
    if (fields.size() == 1) {
      return this.valueVectors.get(0).getObject(currRowIndex);
    }
    final Object[] current = new Object[valueVectors.size()];
    for (int i = 0; i < valueVectors.size(); i++) {
      final ValueVector vector = this.valueVectors.get(i);
      current[i] = vector.getObject(currRowIndex);
    }
    return current;
  }

  /**
   * Not supported; always throws.
   *
   * @throws UnsupportedOperationException always
   */
  @Override public void reset() {
    throw new UnsupportedOperationException();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@
*/
class ArrowEnumerable extends AbstractEnumerable<Object> {
private final ArrowFileReader arrowFileReader;
private final ImmutableIntList fields;
private final @Nullable Projector projector;
private final @Nullable Filter filter;
private final ImmutableIntList fields;

ArrowEnumerable(ArrowFileReader arrowFileReader,
@Nullable Projector projector, @Nullable Filter filter,
ImmutableIntList fields) {
ArrowEnumerable(ArrowFileReader arrowFileReader, ImmutableIntList fields,
@Nullable Projector projector, @Nullable Filter filter) {
this.arrowFileReader = arrowFileReader;
this.projector = projector;
this.filter = filter;
Expand All @@ -47,7 +46,13 @@ class ArrowEnumerable extends AbstractEnumerable<Object> {

@Override public Enumerator<Object> enumerator() {
try {
return new ArrowEnumerator(projector, filter, fields, arrowFileReader);
if (projector != null) {
return new ArrowProjectEnumerator(arrowFileReader, fields, projector);
} else if (filter != null) {
return new ArrowFilterEnumerator(arrowFileReader, fields, filter);
}
throw new IllegalArgumentException(
"The arrow enumerator must have either a filter or a projection");
} catch (Exception e) {
throw Util.toUnchecked(e);
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.arrow.gandiva.evaluator.Filter;
import org.apache.arrow.gandiva.evaluator.SelectionVector;
import org.apache.arrow.gandiva.evaluator.SelectionVectorInt16;
import org.apache.arrow.gandiva.exceptions.GandivaException;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;

import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import java.io.IOException;

/**
 * Enumerator that reads from a filtered collection of Arrow value-vectors.
 *
 * <p>For each record batch the Gandiva {@link Filter} fills a selection vector
 * with the indexes of the matching rows; {@link #moveNext()} then walks that
 * selection vector and moves on to the next batch once it is exhausted.
 */
class ArrowFilterEnumerator extends AbstractArrowEnumerator {
  /** Allocator backing the selection-vector buffers; owned by this enumerator. */
  private final BufferAllocator allocator;
  private final Filter filter;
  /** Buffer backing {@link #selectionVector}; null until a batch has been evaluated. */
  private ArrowBuf buf;
  /** Indexes of the rows of the current batch that passed the filter. */
  private SelectionVector selectionVector;
  /** Position of the next entry to read from {@link #selectionVector}. */
  private int selectionVectorIndex;

  /**
   * Creates a filtering enumerator.
   *
   * @param arrowFileReader reader supplying the record batches
   * @param fields ordinals of the vectors to read from each batch
   * @param filter Gandiva filter evaluated against every batch
   */
  ArrowFilterEnumerator(ArrowFileReader arrowFileReader, ImmutableIntList fields,
      Filter filter) {
    super(arrowFileReader, fields);
    this.allocator = new RootAllocator(Long.MAX_VALUE);
    this.filter = filter;
  }

  /**
   * Evaluates the filter against a batch, storing the indexes of the matching
   * rows in a freshly allocated selection vector.
   *
   * @param arrowRecordBatch batch to filter
   */
  @Override void evaluateOperator(ArrowRecordBatch arrowRecordBatch) {
    // Release the buffer of the previous batch before allocating a new one;
    // otherwise only the buffer of the last batch would ever be freed.
    releaseSelectionBuffer();
    try {
      // Sized at two bytes per row of the batch (16-bit selection indexes).
      this.buf = this.allocator.buffer((long) rowCount * 2);
      this.selectionVector = new SelectionVectorInt16(buf);
      filter.evaluate(arrowRecordBatch, selectionVector);
    } catch (GandivaException e) {
      throw Util.toUnchecked(e);
    }
  }

  /**
   * Advances to the next row that satisfies the filter, loading further
   * batches as needed.
   *
   * @return {@code true} if {@link #current()} now refers to a matching row,
   *     {@code false} if the reader has no more batches
   */
  @Override public boolean moveNext() {
    // Keep loading batches until one has an unread selected row; a batch
    // whose filtered result is empty is skipped.
    while (selectionVector == null
        || selectionVectorIndex >= selectionVector.getRecordCount()) {
      final boolean hasNextBatch;
      try {
        hasNextBatch = arrowFileReader.loadNextBatch();
      } catch (IOException e) {
        throw Util.toUnchecked(e);
      }
      if (!hasNextBatch) {
        return false;
      }
      selectionVectorIndex = 0;
      this.valueVectors.clear();
      loadNextArrowBatch();
    }
    currRowIndex = selectionVector.getIndex(selectionVectorIndex++);
    return true;
  }

  /**
   * Releases the selection buffer (if one was ever allocated), the filter and
   * the allocator. Safe to call even if {@link #moveNext()} was never called.
   */
  @Override public void close() {
    try {
      releaseSelectionBuffer();
      filter.close();
    } catch (GandivaException e) {
      throw Util.toUnchecked(e);
    } finally {
      allocator.close();
    }
  }

  /** Frees the current selection buffer, if any, and forgets the vector built on it. */
  private void releaseSelectionBuffer() {
    if (buf != null) {
      buf.close();
      buf = null;
      selectionVector = null;
    }
  }
}
Loading

0 comments on commit c50e955

Please sign in to comment.