Skip to content

Commit

Permalink
[CALCITE-2040] Create adapter for Apache Arrow
Browse files Browse the repository at this point in the history
Co-authored-by: Jonathan Swenson <[email protected]>
Co-authored-by: Julian Hyde <[email protected]>
Co-authored-by: Karshit Shah <[email protected]>
Co-authored-by: Michael Mior <[email protected]>
  • Loading branch information
5 people committed Feb 5, 2024
1 parent d5fa3eb commit 5bb4d6c
Show file tree
Hide file tree
Showing 39 changed files with 2,534 additions and 46 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
arguments: --scan --no-parallel --no-daemon build javadoc
arguments: --scan --no-parallel --no-daemon build javadoc --exclude-task :arrow:build
- name: 'sqlline and sqllsh'
shell: cmd
run: |
Expand Down Expand Up @@ -103,7 +103,7 @@ jobs:
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
arguments: --scan --no-parallel --no-daemon build
arguments: --scan --no-parallel --no-daemon build --exclude-task :arrow:build
- name: 'sqlline and sqllsh'
shell: cmd
run: |
Expand Down Expand Up @@ -215,6 +215,7 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
Expand All @@ -241,6 +242,7 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk${{ matrix.jdk }}
remote-build-cache-proxy-enabled: false
Expand Down Expand Up @@ -310,6 +312,7 @@ jobs:
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.S3_BUILD_CACHE_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.S3_BUILD_CACHE_SECRET_KEY }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
_JAVA_OPTIONS: ${{ env._JAVA_OPTIONS }} --add-opens=java.base/java.nio=ALL-UNNAMED
with:
job-id: jdk19
remote-build-cache-proxy-enabled: false
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ node('ubuntu') {
}
stage('Code Quality') {
timeout(time: 1, unit: 'HOURS') {
withEnv(["Path+JDK=$JAVA_JDK_17/bin","JAVA_HOME=$JAVA_JDK_17"]) {
withEnv(["Path+JDK=$JAVA_JDK_17/bin","JAVA_HOME=$JAVA_JDK_17","_JAVA_OPTIONS=--add-opens=java.base/java.nio=ALL-UNNAMED"]) {
withCredentials([string(credentialsId: 'SONARCLOUD_TOKEN', variable: 'SONAR_TOKEN')]) {
if ( env.BRANCH_NAME.startsWith("PR-") ) {
sh './gradlew --no-parallel --no-daemon jacocoAggregateTestReport sonar -PenableJacoco -Dsonar.pullrequest.branch=${CHANGE_BRANCH} -Dsonar.pullrequest.base=${CHANGE_TARGET} -Dsonar.pullrequest.key=${CHANGE_ID} -Dsonar.login=${SONAR_TOKEN}'
Expand Down
32 changes: 32 additions & 0 deletions arrow/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
dependencies {
api(project(":core"))

implementation("com.google.guava:guava")
implementation("org.apache.arrow:arrow-memory-netty")
implementation("org.apache.arrow:arrow-vector")
implementation("org.apache.arrow.gandiva:arrow-gandiva")
annotationProcessor("org.immutables:value")
compileOnly("org.immutables:value-annotations")

testImplementation("org.apache.arrow:arrow-jdbc")
testImplementation("net.hydromatic:scott-data-hsqldb")
testImplementation("org.apache.commons:commons-lang3")
testImplementation(project(":core"))
testImplementation(project(":testkit"))
}
18 changes: 18 additions & 0 deletions arrow/gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
description=Arrow adapter for Calcite
artifact.name=Calcite Arrow
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.linq4j.AbstractEnumerable;
import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import org.apache.arrow.gandiva.evaluator.Filter;
import org.apache.arrow.gandiva.evaluator.Projector;
import org.apache.arrow.vector.ipc.ArrowFileReader;

import org.checkerframework.checker.nullness.qual.Nullable;

/**
* Enumerable that reads from Arrow value-vectors.
*/
class ArrowEnumerable extends AbstractEnumerable<Object> {
private final ArrowFileReader arrowFileReader;
private final @Nullable Projector projector;
private final @Nullable Filter filter;
private final ImmutableIntList fields;

ArrowEnumerable(ArrowFileReader arrowFileReader,
@Nullable Projector projector, @Nullable Filter filter,
ImmutableIntList fields) {
this.arrowFileReader = arrowFileReader;
this.projector = projector;
this.filter = filter;
this.fields = fields;
}

@Override public Enumerator<Object> enumerator() {
try {
return new ArrowEnumerator(projector, filter, fields, arrowFileReader);
} catch (Exception e) {
throw Util.toUnchecked(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.arrow;

import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Util;

import org.apache.arrow.gandiva.evaluator.Filter;
import org.apache.arrow.gandiva.evaluator.Projector;
import org.apache.arrow.gandiva.evaluator.SelectionVector;
import org.apache.arrow.gandiva.evaluator.SelectionVectorInt16;
import org.apache.arrow.gandiva.exceptions.GandivaException;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.VectorUnloader;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* Enumerator that reads from a collection of Arrow value-vectors.
*/
class ArrowEnumerator implements Enumerator<Object> {
private final BufferAllocator allocator;
private final ArrowFileReader arrowFileReader;
private final @Nullable Projector projector;
private final @Nullable Filter filter;
private int rowIndex = -1;
private final List<ValueVector> valueVectors;
private final List<Integer> fields;
private int rowSize;
private @Nullable ArrowBuf buf;
private @Nullable SelectionVector selectionVector;
private int selectionVectorIndex = 0;

ArrowEnumerator(@Nullable Projector projector, @Nullable Filter filter,
ImmutableIntList fields, ArrowFileReader arrowFileReader) {
this.allocator = new RootAllocator(Long.MAX_VALUE);
this.projector = projector;
this.filter = filter;
this.arrowFileReader = arrowFileReader;
this.fields = fields;
this.valueVectors = new ArrayList<>(fields.size());

// Set up fields so that first call to moveNext() will trigger a call to
// loadNextBatch().
if (projector != null) {
rowIndex = rowSize = 0;
} else {
selectionVector = null;
selectionVectorIndex = 0;
}
}

@Override public Object current() {
if (fields.size() == 1) {
return this.valueVectors.get(0).getObject(rowIndex);
}
Object[] current = new Object[valueVectors.size()];
for (int i = 0; i < valueVectors.size(); i++) {
ValueVector vector = this.valueVectors.get(i);
current[i] = vector.getObject(rowIndex);
}
return current;
}

@Override public boolean moveNext() {
if (projector != null) {
if (rowIndex >= rowSize - 1) {
final boolean hasNextBatch;
try {
hasNextBatch = arrowFileReader.loadNextBatch();
} catch (IOException e) {
throw Util.toUnchecked(e);
}
if (hasNextBatch) {
rowIndex = 0;
this.valueVectors.clear();
loadNextArrowBatch();
return true;
} else {
return false;
}
} else {
rowIndex++;
return true;
}
} else {
if (selectionVector == null
|| selectionVectorIndex >= selectionVector.getRecordCount()) {
boolean hasNextBatch;
while (true) {
try {
hasNextBatch = arrowFileReader.loadNextBatch();
} catch (IOException e) {
throw Util.toUnchecked(e);
}
if (hasNextBatch) {
selectionVectorIndex = 0;
this.valueVectors.clear();
loadNextArrowBatch();
assert selectionVector != null;
if (selectionVectorIndex >= selectionVector.getRecordCount()) {
// the "filtered" batch is empty, but there may be more batches to fetch
continue;
}
rowIndex = selectionVector.getIndex(selectionVectorIndex++);
return true;
} else {
return false;
}
}
} else {
rowIndex = selectionVector.getIndex(selectionVectorIndex++);
return true;
}
}
}

private void loadNextArrowBatch() {
try {
final VectorSchemaRoot vsr = arrowFileReader.getVectorSchemaRoot();
for (int i : fields) {
this.valueVectors.add(vsr.getVector(i));
}
this.rowSize = vsr.getRowCount();
VectorUnloader vectorUnloader = new VectorUnloader(vsr);
ArrowRecordBatch arrowRecordBatch = vectorUnloader.getRecordBatch();
if (projector != null) {
projector.evaluate(arrowRecordBatch, valueVectors);
}
if (filter != null) {
this.buf = this.allocator.buffer(rowSize * 2);
this.selectionVector = new SelectionVectorInt16(buf);
filter.evaluate(arrowRecordBatch, selectionVector);
}
} catch (IOException | GandivaException e) {
throw Util.toUnchecked(e);
}
}

@Override public void reset() {
throw new UnsupportedOperationException();
}

@Override public void close() {
try {
if (projector != null) {
projector.close();
}
if (filter != null) {
if (buf != null) {
buf.close();
}
filter.close();
}
} catch (GandivaException e) {
throw Util.toUnchecked(e);
}
}
}
Loading

0 comments on commit 5bb4d6c

Please sign in to comment.