From 8ec6e61c1c04b6f2f96bf2623126ad3d3ed7e178 Mon Sep 17 00:00:00 2001
From: david
Date: Sun, 22 May 2016 14:41:09 -0700
Subject: [PATCH 01/10] Test for multiple joins (lmd)

---
 ...CHCustomerOptimizedHybridHashJoinTest.java | 413 +++++++++++++-----
 1 file changed, 300 insertions(+), 113 deletions(-)

diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
index a10513aa1..d3d993728 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
@@ -19,7 +19,8 @@
 package org.apache.hyracks.tests.integration;
 
 import java.io.File;
-
+import java.util.Arrays;
+import java.util.Date;
 import org.junit.Test;
 
 import org.apache.hyracks.api.constraints.PartitionConstraintHelper;
@@ -29,77 +30,315 @@
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.dataset.ResultSetId;
 import org.apache.hyracks.api.io.FileReference;
 import org.apache.hyracks.api.job.JobSpecification;
 import org.apache.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
 import org.apache.hyracks.data.std.accessors.UTF8StringBinaryHashFunctionFamily;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
+import org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
+import org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
 import org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
 import org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider;
 import org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory;
 import org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
 import org.apache.hyracks.dataflow.std.file.FileSplit;
 import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
 import org.apache.hyracks.dataflow.std.join.JoinComparatorFactory;
 import org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor;
+import org.apache.hyracks.tests.util.ResultSerializerFactoryProvider;
+
 import org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor;
-import org.apache.hyracks.dataflow.std.misc.PrinterOperatorDescriptor;
 
 public class 
TPCHCustomerOptimizedHybridHashJoinTest extends AbstractIntegrationTest { - private static final boolean DEBUG = false; + + private static boolean DEBUG = false; + + static RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); + + static RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor lineItemDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer()}); + + + static RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor lineOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new 
UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor custorderLineItemJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() + }); + + static IValueParserFactory[] custValueParserFactories = new IValueParserFactory[custDesc.getFieldCount()]; + static IValueParserFactory[] orderValueParserFactories = new IValueParserFactory[ordersDesc.getFieldCount()]; + static IValueParserFactory[] lineItemValueParserFactories = new IValueParserFactory[lineItemDesc.getFieldCount()]; + + + static { + Arrays.fill(custValueParserFactories, UTF8StringParserFactory.INSTANCE); + Arrays.fill(orderValueParserFactories, UTF8StringParserFactory.INSTANCE); + Arrays.fill(lineItemValueParserFactories, UTF8StringParserFactory.INSTANCE); + } + + private IOperatorDescriptor getPrinter(JobSpecification spec, File file) { + IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider( + new FileSplit[] { + new FileSplit(NC1_ID, file.getAbsolutePath()) }); + + return DEBUG ? 
new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|") + : new NullSinkOperatorDescriptor(spec); + } @Test public void customerOrderCIDHybridHashJoin_Case1() throws Exception { JobSpecification spec = new JobSpecification(); + long startTime = new Date().getTime(); + FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File( + "data/tpch0.001/customerlmdd.tbl"))) }; + IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits); + + FileSplit[] lineSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File( + "data/tpch0.001/lineitem.tbl"))) }; + IFileSplitProvider lineSplitsProvider = new ConstantFileSplitProvider(lineSplits); + + FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File( + "data/tpch0.001/orderslmdd.tbl"))) }; + ordersSplits[0].getPartition(); + IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits); + + + FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, + new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc); + PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID); + + FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, + new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc); + PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID); + + + + OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243, + 1.2, new int[] { 0 }, new int[] { 1 }, + new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, + new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, + custOrderJoinDesc, new JoinComparatorFactory( + PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), + new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), + null); + PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID); + + FileScanOperatorDescriptor lineItemScanner = new FileScanOperatorDescriptor(spec, lineSplitsProvider, + new DelimitedDataTupleParserFactory(lineItemValueParserFactories, '|'), lineItemDesc); + PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, lineItemScanner, NC1_ID); + + OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243, + 1.2, new int[] { 0 }, new int[] { 0 }, + new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, + new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, + custorderLineItemJoinDesc, new JoinComparatorFactory( + PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), + new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), + null); + + PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID); + + + ResultSetId rsId = new ResultSetId(1); + spec.addResultSetId(rsId); + IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, + ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider()); + + /* File file = File.createTempFile(getClass().getName(), "case1"); + IOperatorDescriptor printer = getPrinter(spec, file);*/ + 
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor ordCustJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordCustJoinResultConn, join, 0, join2, 1);
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, lineItemScanner, 0, join2, 0);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        File temp = new File("result2join");
+        runTestAndStoreResult(spec, temp);
+        //System.out.println("output to " + file.getAbsolutePath());
+        long endTime = new Date().getTime();
+        System.out.println("Job took " + (endTime - startTime) + " ms.");
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case12() throws Exception {
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
         FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
-                "data/tpch0.001/customer4.tbl"))) };
+                "data/tpch0.001/customerlmdd.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
-        RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
+
+        FileSplit[] lineSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/lineitem.tbl"))) };
+        IFileSplitProvider lineSplitsProvider = new ConstantFileSplitProvider(lineSplits);
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
-                "data/tpch0.001/orders4.tbl"))) };
+                "data/tpch0.001/orderslmdd.tbl"))) };
+        ordersSplits[0].getPartition();
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+        FileScanOperatorDescriptor lineItemScanner = new FileScanOperatorDescriptor(spec, lineSplitsProvider,
+                new DelimitedDataTupleParserFactory(lineItemValueParserFactories, '|'), lineItemDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, lineItemScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 0 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                lineOrderJoinDesc, new JoinComparatorFactory(
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 1 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custorderLineItemJoinDesc, new JoinComparatorFactory(
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case12");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor lineJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(lineJoinConn, lineItemScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor ordCustJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordCustJoinResultConn, join, 0, join2, 1);
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, custScanner, 0, join2, 0);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+        long endTime = new Date().getTime();
+        System.out.println("Job took " + (endTime - startTime) + " ms.");
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case1_StatsFirst() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customerlmdd.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orderslmdd.tbl"))) };
+        ordersSplits[0].getPartition();
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
-        RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer() });
-
-        RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
-                new 
UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer() }); + FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc); + new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID); FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE }, '|'), custDesc); + new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID); OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243, @@ -107,13 +346,14 @@ public void customerOrderCIDHybridHashJoin_Case1() throws Exception { new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory( - PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), - new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null); + PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), + new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), + null); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID); - IOperatorDescriptor printer = DEBUG ? 
new PrinterOperatorDescriptor(spec) - : new NullSinkOperatorDescriptor(spec); + File file = File.createTempFile(getClass().getName(), "case1"); + IOperatorDescriptor printer = getPrinter(spec, file); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID); IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec); @@ -127,6 +367,7 @@ custOrderJoinDesc, new JoinComparatorFactory( spec.addRoot(printer); runTest(spec); + System.out.println("output to " + file.getAbsolutePath()); } @Test @@ -136,48 +377,18 @@ public void customerOrderCIDHybridHashJoin_Case2() throws Exception { FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File( "data/tpch0.001/customer3.tbl"))) }; IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits); - RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File( "data/tpch0.001/orders4.tbl"))) }; IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits); - RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer() }); - - RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer() }); FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc); + new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID); FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { 
UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE }, '|'), custDesc); + new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID); OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 122, @@ -185,13 +396,14 @@ public void customerOrderCIDHybridHashJoin_Case2() throws Exception { new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory( - PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), - new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null); + PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), + new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), + null); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID); - IOperatorDescriptor printer = DEBUG ? new PrinterOperatorDescriptor(spec) - : new NullSinkOperatorDescriptor(spec); + File file = File.createTempFile(getClass().getName(), "case2"); + IOperatorDescriptor printer = getPrinter(spec, file); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID); IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec); @@ -205,6 +417,9 @@ custOrderJoinDesc, new JoinComparatorFactory( spec.addRoot(printer); runTest(spec); + System.out.println("output to " + file.getAbsolutePath()); + File temp=new File("tpchcustomerJoinResult"); + runTestAndStoreResult(spec, temp); } @Test @@ -215,48 +430,18 @@ public void customerOrderCIDHybridHashJoin_Case3() throws Exception { FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File( "data/tpch0.001/customer3.tbl"))) }; IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits); - RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File( "data/tpch0.001/orders1.tbl"))) }; IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits); - RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer() }); - - RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new 
UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - new UTF8StringSerializerDeserializer() }); FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc); + new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID); FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, - new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, - UTF8StringParserFactory.INSTANCE }, '|'), custDesc); + new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID); OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 6, 122, @@ -264,13 +449,14 @@ public void customerOrderCIDHybridHashJoin_Case3() throws Exception { new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory( - PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), - new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null); + PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), + new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), + null); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID); - IOperatorDescriptor printer = DEBUG ? 
new PrinterOperatorDescriptor(spec)
-                : new NullSinkOperatorDescriptor(spec);
+        File file = File.createTempFile(getClass().getName(), "case3");
+        IOperatorDescriptor printer = getPrinter(spec, file);
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
 
         IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -284,6 +470,7 @@ custOrderJoinDesc, new JoinComparatorFactory(
 
         spec.addRoot(printer);
         runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
     }
 }
 

From 7d2068bf7dc04777b644bbfa9f0b589775501b11 Mon Sep 17 00:00:00 2001
From: david
Date: Sun, 22 May 2016 15:39:52 -0700
Subject: [PATCH 02/10] Multiple join: document the reason for this work

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 000000000..57e2c955b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# incubator-asterixdb-hyracks
+Mirror of Apache AsterixDB Hyracks (Incubating)
+This is for testing the optimization of multiple joins

From 18fbf8dfad78aa7e1788181bdf54e9594724e1b5 Mon Sep 17 00:00:00 2001
From: david
Date: Sun, 22 May 2016 15:41:33 -0700
Subject: [PATCH 03/10] Shuffle process test for multiple join

---
 .../tests/integration/AggregationTest.java | 555 +++++++-----------
 1 file changed, 211 insertions(+), 344 deletions(-)

diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
index c330f8eea..e7d8d7eb5 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
@@ -20,13 +20,13 @@
 
 import java.io.File;
 import java.io.IOException;
-
-import org.junit.Test;
+import java.util.Date;
 
 import org.apache.hyracks.api.constraints.PartitionConstraintHelper;
 import org.apache.hyracks.api.dataflow.IConnectorDescriptor;
 import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
 import org.apache.hyracks.api.dataset.ResultSetId;
@@ -34,6 +34,7 @@
 import org.apache.hyracks.api.job.JobSpecification;
 import org.apache.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
 import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
+import org.apache.hyracks.data.std.accessors.UTF8StringBinaryHashFunctionFamily;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
 import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
@@ -63,18 +64,18 @@
 import org.apache.hyracks.dataflow.std.group.aggregators.MinMaxStringFieldAggregatorFactory;
 import org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory;
 import org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor;
-import org.apache.hyracks.dataflow.std.group.hash.HashGroupOperatorDescriptor;
 import 
org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor; import org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor; import org.apache.hyracks.tests.util.ResultSerializerFactoryProvider; +import org.junit.Test; /** * */ public class AggregationTest extends AbstractIntegrationTest { - final IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC2_ID, - new FileReference(new File("data/tpch0.001/lineitem.tbl"))) }); + final IFileSplitProvider splitProvider = new ConstantFileSplitProvider( + new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/lineitem.tbl"))) }); final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, @@ -94,6 +95,23 @@ public class AggregationTest extends AbstractIntegrationTest { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, }, '|'); + final IFileSplitProvider splitProvider2 = new ConstantFileSplitProvider( + new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orderslmdd.tbl"))) }); + + final RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer()}); + + final ITupleParserFactory tupleParserFactory2 = new DelimitedDataTupleParserFactory(new IValueParserFactory[] { + UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, + UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, + UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, + UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, + UTF8StringParserFactory.INSTANCE, }, '|'); + private AbstractSingleActivityOperatorDescriptor getPrinter(JobSpecification spec, String prefix) throws IOException { @@ -106,7 +124,7 @@ private AbstractSingleActivityOperatorDescriptor getPrinter(JobSpecification spe } @Test - public void singleKeySumInmemGroupTest() throws Exception { + public void singleKeySumPreClusterGroupTest() throws Exception { JobSpecification spec = new JobSpecification(); FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, @@ -119,23 +137,19 @@ public void singleKeySumInmemGroupTest() throws Exception { IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE }); int[] keyFields = new int[] { 0 }; - int tableSize = 8; - HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) }), + PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true), - new 
FloatSumFieldAggregatorFactory(5, true) }), outputRec, tableSize);
+                        new FloatSumFieldAggregatorFactory(5, true) }),
+                outputRec);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
 
         IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
-                new FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }));
+                new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
         spec.connect(conn1, csvScanner, 0, grouper, 0);
 
         AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumInmemGroupTest");
@@ -147,35 +161,35 @@ public void singleKeySumInmemGroupTest() throws Exception {
 
         spec.addRoot(printer);
-        runTest(spec);
+        File temp = new File("aggregateMultiple");
+        runTestAndStoreResult(spec, temp);
     }
 
-    @Test
-    public void singleKeySumPreClusterGroupTest() throws Exception {
+    public void singleKeySum2PreClusterGroupTest() throws Exception {
         JobSpecification spec = new JobSpecification();
 
-        FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
-                desc);
+        FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider2, tupleParserFactory2,
+                desc2);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+        });
 
         int[] keyFields = new int[] { 0 };
 
         PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                 new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
-                        new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true),
-                        new FloatSumFieldAggregatorFactory(5, true) }), outputRec);
+                        new IntSumFieldAggregatorFactory(1, true) }),
+                outputRec);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
 
         IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
-                new FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }));
+                new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
         spec.connect(conn1, csvScanner, 0, grouper, 0);
 
         AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumInmemGroupTest");
@@ -187,6 +201,8 @@ public void singleKeySumPreClusterGroupTest() throws Exception {
 
         spec.addRoot(printer);
-        runTest(spec);
+        File temp = new File("aggregateMultiple2");
+        runTestAndStoreResult(spec, temp);
     }
 
     @Test
@@ -205,26 +221,26 @@ public void singleKeySumExtGroupTest() throws Exception {
         int[] keyFields = new int[] { 0 };
         int frameLimits = 4;
         int tableSize = 8;
+        long fileSize = frameLimits * spec.getFrameSize();
 
-        ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+        ExternalGroupOperatorDescriptor grouper 
= new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, + keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory( - new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), - new IntSumFieldAggregatorFactory(3, false), - new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory( - new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), - new IntSumFieldAggregatorFactory(2, false), - new FloatSumFieldAggregatorFactory(3, false) }), outputRec, - new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) }), tableSize), true); + new UTF8StringNormalizedKeyComputerFactory(), + new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { + new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), + new FloatSumFieldAggregatorFactory(5, false) }), + new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { + new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), + new FloatSumFieldAggregatorFactory(3, false) }), + outputRec, outputRec, new HashSpillableTableFactory( + new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumExtGroupTest"); @@ -239,7 +255,7 @@ public void singleKeySumExtGroupTest() throws Exception { } @Test - public void singleKeyAvgInmemGroupTest() throws Exception { + public void singleKeyAvgPreClusterGroupTest() throws Exception { JobSpecification spec = new JobSpecification(); FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, @@ -248,27 +264,22 @@ public void singleKeyAvgInmemGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, - IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE }); + new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE}); int[] keyFields = new int[] { 0 }; - int tableSize = 8; - HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) }), + PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new 
MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
-                        new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
-                        new AvgFieldGroupAggregatorFactory(1, true) }), outputRec, tableSize);
+                new MultiFieldsAggregatorFactory(
+                        new IFieldAggregateDescriptorFactory[] {
+                                new CountFieldAggregatorFactory(true) }),
+                outputRec);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
 
         IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
-                new FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }));
+                new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
         spec.connect(conn1, csvScanner, 0, grouper, 0);
 
         AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest");
@@ -280,35 +291,56 @@ public void singleKeyAvgPreClusterGroupTest() throws Exception {
 
         spec.addRoot(printer);
-        runTest(spec);
+        File temp = new File("singleKeyPreCluster");
+        runTestAndStoreResult(spec, temp);
     }
 
-    @Test
-    public void singleKeyAvgPreClusterGroupTest() throws Exception {
-        JobSpecification spec = new JobSpecification();
+    public void singleKeyAvg2PreClusterGroupTest() throws Exception {
+        ///// This is Mingda Li's code here
 
-        FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
-                desc);
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+        FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider2, tupleParserFactory2,
+                desc2);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
 
-        int[] keyFields = new int[] { 0 };
+        int[] keyFields = new int[] { 1 };
 
         PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
-                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
-                        new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
-                        new AvgFieldGroupAggregatorFactory(1, true) }), outputRec);
+                new MultiFieldsAggregatorFactory(
+                        new IFieldAggregateDescriptorFactory[] {
+                                new CountFieldAggregatorFactory(true) }),
+                outputRec);
 
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
 
         IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
-                new 
FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }));
+                new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
         spec.connect(conn1, csvScanner, 0, grouper, 0);
 
         AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest");
@@ -320,8 +352,13 @@ public void singleKeyAvgPreClusterGroupTest() throws Exception {
 
         spec.addRoot(printer);
-        runTest(spec);
-    }
+        File temp = new File("singleKeyPreCluster2");
+        runTestAndStoreResult(spec, temp);
+        long endTime = new Date().getTime();
+        System.out.println("Job took " + (endTime - startTime) + " ms.");
+    }
 
     @Test
     public void singleKeyAvgExtGroupTest() throws Exception {
         JobSpecification spec = new JobSpecification();
@@ -338,73 +375,29 @@ public void singleKeyAvgExtGroupTest() throws Exception {
         int[] keyFields = new int[] { 0 };
         int frameLimits = 4;
         int tableSize = 8;
+        long fileSize = frameLimits * spec.getFrameSize();
 
-        ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+        ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
+                keyFields, frameLimits,
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                 new UTF8StringNormalizedKeyComputerFactory(),
-                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
-                        new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
-                        new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(
+                new MultiFieldsAggregatorFactory(
                         new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
-                                new IntSumFieldAggregatorFactory(2, false),
-                                new AvgFieldMergeAggregatorFactory(3, false) }), outputRec,
-                new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }), tableSize), true);
-
-        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
-
-        IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
-                new FieldHashPartitionComputerFactory(keyFields,
-                        new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
-                                .of(UTF8StringPointable.FACTORY) }));
-        spec.connect(conn1, csvScanner, 0, grouper, 0);
-
-        AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
-
-        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
-
-        IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
-        spec.connect(conn2, grouper, 0, printer, 0);
-
-        spec.addRoot(printer);
-        runTest(spec);
-    }
-
-    @Test
-    public void singleKeyMinMaxStringInmemGroupTest() throws Exception {
-        JobSpecification spec = new JobSpecification();
-
-        FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
-                desc);
-
-        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
-
-        RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
-                new UTF8StringSerializerDeserializer() });
-
-        int[] keyFields = new int[] { 0 };
-        int tableSize = 8;
-
-        
HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) }), - new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, + new CountFieldAggregatorFactory(false), new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), - new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec, tableSize); + new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), + new AvgFieldMergeAggregatorFactory(3, false) }), + outputRec, outputRec, new HashSpillableTableFactory( + new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); - AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest"); + AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest"); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); @@ -424,24 +417,24 @@ public void singleKeyMinMaxStringPreClusterGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, - new UTF8StringSerializerDeserializer() }); + RecordDescriptor outputRec = new RecordDescriptor( + new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), + IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() }); int[] keyFields = new int[] { 0 }; PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), - new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec); + new MultiFieldsAggregatorFactory( + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), + new MinMaxStringFieldAggregatorFactory(15, true, false) }), + outputRec); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); 
AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest"); @@ -464,80 +457,36 @@ public void singleKeyMinMaxStringExtGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, - new UTF8StringSerializerDeserializer() }); + RecordDescriptor outputRec = new RecordDescriptor( + new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), + IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() }); int[] keyFields = new int[] { 0 }; int frameLimits = 4; int tableSize = 8; + long fileSize = frameLimits * spec.getFrameSize(); - ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits, + ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, + keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory( + new UTF8StringNormalizedKeyComputerFactory(), + new MultiFieldsAggregatorFactory( new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new MinMaxStringFieldAggregatorFactory(15, true, true) }), - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, false), - new MinMaxStringFieldAggregatorFactory(2, true, true) }), outputRec, - new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) }), tableSize), true); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); - - IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory - .of(UTF8StringPointable.FACTORY) })); - spec.connect(conn1, csvScanner, 0, grouper, 0); - - AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest"); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); - - IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec); - spec.connect(conn2, grouper, 0, printer, 0); - - spec.addRoot(printer); - runTest(spec); - } - - @Test - public void multiKeySumInmemGroupTest() throws Exception { - JobSpecification spec = new JobSpecification(); - - FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, - desc); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE }); - - int[] keyFields = new int[] { 8, 0 }; - int tableSize = 8; - - HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - 
PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), - new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true) }), - outputRec, tableSize); + new MultiFieldsAggregatorFactory( + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), + new MinMaxStringFieldAggregatorFactory(2, true, true) }), + outputRec, outputRec, new HashSpillableTableFactory( + new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); - AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumInmemGroupTest"); + AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest"); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); @@ -573,9 +522,10 @@ public void multiKeySumPreClusterGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumInmemGroupTest"); @@ -605,75 +555,30 @@ public void multiKeySumExtGroupTest() throws Exception { int[] keyFields = new int[] { 8, 0 }; int frameLimits = 4; int tableSize = 8; + long fileSize = frameLimits * spec.getFrameSize(); - ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits, - new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory( - new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), - new IntSumFieldAggregatorFactory(3, false) }), new MultiFieldsAggregatorFactory( - new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false), - new IntSumFieldAggregatorFactory(3, false) }), outputRec, - new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true); - - 
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); - - IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); - spec.connect(conn1, csvScanner, 0, grouper, 0); - - AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumExtGroupTest"); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); - - IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec); - spec.connect(conn2, grouper, 0, printer, 0); - - spec.addRoot(printer); - runTest(spec); - } - - @Test - public void multiKeyAvgInmemGroupTest() throws Exception { - JobSpecification spec = new JobSpecification(); - - FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, - desc); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, - FloatSerializerDeserializer.INSTANCE }); - - int[] keyFields = new int[] { 8, 0 }; - int tableSize = 8; - - HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), + ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, + keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, + new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true), - new AvgFieldGroupAggregatorFactory(1, true) }), outputRec, tableSize); + new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false) }), + new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { + new IntSumFieldAggregatorFactory(2, false), new IntSumFieldAggregatorFactory(3, false) }), + outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { + UTF8StringBinaryHashFunctionFamily.INSTANCE, UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); - 
AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgInmemGroupTest"); + AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumExtGroupTest"); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); @@ -693,26 +598,28 @@ public void multiKeyAvgPreClusterGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, - FloatSerializerDeserializer.INSTANCE }); + RecordDescriptor outputRec = new RecordDescriptor( + new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, + IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE }); int[] keyFields = new int[] { 8, 0 }; PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true), - new AvgFieldGroupAggregatorFactory(1, true) }), outputRec); + new MultiFieldsAggregatorFactory( + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), + new CountFieldAggregatorFactory(true), new AvgFieldGroupAggregatorFactory(1, true) }), + outputRec); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgInmemGroupTest"); @@ -735,36 +642,37 @@ public void multiKeyAvgExtGroupTest() throws Exception { PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, - FloatSerializerDeserializer.INSTANCE }); + RecordDescriptor outputRec = new RecordDescriptor( + new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, + IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE }); int[] keyFields = new int[] { 8, 0 }; int frameLimits = 4; int tableSize = 8; + long fileSize = frameLimits * spec.getFrameSize(); - ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits, + 
ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, + keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), + new MultiFieldsAggregatorFactory( + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), + new CountFieldAggregatorFactory(false), new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false), - new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory( - new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false), - new IntSumFieldAggregatorFactory(3, false), - new AvgFieldMergeAggregatorFactory(4, false) }), outputRec, - new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true); + new IntSumFieldAggregatorFactory(2, false), new IntSumFieldAggregatorFactory(3, false), + new AvgFieldMergeAggregatorFactory(4, false) }), + outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { + UTF8StringBinaryHashFunctionFamily.INSTANCE, UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgExtGroupTest"); @@ -778,51 +686,6 @@ public void multiKeyAvgExtGroupTest() throws Exception { runTest(spec); } - @Test - public void multiKeyMinMaxStringInmemGroupTest() throws Exception { - JobSpecification spec = new JobSpecification(); - - FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, - desc); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID); - - RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { - new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), - IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() }); - - int[] keyFields = new int[] { 8, 0 }; - int tableSize = 8; - - HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), - new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), - 
PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), - new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec, tableSize); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); - - IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); - spec.connect(conn1, csvScanner, 0, grouper, 0); - - AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringInmemGroupTest"); - - PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID); - - IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec); - spec.connect(conn2, grouper, 0, printer, 0); - - spec.addRoot(printer); - runTest(spec); - } - @Test public void multiKeyMinMaxStringPreClusterGroupTest() throws Exception { JobSpecification spec = new JobSpecification(); @@ -841,16 +704,18 @@ public void multiKeyMinMaxStringPreClusterGroupTest() throws Exception { PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(1, true), - new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec); + new MultiFieldsAggregatorFactory( + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), + new MinMaxStringFieldAggregatorFactory(15, true, false) }), + outputRec); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringPreClusterGroupTest"); @@ -880,27 +745,29 @@ public void multiKeyMinMaxStringExtGroupTest() throws Exception { int[] keyFields = new int[] { 8, 0 }; int frameLimits = 4; int tableSize = 8; + long fileSize = frameLimits * spec.getFrameSize(); - ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits, + ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, + keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, - new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory( + new UTF8StringNormalizedKeyComputerFactory(), + new 
MultiFieldsAggregatorFactory( new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new MinMaxStringFieldAggregatorFactory(15, true, true) }), - new MultiFieldsAggregatorFactory(new int[] { 0, 1 }, new IFieldAggregateDescriptorFactory[] { - new IntSumFieldAggregatorFactory(2, false), - new MinMaxStringFieldAggregatorFactory(3, true, true) }), outputRec, - new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, - new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true); + new MultiFieldsAggregatorFactory(new int[] { 0, 1 }, + new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false), + new MinMaxStringFieldAggregatorFactory(3, true, true) }), + outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { + UTF8StringBinaryHashFunctionFamily.INSTANCE, UTF8StringBinaryHashFunctionFamily.INSTANCE })); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID); IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, - new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), - PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); + new FieldHashPartitionComputerFactory(keyFields, + new IBinaryHashFunctionFactory[] { + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), + PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) })); spec.connect(conn1, csvScanner, 0, grouper, 0); AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringExtGroupTest"); From 3534e99d2b2ca2b752ee278608632bc7eb769142 Mon Sep 17 00:00:00 2001 From: david Date: Sun, 22 May 2016 15:43:29 -0700 Subject: [PATCH 04/10] Shuffle for multiple join shuffle --- .../org/apache/hyracks/tests/integration/AggregationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java index e7d8d7eb5..48eed9354 100644 --- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java +++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java @@ -312,7 +312,7 @@ public void singleKeyAv2g2PreClusterGroupTest() throws Exception { /////This is Mingda Li's code here /////This is Mingda Li's code here - + ///this is for mingda's shuffle From 907d051c238eda02d447a098d847ad5a6479cd8a Mon Sep 17 00:00:00 2001 From: david Date: Sat, 18 Jun 2016 20:47:06 -0700 Subject: [PATCH 05/10] SampleForMultipleJoinOperatorDescriptor SampleForMultipleJoinOperatorDescriptor --- ...pleForMultipleJoinOperatorDescriptor.class | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class 
b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class new file mode 100644 index 000000000..5859043ee --- /dev/null +++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class @@ -0,0 +1,171 @@ +package org.apache.hyracks.dataflow.std.join; + +import org.apache.hyracks.api.comm.IFrame; +import org.apache.hyracks.api.comm.VSizeFrame; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.dataflow.ActivityId; +import org.apache.hyracks.api.dataflow.IActivityGraphBuilder; +import org.apache.hyracks.api.dataflow.IOperatorNodePushable; +import org.apache.hyracks.api.dataflow.TaskId; +import org.apache.hyracks.api.dataflow.value.*; +import java.util.Random; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.api.io.FileReference; +import org.apache.hyracks.api.job.IOperatorDescriptorRegistry; +import org.apache.hyracks.api.job.JobId; +import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor; +import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender; +import org.apache.hyracks.dataflow.common.comm.util.FrameUtils; +import org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory; +import org.apache.hyracks.dataflow.common.data.partition.RepartitionComputerFactory; +import org.apache.hyracks.dataflow.common.io.RunFileReader; +import org.apache.hyracks.dataflow.common.io.RunFileWriter; +import org.apache.hyracks.dataflow.std.base.*; +import org.apache.hyracks.dataflow.std.structures.ISerializableTable; +import org.apache.hyracks.dataflow.std.structures.SerializableHashTable; +import org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by MingdaLi on 6/16/16. 
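+ * Samples incoming tuples: the key fields (keys0) are hashed into statePartitions buckets,
+ * and at most outputLimit tuple indexes are kept per bucket, reservoir-style.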
+ */
+
+
+
+
+public class SampleForMultipleJoinOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+    private final int outputLimit; // reservoir size: max tuples sampled per partition
+    private final int[] keys0; // the key fields to sample on
+    private final IBinaryHashFunctionFactory[] hashFunctionFactories; // binary hash functions for the sampled key fields
+    private final int statePartitions; // number of partitions the hash assigns tuples to
+
+    public SampleForMultipleJoinOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor rDesc, int outputLimit, int[] keys0, IBinaryHashFunctionFactory[] hashFunctionFactories, int statePartitions) {
+        super(spec, 1, 1);
+        recordDescriptors[0] = rDesc;
+        this.outputLimit = outputLimit;
+        this.keys0 = keys0;
+        this.hashFunctionFactories = hashFunctionFactories;
+        this.statePartitions = statePartitions;
+    }
+
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            final IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
+            throws HyracksDataException {
+        //final RecordDescriptor rd1 = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+
+        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+            private FrameTupleAccessor fta;
+            private int currentSize;
+            private boolean finished;
+            private final ITuplePartitionComputer hpcBuild = new FieldHashPartitionComputerFactory(keys0,
+                    hashFunctionFactories).createPartitioner();
+            //private final FrameTupleAccessor accessorBuild = new FrameTupleAccessor(rd1);
+
+            @Override
+            public void open() throws HyracksDataException {
+                fta = new FrameTupleAccessor(recordDescriptors[0]);
+                currentSize = 0;
+                finished = false;
+                writer.open();
+            }
+
+            @Override
+            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                if (!finished) {
+                    fta.reset(buffer);
+                    int count = fta.getTupleCount();
+                    if ((currentSize + count) > outputLimit) {
+                        FrameTupleAppender partialAppender = new FrameTupleAppender(new VSizeFrame(ctx));
+                        int[][] reservoir = new int[statePartitions][outputLimit]; // sampled tuple indexes per partition
+                        int[] countForEachPartition = new int[statePartitions];
+                        for (int i = 0; i < count; ++i) {
+                            int entry;
+                            // entry = hpcBuild.partition(fta, i, statePartitions);
+
+                            //get hash value
+                            IBinaryHashFunction[] hashFunctions = new IBinaryHashFunction[hashFunctionFactories.length];
+                            for (int j = 0; j < hashFunctionFactories.length; ++j) {
+                                hashFunctions[j] = hashFunctionFactories[j].createBinaryHashFunction();
+                            }
+                            int h = 0;
+                            int startOffset = fta.getTupleStartOffset(i);
+                            int slotLength = fta.getFieldSlotsLength();
+                            for (int j = 0; j < keys0.length; ++j) {
+                                int fIdx = keys0[j];
+                                IBinaryHashFunction hashFn = hashFunctions[j];
+                                int fStart = fta.getFieldStartOffset(i, fIdx);
+                                int fEnd = fta.getFieldEndOffset(i, fIdx);
+                                int length = fEnd - fStart - 1; // number of value bytes in the field (first byte skipped)
+                                int startOfInt = startOffset + slotLength + fStart + 1; // absolute offset of the field's first value byte
+                                int fh = 0;
+                                for (int g = 0; g < length; g++) {

[The rest of nextFrame() and the remainder of this class were lost when the patch was archived.]
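The operator above spells out per-partition reservoir sampling directly over raw frame bytes. As a reading aid, here is a minimal standalone sketch of the same idea in plain Java; the int[]-array record representation and the recordHash helper are stand-ins invented for this illustration, not Hyracks API or part of the patch:

import java.util.Random;

public class ReservoirSketch {
    // Keep at most reservoirSize record indexes per partition (classic Algorithm R).
    public static int[][] sample(int[][] records, int partitions, int reservoirSize, Random rnd) {
        int[][] reservoir = new int[partitions][reservoirSize]; // sampled record indexes per partition
        int[] seen = new int[partitions];                       // records routed to each partition so far
        for (int i = 0; i < records.length; i++) {
            int p = Math.floorMod(recordHash(records[i]), partitions); // route by key hash
            if (seen[p] < reservoirSize) {
                reservoir[p][seen[p]] = i;        // reservoir not yet full: always keep
            } else {
                int j = rnd.nextInt(seen[p] + 1); // full: keep with probability reservoirSize/seen
                if (j < reservoirSize) {
                    reservoir[p][j] = i;
                }
            }
            seen[p]++;
        }
        return reservoir;
    }

    // Placeholder for the patch's byte-level key hash.
    private static int recordHash(int[] record) {
        return 31 * record[0] + record[1];
    }
}

The patch's version differs in that it hashes the key fields directly from the tuple's serialized bytes inside the frame (fStart to fEnd), so no deserialization happens inside nextFrame().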
From: david
Date: Sat, 18 Jun 2016 20:47:51 -0700
Subject: [PATCH 06/10] Rename SampleForMultipleJoinOperatorDescriptor.class to SampleForMultipleJoinOperatorDescriptor.java

---
 ...criptor.class => SampleForMultipleJoinOperatorDescriptor.java} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/{SampleForMultipleJoinOperatorDescriptor.class => SampleForMultipleJoinOperatorDescriptor.java} (100%)

diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.java
similarity index 100%
rename from hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.class
rename to hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor.java

From 2e3089ab772685928c2fa348fc31964c505d9bef Mon Sep 17 00:00:00 2001
From: david
Date: Sun, 19 Jun 2016 18:17:21 -0700
Subject: [PATCH 07/10] SampleForMultipleJoinOperatorDescriptor_Version0_1.java

Use while instead of for (i < count) when iterating the tuples in a frame.

[The patch statistics and the beginning of SampleForMultipleJoinOperatorDescriptor_Version0_1.java were lost when the patch was archived; the surviving hunk resumes inside nextFrame():]

+                    if ((currentSize + count) > outputLimit) {
+                        FrameTupleAppender partialAppender = new FrameTupleAppender(new VSizeFrame(ctx));
+                        int[][] reservoir = new int[statePartitions][outputLimit]; // sampled tuple indexes per partition
+                        int[] countForEachPartition = new int[statePartitions];
+                        //for (int i = 0; i < count; ++i) {
+                        int i = 0;
+                        int startOffset = 1;
+                        while (((startOffset = fta.getTupleStartOffset(i)) != 0) || (i == 0)) {
+                            int entry;
+                            // entry = hpcBuild.partition(fta, i, statePartitions);
+
+                            //get hash value
+                            IBinaryHashFunction[] hashFunctions = new IBinaryHashFunction[hashFunctionFactories.length];
+                            for (int j = 0; j < hashFunctionFactories.length; ++j) {
+                                hashFunctions[j] = hashFunctionFactories[j].createBinaryHashFunction();
+                            }
+                            int h = 0;
+                            int slotLength = fta.getFieldSlotsLength();
+                            for (int j = 0; j < keys0.length; ++j) {
+                                int fIdx = keys0[j];
+                                IBinaryHashFunction hashFn = hashFunctions[j];
+                                int fStart = fta.getFieldStartOffset(i, fIdx);
+                                int fEnd = fta.getFieldEndOffset(i, fIdx);
+                                int length = fEnd - fStart - 1; // number of value bytes in the field (first byte skipped)
+                                int startOfInt = startOffset + slotLength + fStart + 1; // absolute offset of the field's first value byte
+                                int fh = 0;
+                                for (int g = 0; g < length; g++) {

[The remainder of SampleForMultipleJoinOperatorDescriptor_Version0_1.java was lost when the patch was archived.]

From: david
Date: Tue, 21 Jun 2016 18:32:29 -0700
Subject: [PATCH 08/10] Test for TPC-DS data

test for TPC-DS data
---
 .../hyracks/dataflow/std/join/TPCDStest.java | 939 ++++++++++++++++++
 1 file changed, 939 insertions(+)
 create mode 100644 hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/TPCDStest.java

diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/TPCDStest.java b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/TPCDStest.java
new file mode 100644
index 000000000..b5b86223f
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/TPCDStest.java
@@ -0,0 +1,939 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.hyracks.tests.integration; + +import java.io.File; +import java.util.Arrays; +import java.util.Date; +import org.junit.Test; + +import org.apache.hyracks.api.constraints.PartitionConstraintHelper; +import org.apache.hyracks.api.dataflow.IConnectorDescriptor; +import org.apache.hyracks.api.dataflow.IOperatorDescriptor; +import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory; +import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily; +import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; +import org.apache.hyracks.api.dataflow.value.RecordDescriptor; +import org.apache.hyracks.api.dataset.ResultSetId; +import org.apache.hyracks.api.io.FileReference; +import org.apache.hyracks.api.job.JobSpecification; +import org.apache.hyracks.data.std.accessors.PointableBinaryComparatorFactory; +import org.apache.hyracks.data.std.accessors.UTF8StringBinaryHashFunctionFamily; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory; +import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory; +import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory; +import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory; +import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory; +import org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory; +import org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor; +import org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor; +import org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider; +import org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory; +import org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor; +import org.apache.hyracks.dataflow.std.file.FileSplit; +import org.apache.hyracks.dataflow.std.file.IFileSplitProvider; +import org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor; +import org.apache.hyracks.dataflow.std.join.JoinComparatorFactory; +import org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor; +import org.apache.hyracks.tests.util.ResultSerializerFactoryProvider; + +import org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor; + +public class TPCDStest extends AbstractIntegrationTest { + + private static boolean DEBUG = false; + + static RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); + + static RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new 
UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor lineItemDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer()}); + + + static RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor lineOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer() }); + static RecordDescriptor custorderLineItemJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new 
UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() + }); + + static RecordDescriptor storeSaleDesc=new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer() }); + static RecordDescriptor webSaleDesc=new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); + static RecordDescriptor catalogSaleDesc=new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new 
UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() }); + static RecordDescriptor webStoreJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer() + + }); + static RecordDescriptor storeCatalogJoinDesc 
= new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() + + }); + + static RecordDescriptor webStoreCatalogJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(),new UTF8StringSerializerDeserializer(), + new 
UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer()
+    });
+
+    static IValueParserFactory[] custValueParserFactories = new IValueParserFactory[custDesc.getFieldCount()];
+    static IValueParserFactory[] orderValueParserFactories = new IValueParserFactory[ordersDesc.getFieldCount()];
+    static IValueParserFactory[] lineItemValueParserFactories = new IValueParserFactory[lineItemDesc.getFieldCount()];
+    static IValueParserFactory[] storeSaleValueParserFactories = new IValueParserFactory[storeSaleDesc.getFieldCount()];
+    static IValueParserFactory[] webSaleValueParserFactories = new IValueParserFactory[webSaleDesc.getFieldCount()];
+    static IValueParserFactory[] catalogSaleValueParserFactories = new IValueParserFactory[catalogSaleDesc.getFieldCount()];
+
+    static {
+        Arrays.fill(custValueParserFactories, UTF8StringParserFactory.INSTANCE);
+        Arrays.fill(orderValueParserFactories, UTF8StringParserFactory.INSTANCE);
+        Arrays.fill(lineItemValueParserFactories, UTF8StringParserFactory.INSTANCE);
+        Arrays.fill(storeSaleValueParserFactories, UTF8StringParserFactory.INSTANCE);
+        Arrays.fill(webSaleValueParserFactories, UTF8StringParserFactory.INSTANCE);
+        Arrays.fill(catalogSaleValueParserFactories, UTF8StringParserFactory.INSTANCE);
+    }
+
+    private IOperatorDescriptor getPrinter(JobSpecification spec, File file) {
+        IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(
+                new FileSplit[] { new FileSplit(NC1_ID, file.getAbsolutePath()) });
+
+        return DEBUG ? new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|")
+                : new NullSinkOperatorDescriptor(spec);
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_CaseTPCDs() throws Exception {
+        // Good join order for the TPC-DS query (Mingda Li):
+        // (webSales join storeSales) join catalogSales.
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+
+        FileSplit[] catalogSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/catalog_sales4g.tbl"))) };
+        IFileSplitProvider catalogSplitsProvider = new ConstantFileSplitProvider(catalogSplits);
+
+        FileSplit[] webSalesSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/web_sales4g.tbl"))) };
+        IFileSplitProvider webSalesSplitsProvider = new ConstantFileSplitProvider(webSalesSplits);
+
+        FileSplit[] storeSalesSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/store_sales4g.tbl"))) };
+        IFileSplitProvider storeSalesSplitsProvider = new ConstantFileSplitProvider(storeSalesSplits);
+
+        FileScanOperatorDescriptor storeSaleScanner = new FileScanOperatorDescriptor(spec, storeSalesSplitsProvider,
+                new DelimitedDataTupleParserFactory(storeSaleValueParserFactories, '|'), storeSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, storeSaleScanner, NC1_ID);
+
+        FileScanOperatorDescriptor webSalesScanner = new FileScanOperatorDescriptor(spec, webSalesSplitsProvider,
+                new DelimitedDataTupleParserFactory(webSaleValueParserFactories, '|'), webSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, webSalesScanner, NC1_ID);
+
+        FileScanOperatorDescriptor catalogSalesScanner = new FileScanOperatorDescriptor(spec, catalogSplitsProvider,
+                new DelimitedDataTupleParserFactory(catalogSaleValueParserFactories, '|'), catalogSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, catalogSalesScanner, NC1_ID);
+
+        // First join: webSales (columns 3, 4) with storeSales (columns 2, 3);
+        // two key columns, so two hash function families and comparators.
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 3, 4 }, new int[] { 2, 3 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE,
+                        UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                webStoreJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        // Second join: the (webSales, storeSales) result with catalogSales on column 15.
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 3 }, new int[] { 15 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                webStoreCatalogJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        ResultSetId rsId = new ResultSetId(1);
+        spec.addResultSetId(rsId);
+        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
+                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor webJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(webJoinConn, webSalesScanner, 0, join, 0);
+
+        IConnectorDescriptor storeJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeJoinConn, storeSaleScanner, 0, join, 1);
+
+        IConnectorDescriptor webStoreJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(webStoreJoinResultConn, join, 0, join2, 0);
+
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, catalogSalesScanner, 0, join2, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        long endTime = new Date().getTime();
+        System.out.println("good join order took " + (endTime - startTime) + " ms");
+        File temp = new File("tpcDS2G");
+        runTestAndStoreResult(spec, temp);
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_CaseTPCDsBad() throws Exception {
+        // Bad join order for the same TPC-DS query (Mingda Li):
+        // (storeSales join catalogSales) join webSales.
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+
+        FileSplit[] catalogSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/catalog_sales4g.tbl"))) };
+        IFileSplitProvider catalogSplitsProvider = new ConstantFileSplitProvider(catalogSplits);
+
+        FileSplit[] webSalesSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/web_sales4g.tbl"))) };
+        IFileSplitProvider webSalesSplitsProvider = new ConstantFileSplitProvider(webSalesSplits);
+
+        FileSplit[] storeSalesSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/store_sales4g.tbl"))) };
+        IFileSplitProvider storeSalesSplitsProvider = new ConstantFileSplitProvider(storeSalesSplits);
+
+        FileScanOperatorDescriptor storeSaleScanner = new FileScanOperatorDescriptor(spec, storeSalesSplitsProvider,
+                new DelimitedDataTupleParserFactory(storeSaleValueParserFactories, '|'), storeSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, storeSaleScanner, NC1_ID);
+
+        FileScanOperatorDescriptor webSalesScanner = new FileScanOperatorDescriptor(spec, webSalesSplitsProvider,
+                new DelimitedDataTupleParserFactory(webSaleValueParserFactories, '|'), webSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, webSalesScanner, NC1_ID);
+
+        FileScanOperatorDescriptor catalogSalesScanner = new FileScanOperatorDescriptor(spec, catalogSplitsProvider,
+                new DelimitedDataTupleParserFactory(catalogSaleValueParserFactories, '|'), catalogSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, catalogSalesScanner, NC1_ID);
+
+        // First join: storeSales (column 2) with catalogSales (column 15).
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 200, 243,
+                1.2, new int[] { 2 }, new int[] { 15 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                storeCatalogJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        // Second join: the (storeSales, catalogSales) result with webSales on columns 3 and 4;
+        // two key columns, so two hash function families and comparators.
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 2, 3 }, new int[] { 3, 4 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE,
+                        UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                webStoreCatalogJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        ResultSetId rsId = new ResultSetId(1);
+        spec.addResultSetId(rsId);
+        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
+                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor catalogJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(catalogJoinConn, catalogSalesScanner, 0, join, 1);
+
+        IConnectorDescriptor storeJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeJoinConn, storeSaleScanner, 0, join, 0);
+
+        IConnectorDescriptor storeCatalogJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeCatalogJoinResultConn, join, 0, join2, 0);
+
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, webSalesScanner, 0, join2, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        long endTime = new Date().getTime();
+        System.out.println("bad join order took " + (endTime - startTime) + " ms");
+        File temp = new File("tpcDS8G");
+        runTestAndStoreResult(spec, temp);
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_CaseTPCDs1() throws Exception {
+        // Two-way TPC-DS join (Mingda Li): storeSales with catalogSales.
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+
+        FileSplit[] catalogSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/catalog_sales.tbl"))) };
+        IFileSplitProvider catalogSplitsProvider = new ConstantFileSplitProvider(catalogSplits);
+
+        FileSplit[] storeSalesSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/store_sales.tbl"))) };
+        IFileSplitProvider storeSalesSplitsProvider = new ConstantFileSplitProvider(storeSalesSplits);
+
+        FileScanOperatorDescriptor storeSaleScanner = new FileScanOperatorDescriptor(spec, storeSalesSplitsProvider,
+                new DelimitedDataTupleParserFactory(storeSaleValueParserFactories, '|'), storeSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, storeSaleScanner, NC1_ID);
+
+        FileScanOperatorDescriptor catalogSalesScanner = new FileScanOperatorDescriptor(spec, catalogSplitsProvider,
+                new DelimitedDataTupleParserFactory(catalogSaleValueParserFactories, '|'), catalogSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, catalogSalesScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 63, 243,
+                1.2, new int[] { 2 }, new int[] { 15 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                storeCatalogJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        ResultSetId rsId = new ResultSetId(1);
+        spec.addResultSetId(rsId);
+        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
+                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor storeJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeJoinConn, storeSaleScanner, 0, join2, 0);
+
+        IConnectorDescriptor catalogJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(catalogJoinConn, catalogSalesScanner, 0, join2, 1);
+
+        // The hybrid hash join exposes a single output port, port 0.
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        long endTime = new Date().getTime();
+        System.out.println("store-catalog join took " + (endTime - startTime) + " ms");
+        File temp = new File("tpcDS8G");
+        runTestAndStoreResult(spec, temp);
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case1() throws Exception {
+        // Bad join order (Mingda Li): build (customer join orders) first,
+        // then join the much larger lineitem input.
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customerlmdd.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] lineSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/lineitem1g.tbl"))) };
+        IFileSplitProvider lineSplitsProvider = new ConstantFileSplitProvider(lineSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orderslmdd.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 1 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custOrderJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        FileScanOperatorDescriptor lineItemScanner = new FileScanOperatorDescriptor(spec, lineSplitsProvider,
+                new DelimitedDataTupleParserFactory(lineItemValueParserFactories, '|'), lineItemDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, lineItemScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 0 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custorderLineItemJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        ResultSetId rsId = new ResultSetId(1);
+        spec.addResultSetId(rsId);
+        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
+                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor custOrderJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custOrderJoinResultConn, join, 0, join2, 1);
+
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, lineItemScanner, 0, join2, 0);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        long endTime = new Date().getTime();
+        System.out.println("bad join order took " + (endTime - startTime) + " ms");
+        File temp = new File("result2join");
+        runTestAndStoreResult(spec, temp);
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case12() throws Exception {
+        // Good join order (Mingda Li): build (lineitem join orders) first,
+        // then join the small customer input.
+        JobSpecification spec = new JobSpecification();
+        long startTime = new Date().getTime();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customerlmdd.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] lineSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/lineitem1g.tbl"))) };
+        IFileSplitProvider lineSplitsProvider = new ConstantFileSplitProvider(lineSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orderslmdd.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+
+        FileScanOperatorDescriptor lineItemScanner = new FileScanOperatorDescriptor(spec, lineSplitsProvider,
+                new DelimitedDataTupleParserFactory(lineItemValueParserFactories, '|'), lineItemDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, lineItemScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 0 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                lineOrderJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join2 = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 1 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custorderLineItemJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join2, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case12");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+        IConnectorDescriptor lineJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(lineJoinConn, lineItemScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor ordLineJoinResultConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordLineJoinResultConn, join, 0, join2, 1);
+
+        IConnectorDescriptor thirdJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(thirdJoinConn, custScanner, 0, join2, 0);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join2, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+        long endTime = new Date().getTime();
+        System.out.println("good join order took " + (endTime - startTime) + " ms");
+        File temp = new File("result2join");
+        runTestAndStoreResult(spec, temp);
+    }
+
+    /* @Test
+    public void customerOrderCIDHybridHashJoin_CasetestDS() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] storeSplit = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/store_sales.tbl"))) };
+        IFileSplitProvider storeSplitsProvider = new ConstantFileSplitProvider(storeSplit);
+
+        FileSplit[] cataSplit = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/catalog_sales.tbl"))) };
+        IFileSplitProvider cataSplitsProvider = new ConstantFileSplitProvider(cataSplit);
+
+        FileScanOperatorDescriptor storeScanner = new FileScanOperatorDescriptor(spec, storeSplitsProvider,
+                new DelimitedDataTupleParserFactory(storeSaleValueParserFactories, '|'), storeSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, storeScanner, NC1_ID);
+
+        FileScanOperatorDescriptor cataScanner = new FileScanOperatorDescriptor(spec, cataSplitsProvider,
+                new DelimitedDataTupleParserFactory(catalogSaleValueParserFactories, '|'), catalogSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, cataScanner, NC1_ID);
+
+        // Two join key columns, so two hash function families and comparators are required.
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 122,
+                1.2, new int[] { 2, 10 }, new int[] { 15, 18 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE,
+                        UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                storeCatalogJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case2");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor storeJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeJoinConn, storeScanner, 0, join, 0);
+
+        IConnectorDescriptor cataJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(cataJoinConn, cataScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_CasetestDSStoreWeb() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] webSplit = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/web_sales.tbl"))) };
+        IFileSplitProvider webSplitsProvider = new ConstantFileSplitProvider(webSplit);
+
+        FileSplit[] storeSplit = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/store_sales.tbl"))) };
+        IFileSplitProvider storeSplitsProvider = new ConstantFileSplitProvider(storeSplit);
+
+        FileScanOperatorDescriptor webScanner = new FileScanOperatorDescriptor(spec, webSplitsProvider,
+                new DelimitedDataTupleParserFactory(webSaleValueParserFactories, '|'), webSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, webScanner, NC1_ID);
+
+        FileScanOperatorDescriptor storeScanner = new FileScanOperatorDescriptor(spec, storeSplitsProvider,
+                new DelimitedDataTupleParserFactory(storeSaleValueParserFactories, '|'), storeSaleDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, storeScanner, NC1_ID);
+
+        // Two join key columns, so two hash function families and comparators are required.
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 122,
+                1.2, new int[] { 3, 4 }, new int[] { 2, 3 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE,
+                        UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                webStoreJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case2");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor webJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(webJoinConn, webScanner, 0, join, 0);
+
+        IConnectorDescriptor storeJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(storeJoinConn, storeScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+    }*/
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case1_StatsFirst() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customerlmdd.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orderslmdd.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243,
+                1.2, new int[] { 0 }, new int[] { 1 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custOrderJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case1");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case2() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customer3.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orders4.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        // Composite join key (two columns per side), so two hash function families and comparators.
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 122,
+                1.2, new int[] { 0, 3 }, new int[] { 1, 0 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE,
+                        UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custOrderJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case2");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+    }
+
+    @Test
+    public void customerOrderCIDHybridHashJoin_Case3() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] custSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
+                "data/tpch0.001/customer3.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+
+        FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
+                "data/tpch0.001/orders1.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
+
+        OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 6, 122,
+                1.2, new int[] { 0 }, new int[] { 1 },
+                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
+                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+                custOrderJoinDesc,
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
+                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
+                null);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
+
+        File file = File.createTempFile(getClass().getName(), "case3");
+        IOperatorDescriptor printer = getPrinter(spec, file);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 0);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+        System.out.println("output to " + file.getAbsolutePath());
+    }
+}

From 08fca79260a50798b224b759dfc055ceab7735ce Mon Sep 17 00:00:00 2001
From: david
Date: Fri, 8 Jul 2016 11:15:22 -0700
Subject: [PATCH 09/10] version

---
 ...ipleJoinOperatorDescriptor_Version0_2.java | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java

diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java
new file mode 100644
index 000000000..af1970de0
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java
@@ -0,0 +1,187 @@
+package org.apache.hyracks.dataflow.std.join;
+
+import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.ActivityId;
+import org.apache.hyracks.api.dataflow.IActivityGraphBuilder;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.TaskId;
+import org.apache.hyracks.api.dataflow.value.*;
+
+import java.io.*;
+import java.util.Random;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.api.job.JobId;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+import org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
+import org.apache.hyracks.dataflow.common.data.partition.RepartitionComputerFactory;
+import org.apache.hyracks.dataflow.common.io.RunFileReader;
+import org.apache.hyracks.dataflow.common.io.RunFileWriter;
+import org.apache.hyracks.dataflow.std.base.*;
+import org.apache.hyracks.dataflow.std.structures.ISerializableTable;
+import org.apache.hyracks.dataflow.std.structures.SerializableHashTable;
+import org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by MingdaLi on 6/16/16.
+ */
+public class SampleForMultipleJoinOperatorDescriptor_Version0_2 extends AbstractSingleActivityOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+    private final int outputLimit; // reservoir size per partition
+    private final int[] keys0; // key columns to sample on
+    private final IBinaryHashFunctionFactory[] hashFunctionFactories; // hash functions for the sample keys
+    private final int statePartitions; // number of partitions the sample keys are hashed into
+
+    public SampleForMultipleJoinOperatorDescriptor_Version0_2(IOperatorDescriptorRegistry spec, RecordDescriptor rDesc,
+            int outputLimit, int[] keys0, IBinaryHashFunctionFactory[] hashFunctionFactories, int statePartitions) {
+        super(spec, 1, 1);
+        recordDescriptors[0] = rDesc;
+        this.outputLimit = outputLimit;
+        this.keys0 = keys0;
+        this.hashFunctionFactories = hashFunctionFactories;
+        this.statePartitions = statePartitions;
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            final IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
+            throws HyracksDataException {
+        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+            private FrameTupleAccessor fta;
+            private int currentSize;
+            private boolean finished;
+            private final ITuplePartitionComputer hpcBuild = new FieldHashPartitionComputerFactory(keys0,
+                    hashFunctionFactories).createPartitioner();
+
+            @Override
+            public void open() throws HyracksDataException {
+                fta = new FrameTupleAccessor(recordDescriptors[0]);
+                currentSize = 0;
+                finished = false;
+                writer.open();
+            }
+
+            @Override
+            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                if (!finished) {
+                    fta.reset(buffer);
+                    int count = fta.getTupleCount();
+                    FrameTupleAppender partialAppender = new FrameTupleAppender(new VSizeFrame(ctx));
+                    // One reservoir of sampled tuple indexes per hash partition.
+                    int[][] reservoir = new int[statePartitions][outputLimit + 1];
+                    if (ctx.getSharedObject() != null) {
+                        // Resume the reservoir carried over from earlier frames.
+                        reservoir = (int[][]) ctx.getSharedObject();
+                    }
+
+                    int[] countForEachPartition = new int[statePartitions];
+                    for (int i=0;i

Date: Fri, 8 Jul 2016 11:16:38 -0700
Subject: [PATCH 10/10] version: solve the storing of results between different frames

---
 ...ipleJoinOperatorDescriptor_Version0_2.java | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java

diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java
new file mode 100644
index 000000000..af1970de0
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/SampleForMultipleJoinOperatorDescriptor_Version0_2.java
@@ -0,0 +1,187 @@
+package org.apache.hyracks.dataflow.std.join;
+
+import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.ActivityId;
+import org.apache.hyracks.api.dataflow.IActivityGraphBuilder;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.TaskId;
+import org.apache.hyracks.api.dataflow.value.*;
+
+import java.io.*;
+import java.util.Random;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.api.job.JobId;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+import org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
+import org.apache.hyracks.dataflow.common.data.partition.RepartitionComputerFactory;
+import org.apache.hyracks.dataflow.common.io.RunFileReader;
+import org.apache.hyracks.dataflow.common.io.RunFileWriter;
+import org.apache.hyracks.dataflow.std.base.*;
+import org.apache.hyracks.dataflow.std.structures.ISerializableTable;
+import org.apache.hyracks.dataflow.std.structures.SerializableHashTable;
+import org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by MingdaLi on 6/16/16.
+ */
+public class SampleForMultipleJoinOperatorDescriptor_Version0_2 extends AbstractSingleActivityOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+    private final int outputLimit; // reservoir size per partition
+    private final int[] keys0; // key columns to sample on
+    private final IBinaryHashFunctionFactory[] hashFunctionFactories; // hash functions for the sample keys
+    private final int statePartitions; // number of partitions the sample keys are hashed into
+
+    public SampleForMultipleJoinOperatorDescriptor_Version0_2(IOperatorDescriptorRegistry spec, RecordDescriptor rDesc,
+            int outputLimit, int[] keys0, IBinaryHashFunctionFactory[] hashFunctionFactories, int statePartitions) {
+        super(spec, 1, 1);
+        recordDescriptors[0] = rDesc;
+        this.outputLimit = outputLimit;
+        this.keys0 = keys0;
+        this.hashFunctionFactories = hashFunctionFactories;
+        this.statePartitions = statePartitions;
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            final IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
+            throws HyracksDataException {
+        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+            private FrameTupleAccessor fta;
+            private int currentSize;
+            private boolean finished;
+            private final ITuplePartitionComputer hpcBuild = new FieldHashPartitionComputerFactory(keys0,
+                    hashFunctionFactories).createPartitioner();
+
+            @Override
+            public void open() throws HyracksDataException {
+                fta = new FrameTupleAccessor(recordDescriptors[0]);
+                currentSize = 0;
+                finished = false;
+                writer.open();
+            }
+
+            @Override
+            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                if (!finished) {
+                    fta.reset(buffer);
+                    int count = fta.getTupleCount();
+                    FrameTupleAppender partialAppender = new FrameTupleAppender(new VSizeFrame(ctx));
+                    // One reservoir of sampled tuple indexes per hash partition, carried
+                    // across frames through the task context's shared object.
+                    int[][] reservoir = new int[statePartitions][outputLimit + 1];
+                    if (ctx.getSharedObject() != null) {
+                        reservoir = (int[][]) ctx.getSharedObject();
+                    }
+
+                    int[] countForEachPartition = new int[statePartitions];
+                    for (int i=0;i