diff --git a/dbcon/execplan/calpontselectexecutionplan.cpp b/dbcon/execplan/calpontselectexecutionplan.cpp index 62e6cfcd5..7e2415a0e 100644 --- a/dbcon/execplan/calpontselectexecutionplan.cpp +++ b/dbcon/execplan/calpontselectexecutionplan.cpp @@ -628,6 +628,11 @@ void CalpontSelectExecutionPlan::serialize(messageqcpp::ByteStream& b) const b << timeZone; b << fPron; b << (uint8_t)fWithRollup; + b << (uint8_t)fIsRecursiveWithTable; + b << (uint8_t)fIsRecursiveQuery; + b << (uint8_t)fContainsRecursiveQuery; + + b << fMaxRecursiveDepth; } void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b) @@ -832,6 +837,13 @@ void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b) utils::Pron::instance().pron(fPron); b >> tmp8; fWithRollup = tmp8; + b >> tmp8; + fIsRecursiveWithTable = tmp8; + b >> tmp8; + fIsRecursiveQuery = tmp8; + b >> tmp8; + fContainsRecursiveQuery = tmp8; + b >> fMaxRecursiveDepth; } bool CalpontSelectExecutionPlan::operator==(const CalpontSelectExecutionPlan& t) const diff --git a/dbcon/execplan/calpontselectexecutionplan.h b/dbcon/execplan/calpontselectexecutionplan.h index d39b16693..f06ceee89 100644 --- a/dbcon/execplan/calpontselectexecutionplan.h +++ b/dbcon/execplan/calpontselectexecutionplan.h @@ -23,6 +23,7 @@ /** @file */ #pragma once +#include #include #include #include @@ -496,7 +497,12 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan { return fDerivedTableList; } - void derivedTableList(const SelectList& derivedTableList) + + SelectList& derivedTableList() + { + return fDerivedTableList; + } + void derivedTableList(SelectList& derivedTableList) { fDerivedTableList = derivedTableList; } @@ -523,10 +529,12 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan { fUnionVec = unionVec; } + const SelectList& unionVec() const { return fUnionVec; } + SelectList& unionVec() { return fUnionVec; @@ -765,6 +773,46 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan return fTimeZone; } + void isRecursiveWithTable(bool b) + { + fIsRecursiveWithTable = b; + } + + bool isRecursiveWithTable() + { + return fIsRecursiveWithTable; + } + + void isRecursiveQuery(bool b) + { + fIsRecursiveQuery = b; + } + + bool isRecursiveQuery() + { + return fIsRecursiveQuery; + } + + void containsRecursiveQuery(bool b) + { + fContainsRecursiveQuery = b; + } + + bool containsRecursiveQuery() + { + return fContainsRecursiveQuery; + } + + void maxRecursiveDepth(uint32_t i) + { + fMaxRecursiveDepth = i; + } + + int maxRecursiveDepth() + { + return fMaxRecursiveDepth; + } + /** * The serialization interface */ @@ -985,6 +1033,11 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan * A flag to compute subtotals, related to GROUP BY operation. */ bool fWithRollup; + bool fIsRecursiveWithTable = false; + bool fIsRecursiveQuery = false; + bool fContainsRecursiveQuery = false; + + uint32_t fMaxRecursiveDepth; }; /** diff --git a/dbcon/joblist/fifo.h b/dbcon/joblist/fifo.h index b88bbac75..096f6e8e3 100644 --- a/dbcon/joblist/fifo.h +++ b/dbcon/joblist/fifo.h @@ -83,7 +83,7 @@ class FIFO : public DataListImpl, element_t> } inline void dropToken() {}; - inline void dropToken(uint32_t){}; + inline void dropToken(uint32_t) {}; // Counters that reflect how many many times this FIFO blocked on reads/writes uint64_t blockedWriteCount() const; diff --git a/dbcon/joblist/jlf_subquery.cpp b/dbcon/joblist/jlf_subquery.cpp index b94381756..5e662a7eb 100644 --- a/dbcon/joblist/jlf_subquery.cpp +++ b/dbcon/joblist/jlf_subquery.cpp @@ -21,7 +21,7 @@ #include #include #include -//#define NDEBUG +// #define NDEBUG #include #include using namespace std; @@ -748,6 +748,10 @@ int doFromSubquery(CalpontExecutionPlan* ep, const string& alias, const string& SJSTEP subQueryStep = transformer.makeSubQueryStep(csep, true); subQueryStep->view(view); SJSTEP subAd(new SubAdapterStep(subQueryStep, jobInfo)); + if (csep->isRecursiveQuery()) + { + dynamic_cast(subAd.get())->isRecursiveStep(true); + } jobInfo.selectAndFromSubs.push_back(subAd); return CNX_VTABLE_ID; @@ -870,6 +874,10 @@ SJSTEP doUnionSub(CalpontExecutionPlan* ep, JobInfo& jobInfo) transformer.setVarbinaryOK(); SJSTEP subQueryStep = transformer.makeSubQueryStep(csep, false); SJSTEP subAd(new SubAdapterStep(subQueryStep, jobInfo)); + if (csep->isRecursiveQuery()) + { + dynamic_cast(subAd.get())->isRecursiveStep(true); + } return subAd; } diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp index 7ec8f064f..54eeb21c5 100644 --- a/dbcon/joblist/jlf_tuplejoblist.cpp +++ b/dbcon/joblist/jlf_tuplejoblist.cpp @@ -5262,7 +5262,203 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& return SJSTEP(unionStep); } +SJSTEP recursiveUnionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, + JobStepVector& recurQueries, uint32_t keyCount) +{ + vector inputRGs; + vector distinct; + uint64_t colCount = jobInfo.deliveredCols.size(); + + vector oids; + vector keys; + vector scale; + vector precision; + vector width; + vector types; + vector csNums; + JobStepAssociation jsaToUnion; + + // bug4388, share code with connector for column type coversion + vector> queryColTypes; + + for (uint64_t j = 0; j < colCount; ++j) + queryColTypes.push_back(vector(queries.size() + recurQueries.size())); + + for (uint64_t i = 0; i < queries.size(); i++) + { + SJSTEP& spjs = queries[i]; + TupleDeliveryStep* tds = dynamic_cast(spjs.get()); + + if (tds == NULL) + { + throw runtime_error("Not a deliverable step."); + } + + const RowGroup& rg = tds->getDeliveredRowGroup(); + inputRGs.push_back(rg); + + const vector& scaleIn = rg.getScale(); + const vector& precisionIn = rg.getPrecision(); + const vector& typesIn = rg.getColTypes(); + const vector& csNumsIn = rg.getCharsetNumbers(); + + for (uint64_t j = 0; j < colCount; ++j) + { + queryColTypes[j][i].colDataType = typesIn[j]; + queryColTypes[j][i].charsetNumber = csNumsIn[j]; + queryColTypes[j][i].scale = scaleIn[j]; + queryColTypes[j][i].precision = precisionIn[j]; + queryColTypes[j][i].colWidth = rg.getColumnWidth(j); + } + + if (i == 0) + { + const vector& oidsIn = rg.getOIDs(); + const vector& keysIn = rg.getKeys(); + oids.insert(oids.end(), oidsIn.begin(), oidsIn.begin() + colCount); + keys.insert(keys.end(), keysIn.begin(), keysIn.begin() + colCount); + } + + // if all union types are UNION_ALL, distinctUnionNum is 0. + distinct.push_back(distinctUnionNum > i); + + AnyDataListSPtr spdl(new AnyDataList()); + RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); + spdl->rowGroupDL(dl); + dl->OID(CNX_VTABLE_ID); + JobStepAssociation jsa; + jsa.outAdd(spdl); + spjs->outputAssociation(jsa); + jsaToUnion.outAdd(spdl); + } + for (uint64_t i = 0; i < recurQueries.size(); i++) + { + SJSTEP spjs = recurQueries[i]; + TupleDeliveryStep* tds = dynamic_cast(spjs.get()); + + if (tds == NULL) + { + throw runtime_error("Not a deliverable step."); + } + + const RowGroup& rg = tds->getDeliveredRowGroup(); + inputRGs.push_back(rg); + + const vector& scaleIn = rg.getScale(); + const vector& precisionIn = rg.getPrecision(); + const vector& typesIn = rg.getColTypes(); + const vector& csNumsIn = rg.getCharsetNumbers(); + + for (uint64_t j = 0; j < colCount; ++j) + { + queryColTypes[j][i + queries.size()].colDataType = typesIn[j]; + queryColTypes[j][i + queries.size()].charsetNumber = csNumsIn[j]; + queryColTypes[j][i + queries.size()].scale = scaleIn[j]; + queryColTypes[j][i + queries.size()].precision = precisionIn[j]; + queryColTypes[j][i + queries.size()].colWidth = rg.getColumnWidth(j); + } + + // if all union types are UNION_ALL, distinctUnionNum is 0. + distinct.push_back(distinctUnionNum > i); + + // mostly should have initialised DLs hence the change + if (i < recurQueries.size() - 1) + { + AnyDataListSPtr spdl = spjs->outputAssociation().outAt(0); + spdl->rowGroupDL()->setNumConsumers(2); + jsaToUnion.outAdd(spdl); + } + else + { + AnyDataListSPtr spdl(new AnyDataList()); + RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); + spdl->rowGroupDL(dl); + dl->OID(CNX_VTABLE_ID); + JobStepAssociation jsa; + jsa.outAdd(spdl); + spjs->outputAssociation(jsa); + jsaToUnion.outAdd(spdl); + } + } + + AnyDataListSPtr spdl(new AnyDataList()); + RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); + spdl->rowGroupDL(dl); + dl->OID(CNX_VTABLE_ID); + JobStepAssociation jsa; + jsa.outAdd(spdl); + TupleRecursiveUnion* unionStep = new TupleRecursiveUnion(CNX_VTABLE_ID, jobInfo, keyCount); + unionStep->inputAssociation(jsaToUnion); + unionStep->outputAssociation(jsa); + + // This return code in the call to convertUnionColType() below would + // always be 0. This is because convertUnionColType() is also called + // in the connector code in getSelectPlan() which handle + // the non-zero return code scenarios from this function call and error + // out, in which case, the execution does not even get to ExeMgr. + unsigned int dummyUnionedTypeRc = 0; + + // get unioned column types + for (uint64_t j = 0; j < colCount; ++j) + { + CalpontSystemCatalog::ColType colType = + CalpontSystemCatalog::ColType::convertUnionColType(queryColTypes[j], dummyUnionedTypeRc); + types.push_back(colType.colDataType); + csNums.push_back(colType.charsetNumber); + scale.push_back(colType.scale); + precision.push_back(colType.precision); + width.push_back(colType.colWidth); + } + + vector pos; + pos.push_back(2); + + for (uint64_t i = 0; i < oids.size(); ++i) + pos.push_back(pos[i] + width[i]); + + unionStep->setInputRowGroups(inputRGs); + unionStep->setDistinctFlags(distinct); + unionStep->setOutputRowGroup( + RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold)); + + unionStep->recursiveSteps(recurQueries); + // Fix for bug 4388 adjusts the result type at connector side, this workaround is obsolete. + // bug 3067, update the returned column types. + // This is a workaround as the connector always uses the first query' returned columns. + // ct.colDataType = types[i]; + // ct.scale = scale[i]; + // ct.colWidth = width[i]; + + for (size_t i = 0; i < jobInfo.deliveredCols.size(); i++) + { + CalpontSystemCatalog::ColType ct = jobInfo.deliveredCols[i]->resultType(); + // XXX remove after connector change + ct.colDataType = types[i]; + ct.scale = scale[i]; + ct.colWidth = width[i]; + + // varchar/varbinary column width has been fudged, see fudgeWidth in jlf_common.cpp. + if (ct.colDataType == CalpontSystemCatalog::VARCHAR) + ct.colWidth--; + else if (ct.colDataType == CalpontSystemCatalog::VARBINARY) + ct.colWidth -= 2; + + jobInfo.deliveredCols[i]->resultType(ct); + } + + if (jobInfo.trace) + { + cout << boldStart << "\ninput RGs: (distinct=" << distinctUnionNum << ")\n" << boldStop; + + for (vector::iterator i = inputRGs.begin(); i != inputRGs.end(); i++) + cout << i->toString() << endl << endl; + + cout << boldStart << "output RG:\n" << boldStop << unionStep->getDeliveredRowGroup().toString() << endl; + } + + return SJSTEP(unionStep); +} } // namespace joblist #ifdef __clang__ diff --git a/dbcon/joblist/jlf_tuplejoblist.h b/dbcon/joblist/jlf_tuplejoblist.h index 423c9cf1c..0b29ae68c 100644 --- a/dbcon/joblist/jlf_tuplejoblist.h +++ b/dbcon/joblist/jlf_tuplejoblist.h @@ -130,7 +130,8 @@ void orExpresssion(const execplan::Operator* op, JobInfo& jobInfo); // union the queries and return the tuple union step SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount); - +SJSTEP recursiveUnionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, + JobStepVector& recurQueries, uint32_t keyCount); void addAnnexStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, JobInfo& jobInfo, IDBQueryType queryType = execplan::IDBQueryType::SELECT); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 37bc73fce..7da63dbf0 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,6 +18,7 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include "subquerystep.h" using namespace std; #include @@ -1332,7 +1334,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo } // for dictionary columns not count only, replace the token oid with string oid - for (vector >::iterator it = jobInfo.returnedColVec.begin(); + for (vector>::iterator it = jobInfo.returnedColVec.begin(); it != jobInfo.returnedColVec.end(); it++) { // if the column is a dictionary column and not count only @@ -1578,7 +1580,7 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobS set seenTableIds; // Stack of seenTables to make sure the left-hand side and right-hand have the same content - stack > seenTableStack; + stack> seenTableStack; if (!querySteps.empty()) { @@ -2015,126 +2017,385 @@ void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVec parseExecutionPlan(csep, jobInfo, querySteps, projectSteps, deliverySteps); makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps); } - -void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, - JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps) +void findRecursiveSubSteps(const SJSTEP& root, JobStepVector& result) { - CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec(); - uint8_t distinctUnionNum = csep->distinctUnionNum(); - uint32_t unionRetColsCount = csep->returnedCols().size(); - JobStepVector unionFeeders; + if (!root) + return; - std::remove_cv_torderByCols())>> expOrderByCols; - for (auto& obc : csep->orderByCols()) + std::stack work; + work.push(root); + + while (!work.empty()) { - if (obc->orderPos() != -1ull) + SJSTEP step = work.top(); + work.pop(); + + if (!step) + continue; + + // Case 1: SubAdapterStep + if (auto* adapter = dynamic_cast(step.get())) { + if (adapter->isRecursiveStep()) + { + result.push_back(step); + } + + // push its substep + work.push(adapter->subStep()); + } + // Case 2: SubQueryStep + else if (auto* subq = dynamic_cast(step.get())) + { + const STJLP& subJoblist = subq->subJoblist(); + if (subJoblist) + { + const auto& qsteps = subJoblist->querySteps(); + for (const auto& qstep : qsteps) + { + work.push(qstep); + } + } + } + } + std::reverse(result.begin(), result.end()); +} + +void replaceDerivedTableList(CalpontSelectExecutionPlan::SelectList& list, const SCSEP& replacementScep) +{ + for (auto& scep : list) + { + auto plan = dynamic_cast(scep.get()); + if (!plan) continue; + + if (plan->isRecursiveWithTable()) + { + scep = replacementScep; } - if (dynamic_cast(obc.get()) == nullptr && - dynamic_cast(obc.get()) == nullptr) + else if (plan->containsRecursiveQuery()) { - // Arithmetic & function columns need special processing - expOrderByCols.push_back(obc); + replaceDerivedTableList(plan->derivedTableList(), replacementScep); } } +} - for (auto& unionSub : unionVec) +void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, + JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps) +{ + if (csep->isRecursiveWithTable()) { - auto* unionCSEP = dynamic_cast(unionSub.get()); - for (auto& obc : expOrderByCols) + CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec(); + uint8_t distinctUnionNum = csep->distinctUnionNum(); + uint32_t unionRetColsCount = csep->returnedCols().size(); + JobStepVector unionFeeders; + + std::remove_cv_torderByCols())>> expOrderByCols; + for (auto& obc : csep->orderByCols()) { - // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table in - // the UNION, and add the expression to the returned columns. - auto* col = obc->clone(); - auto* ac = dynamic_cast(col); - auto* fc = dynamic_cast(col); - if (ac) + if (obc->orderPos() != -1ull) + { + continue; + } + if (dynamic_cast(obc.get()) == nullptr && + dynamic_cast(obc.get()) == nullptr) { - ac->expression()->walk(fixUnionExpressionCol, unionCSEP); - ac->setSimpleColumnList(); + // Arithmetic & function columns need special processing + expOrderByCols.push_back(obc); } - else if (fc) + } + auto partitionPoint = std::partition(unionVec.begin(), unionVec.end(), + [](SCEP scep) + { + auto plan = dynamic_cast(scep.get()); + if (plan) + { + return !plan->containsRecursiveQuery(); + } + return false; + }); + + CalpontSelectExecutionPlan* baseRecur; + CalpontSelectExecutionPlan* currRecur; + + SJSTEP sub; + + // iterate up to the non recursive queries + for (auto it = unionVec.begin(); it != partitionPoint; ++it) + { + auto& unionSub = *it; + auto* unionCSEP = dynamic_cast(unionSub.get()); + for (auto& obc : expOrderByCols) { - for (auto& parm : fc->functionParms()) + // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table + // in the UNION, and add the expression to the returned columns. + auto* col = obc->clone(); + auto* ac = dynamic_cast(col); + auto* fc = dynamic_cast(col); + if (ac) { - parm->walk(fixUnionExpressionCol, unionCSEP); + ac->expression()->walk(fixUnionExpressionCol, unionCSEP); + ac->setSimpleColumnList(); } - fc->setSimpleColumnList(); + else if (fc) + { + for (auto& parm : fc->functionParms()) + { + parm->walk(fixUnionExpressionCol, unionCSEP); + } + fc->setSimpleColumnList(); + } + unionCSEP->returnedCols().emplace_back(col); } - unionCSEP->returnedCols().emplace_back(col); + SJSTEP sub = doUnionSub(unionSub.get(), jobInfo); + querySteps.push_back(sub); + unionFeeders.push_back(sub); } - SJSTEP sub = doUnionSub(unionSub.get(), jobInfo); - querySteps.push_back(sub); - unionFeeders.push_back(sub); - } - for (auto& obc : expOrderByCols) - { - // Add a SimpleColumn to the outer query for the every ORDER BY expression - auto* sc = new SimpleColumn(*obc.get()); - csep->returnedCols().emplace_back(sc); - sc->colPosition(csep->returnedCols().size() - 1); - sc->orderPos(csep->returnedCols().size() - 1); - obc->orderPos(csep->returnedCols().size() - 1); - } + for (auto cit = partitionPoint; cit != unionVec.end(); ++cit) + { + currRecur = dynamic_cast(cit->get()); + currRecur->isRecursiveQuery(true); + } + baseRecur = new CalpontSelectExecutionPlan(*currRecur); + uint32_t depth = (currRecur->maxRecursiveDepth() <= 100) ? csep->maxRecursiveDepth() : 100; + // uint32_t depth = 100; + for (uint32 i = 0; i < depth; ++i) + { + CalpontSelectExecutionPlan* workingRecur = new CalpontSelectExecutionPlan(*baseRecur); + CalpontSelectExecutionPlan::SelectList& currDerivedTbList = workingRecur->derivedTableList(); + CalpontSelectExecutionPlan::SelectList& currUnionVec = workingRecur->unionVec(); - jobInfo.deliveredCols = csep->returnedCols(); - SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount)); - querySteps.push_back(unionStep); - uint16_t stepNo = jobInfo.subId * 10000; - numberSteps(querySteps, stepNo, jobInfo.traceFlags); - deliverySteps[execplan::CNX_VTABLE_ID] = unionStep; + currRecur->isRecursiveWithTable(true); + workingRecur->isRecursiveQuery(true); + + SCSEP replacement = boost::make_shared(*currRecur); + replaceDerivedTableList(currDerivedTbList, replacement); + replaceDerivedTableList(currUnionVec, replacement); + + for (auto& obc : expOrderByCols) + { + // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table + // in the UNION, and add the expression to the returned columns. + auto* col = obc->clone(); + auto* ac = dynamic_cast(col); + auto* fc = dynamic_cast(col); + if (ac) + { + ac->expression()->walk(fixUnionExpressionCol, workingRecur); + ac->setSimpleColumnList(); + } + else if (fc) + { + for (auto& parm : fc->functionParms()) + { + parm->walk(fixUnionExpressionCol, workingRecur); + } + fc->setSimpleColumnList(); + } + workingRecur->returnedCols().emplace_back(col); + } + if (i == depth - 1) + { + sub = doUnionSub(workingRecur, jobInfo); + querySteps.push_back(sub); + } + // querySteps.push_back(sub); + // unionFeeders.push_back(sub); + currRecur = new CalpontSelectExecutionPlan(*workingRecur); + } + + JobStepVector recursiveUnionFeeders; + findRecursiveSubSteps(sub, recursiveUnionFeeders); + + for (auto& obc : expOrderByCols) + { + // Add a SimpleColumn to the outer query for the every ORDER BY expression + auto* sc = new SimpleColumn(*obc.get()); + csep->returnedCols().emplace_back(sc); + sc->colPosition(csep->returnedCols().size() - 1); + sc->orderPos(csep->returnedCols().size() - 1); + obc->orderPos(csep->returnedCols().size() - 1); + } + + jobInfo.deliveredCols = csep->returnedCols(); + SJSTEP unionStep(recursiveUnionQueries(unionFeeders, distinctUnionNum, jobInfo, recursiveUnionFeeders, + unionRetColsCount)); + querySteps.push_back(unionStep); + uint16_t stepNo = jobInfo.subId * 10000; + numberSteps(querySteps, stepNo, jobInfo.traceFlags); + deliverySteps[execplan::CNX_VTABLE_ID] = unionStep; - if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull) + if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull) + { + jobInfo.limitStart = csep->limitStart(); + jobInfo.limitCount = csep->limitNum(); + jobInfo.orderByThreads = csep->orderByThreads(); + for (auto& obc : csep->orderByCols()) + { + auto* osc = dynamic_cast(obc.get()); + if (osc) + { + auto* sc = dynamic_cast(jobInfo.deliveredCols[obc->orderPos()].get()); + idbassert(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->colPosition(obc->orderPos()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos()); + jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc()); + } + else + { + auto* tus = dynamic_cast(unionStep.get()); + auto& keys = tus->getOutputRowGroup().getKeys(); + idbassert(obc->orderPos() < keys.size()); + jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc()); + } + } + + for (auto& rc : csep->returnedCols()) + { + // Replace ConstantColumns with SimpleColumns and fix OIDs + auto* sc = dynamic_cast(rc.get()); + if (sc) + { + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } + else + { + sc = new SimpleColumn(*rc.get()); + rc.reset(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } + } + doProject(csep->returnedCols(), jobInfo); + checkReturnedColumns(csep, jobInfo); + addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION); + } + } + else { - jobInfo.limitStart = csep->limitStart(); - jobInfo.limitCount = csep->limitNum(); - jobInfo.orderByThreads = csep->orderByThreads(); + CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec(); + uint8_t distinctUnionNum = csep->distinctUnionNum(); + uint32_t unionRetColsCount = csep->returnedCols().size(); + JobStepVector unionFeeders; + + std::remove_cv_torderByCols())>> expOrderByCols; for (auto& obc : csep->orderByCols()) { - auto* osc = dynamic_cast(obc.get()); - if (osc) + if (obc->orderPos() != -1ull) { - auto* sc = dynamic_cast(jobInfo.deliveredCols[obc->orderPos()].get()); - idbassert(sc); - sc->schemaName(""); - sc->tableAlias(querySteps[0]->alias()); - sc->colPosition(obc->orderPos()); - sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos()); - jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc()); + continue; } - else + if (dynamic_cast(obc.get()) == nullptr && + dynamic_cast(obc.get()) == nullptr) { - auto* tus = dynamic_cast(unionStep.get()); - auto& keys = tus->getOutputRowGroup().getKeys(); - idbassert(obc->orderPos() < keys.size()); - jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc()); + // Arithmetic & function columns need special processing + expOrderByCols.push_back(obc); } } - for (auto& rc : csep->returnedCols()) + for (auto& unionSub : unionVec) { - // Replace ConstantColumns with SimpleColumns and fix OIDs - auto* sc = dynamic_cast(rc.get()); - if (sc) + auto* unionCSEP = dynamic_cast(unionSub.get()); + for (auto& obc : expOrderByCols) { - sc->schemaName(""); - sc->tableAlias(querySteps[0]->alias()); - sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table + // in the UNION, and add the expression to the returned columns. + auto* col = obc->clone(); + auto* ac = dynamic_cast(col); + auto* fc = dynamic_cast(col); + if (ac) + { + ac->expression()->walk(fixUnionExpressionCol, unionCSEP); + ac->setSimpleColumnList(); + } + else if (fc) + { + for (auto& parm : fc->functionParms()) + { + parm->walk(fixUnionExpressionCol, unionCSEP); + } + fc->setSimpleColumnList(); + } + unionCSEP->returnedCols().emplace_back(col); } - else + SJSTEP sub = doUnionSub(unionSub.get(), jobInfo); + querySteps.push_back(sub); + unionFeeders.push_back(sub); + } + + for (auto& obc : expOrderByCols) + { + // Add a SimpleColumn to the outer query for the every ORDER BY expression + auto* sc = new SimpleColumn(*obc.get()); + csep->returnedCols().emplace_back(sc); + sc->colPosition(csep->returnedCols().size() - 1); + sc->orderPos(csep->returnedCols().size() - 1); + obc->orderPos(csep->returnedCols().size() - 1); + } + + jobInfo.deliveredCols = csep->returnedCols(); + SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount)); + querySteps.push_back(unionStep); + uint16_t stepNo = jobInfo.subId * 10000; + numberSteps(querySteps, stepNo, jobInfo.traceFlags); + deliverySteps[execplan::CNX_VTABLE_ID] = unionStep; + + if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull) + { + jobInfo.limitStart = csep->limitStart(); + jobInfo.limitCount = csep->limitNum(); + jobInfo.orderByThreads = csep->orderByThreads(); + for (auto& obc : csep->orderByCols()) { - sc = new SimpleColumn(*rc.get()); - rc.reset(sc); - sc->schemaName(""); - sc->tableAlias(querySteps[0]->alias()); - sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + auto* osc = dynamic_cast(obc.get()); + if (osc) + { + auto* sc = dynamic_cast(jobInfo.deliveredCols[obc->orderPos()].get()); + idbassert(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->colPosition(obc->orderPos()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos()); + jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc()); + } + else + { + auto* tus = dynamic_cast(unionStep.get()); + auto& keys = tus->getOutputRowGroup().getKeys(); + idbassert(obc->orderPos() < keys.size()); + jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc()); + } + } + + for (auto& rc : csep->returnedCols()) + { + // Replace ConstantColumns with SimpleColumns and fix OIDs + auto* sc = dynamic_cast(rc.get()); + if (sc) + { + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } + else + { + sc = new SimpleColumn(*rc.get()); + rc.reset(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } } + doProject(csep->returnedCols(), jobInfo); + checkReturnedColumns(csep, jobInfo); + addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION); } - doProject(csep->returnedCols(), jobInfo); - checkReturnedColumns(csep, jobInfo); - addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION); } } } // namespace joblist diff --git a/dbcon/joblist/subquerystep.h b/dbcon/joblist/subquerystep.h index ad81ca360..7ce1921c4 100644 --- a/dbcon/joblist/subquerystep.h +++ b/dbcon/joblist/subquerystep.h @@ -215,6 +215,16 @@ class SubAdapterStep : public JobStep, public TupleDeliveryStep return fSubStep; } + void isRecursiveStep(bool b) + { + fIsRecursiveStep = b; + } + + bool isRecursiveStep() + { + return fIsRecursiveStep; + } + /** @brief add filters (expression steps) */ void addExpression(const JobStepVector&, JobInfo&); @@ -252,6 +262,8 @@ class SubAdapterStep : public JobStep, public TupleDeliveryStep uint64_t fInputIterator; uint64_t fOutputIterator; + bool fIsRecursiveStep = false; + class Runner { public: diff --git a/dbcon/joblist/tupleunion.cpp b/dbcon/joblist/tupleunion.cpp index 655454497..a9cd17d0e 100644 --- a/dbcon/joblist/tupleunion.cpp +++ b/dbcon/joblist/tupleunion.cpp @@ -60,1235 +60,1829 @@ inline double exp10(double x) namespace { - // union helper functions. +// union helper functions. - inline uint64_t pickScaleForDouble(Row* out, uint32_t i, double val) - { - /* have to pick a scale to use for the double. using 5... */ - uint32_t scale = 5; - uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor(scale)); - const int diff = out->getScale(i) - scale; - ival = datatypes::applySignedScale(ival, diff); - return ival; - } +inline uint64_t pickScaleForDouble(Row* out, uint32_t i, double val) +{ + /* have to pick a scale to use for the double. using 5... */ + uint32_t scale = 5; + uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor(scale)); + const int diff = out->getScale(i) - scale; + ival = datatypes::applySignedScale(ival, diff); + return ival; +} - inline uint64_t pickScaleForLongDouble(Row* out, uint32_t i, long double val) - { - /* have to pick a scale to use for the double. using 5... */ - uint32_t scale = 5; - uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor(scale)); - int diff = out->getScale(i) - scale; - ival = datatypes::applySignedScale(ival, diff); - return ival; - } +inline uint64_t pickScaleForLongDouble(Row* out, uint32_t i, long double val) +{ + /* have to pick a scale to use for the double. using 5... */ + uint32_t scale = 5; + uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor(scale)); + int diff = out->getScale(i) - scale; + ival = datatypes::applySignedScale(ival, diff); + return ival; +} - NullString formatDouble(double val) - { - char buf[datatypes::INT128MAXPRECISION + 1]; - my_bool error = 0; - auto len = my_gcvt(val, MY_GCVT_ARG_DOUBLE, sizeof(buf) - 1, buf, &error); - idbassert(error == 0 && len <= sizeof(buf)); - return {buf, len}; - } +NullString formatDouble(double val) +{ + char buf[datatypes::INT128MAXPRECISION + 1]; + my_bool error = 0; + auto len = my_gcvt(val, MY_GCVT_ARG_DOUBLE, sizeof(buf) - 1, buf, &error); + idbassert(error == 0 && len <= sizeof(buf)); + return {buf, len}; +} - void normalizeIntToIntNoScale(const Row& in, Row* out, uint32_t i) - { - out->setIntField(in.getIntField(i), i); - } +void normalizeIntToIntNoScale(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getIntField(i), i); +} - void normalizeIntToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setInt128Field(val, i); - } +void normalizeIntToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setInt128Field(val, i); +} - void normalizeIntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setIntField(val, i); - } - - void normalizeIntToUintNoScale(const Row& in, Row* out, uint32_t i) - { - out->setUintField(in.getIntField(i), i); - } +void normalizeIntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setIntField(val, i); +} - void normalizeIntToUintWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setInt128Field(val, i); - } +void normalizeIntToUintNoScale(const Row& in, Row* out, uint32_t i) +{ + out->setUintField(in.getIntField(i), i); +} - void normalizeIntToUintWithScaleInt64(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setIntField(val, i); - } +void normalizeIntToUintWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setInt128Field(val, i); +} - void normalizeIntToStringWithScale(const Row& in, Row* out, uint32_t i) - { - double d = in.getIntField(i); - d /= exp10(in.getScale(i)); - out->setStringField(formatDouble(d), i); - } +void normalizeIntToUintWithScaleInt64(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setIntField(val, i); +} - void normalizeIntToStringNoScale(const Row& in, Row* out, uint32_t i) - { - utils::NullString ns(std::to_string(in.getIntField(i))); - out->setStringField(ns, i); - } +void normalizeIntToStringWithScale(const Row& in, Row* out, uint32_t i) +{ + double d = in.getIntField(i); + d /= exp10(in.getScale(i)); + out->setStringField(formatDouble(d), i); +} - void normalizeIntToXFloat(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledSInt64FieldAsXFloat(i); - out->setFloatField((float)d, i); - } +void normalizeIntToStringNoScale(const Row& in, Row* out, uint32_t i) +{ + utils::NullString ns(std::to_string(in.getIntField(i))); + out->setStringField(ns, i); +} - void normalizeIntToXDouble(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledSInt64FieldAsXFloat(i); - out->setDoubleField(d, i); - } +void normalizeIntToXFloat(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledSInt64FieldAsXFloat(i); + out->setFloatField((float)d, i); +} - void normalizeIntToLongDouble(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledSInt64FieldAsXFloat(i); - out->setLongDoubleField(d, i); - } +void normalizeIntToXDouble(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledSInt64FieldAsXFloat(i); + out->setDoubleField(d, i); +} - void normalizeIntToXDecimalInt128(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setInt128Field(val, i); - } +void normalizeIntToLongDouble(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledSInt64FieldAsXFloat(i); + out->setLongDoubleField(d, i); +} - void normalizeIntToXDecimalInt64(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); - out->setIntField(val, i); - } +void normalizeIntToXDecimalInt128(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int128_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setInt128Field(val, i); +} - void normalizeUintToIntNoScale(const Row& in, Row* out, uint32_t i) - { - out->setIntField(in.getUintField(i), i); - } +void normalizeIntToXDecimalInt64(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int64_t val = datatypes::applySignedScale(in.getIntField(i), diff); + out->setIntField(val, i); +} - void normalizeUintToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int128_t val = datatypes::applySignedScale(in.getUintField(i), diff); - out->setInt128Field(val, i); - } +void normalizeUintToIntNoScale(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getUintField(i), i); +} - void normalizeUntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - uint64_t val = datatypes::applySignedScale(in.getUintField(i), diff); - out->setIntField(val, i); - } +void normalizeUintToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int128_t val = datatypes::applySignedScale(in.getUintField(i), diff); + out->setInt128Field(val, i); +} - void normalizeUintToUint(const Row& in, Row* out, uint32_t i) - { - out->setUintField(in.getUintField(i), i); - } +void normalizeUntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + uint64_t val = datatypes::applySignedScale(in.getUintField(i), diff); + out->setIntField(val, i); +} - void normalizeUintToStringWithScale(const Row& in, Row* out, uint32_t i) - { - double d = in.getUintField(i); - d /= exp10(in.getScale(i)); - out->setStringField(formatDouble(d), i); - } +void normalizeUintToUint(const Row& in, Row* out, uint32_t i) +{ + out->setUintField(in.getUintField(i), i); +} - void normalizeUintToStringNoScale(const Row& in, Row* out, uint32_t i) - { - utils::NullString ns(std::to_string(in.getUintField(i))); - out->setStringField(ns, i); - } +void normalizeUintToStringWithScale(const Row& in, Row* out, uint32_t i) +{ + double d = in.getUintField(i); + d /= exp10(in.getScale(i)); + out->setStringField(formatDouble(d), i); +} - void normalizUintToXFloat(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledUInt64FieldAsXFloat(i); - out->setFloatField((float)d, i); - } +void normalizeUintToStringNoScale(const Row& in, Row* out, uint32_t i) +{ + utils::NullString ns(std::to_string(in.getUintField(i))); + out->setStringField(ns, i); +} - void normalizeUintToXDouble(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledUInt64FieldAsXFloat(i); - out->setDoubleField(d, i); - } +void normalizUintToXFloat(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledUInt64FieldAsXFloat(i); + out->setFloatField((float)d, i); +} - void normalizeUintToLongDouble(const Row& in, Row* out, uint32_t i) - { - auto d = in.getScaledUInt64FieldAsXFloat(i); - out->setLongDoubleField(d, i); - } +void normalizeUintToXDouble(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledUInt64FieldAsXFloat(i); + out->setDoubleField(d, i); +} - void normalizeUintToXDecimalInt128(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - int128_t val = datatypes::applySignedScale(in.getUintField(i), diff); - out->setInt128Field(val, i); - } +void normalizeUintToLongDouble(const Row& in, Row* out, uint32_t i) +{ + auto d = in.getScaledUInt64FieldAsXFloat(i); + out->setLongDoubleField(d, i); +} - void normalizeUintToXDecimalInt64(const Row& in, Row* out, uint32_t i) - { - const int diff = out->getScale(i) - in.getScale(i); - idbassert(diff >= 0); - uint64_t val = datatypes::applySignedScale(in.getUintField(i), diff); - out->setIntField(val, i); - } +void normalizeUintToXDecimalInt128(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + int128_t val = datatypes::applySignedScale(in.getUintField(i), diff); + out->setInt128Field(val, i); +} - void normalizeStringToString(const Row& in, Row* out, uint32_t i) - { - out->setStringField(in.getStringField(i), i); - } +void normalizeUintToXDecimalInt64(const Row& in, Row* out, uint32_t i) +{ + const int diff = out->getScale(i) - in.getScale(i); + idbassert(diff >= 0); + uint64_t val = datatypes::applySignedScale(in.getUintField(i), diff); + out->setIntField(val, i); +} - void normalizeDateToDate(const Row& in, Row* out, uint32_t i) - { - out->setIntField(in.getIntField(i), i); - } +void normalizeStringToString(const Row& in, Row* out, uint32_t i) +{ + out->setStringField(in.getStringField(i), i); +} - void normalizeDateToDatetime(const Row& in, Row* out, uint32_t i) - { - uint64_t date = in.getUintField(i); - date &= ~0x3f; // zero the 'spare' field - date <<= 32; - out->setUintField(date, i); - } +void normalizeDateToDate(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getIntField(i), i); +} - void normalizeDateToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) - { - dataconvert::Date date(in.getUintField(i)); - dataconvert::MySQLTime m_time; - m_time.year = date.year; - m_time.month = date.month; - m_time.day = date.day; - m_time.hour = 0; - m_time.minute = 0; - m_time.second = 0; - m_time.second_part = 0; - - dataconvert::TimeStamp timeStamp; - bool isValid = true; - int64_t seconds = dataconvert::mySQLTimeToGmtSec(m_time, fTimeZone, isValid); - - if (!isValid) - { - timeStamp.reset(); - } - else - { - timeStamp.second = seconds; - timeStamp.msecond = m_time.second_part; - } +void normalizeDateToDatetime(const Row& in, Row* out, uint32_t i) +{ + uint64_t date = in.getUintField(i); + date &= ~0x3f; // zero the 'spare' field + date <<= 32; + out->setUintField(date, i); +} - uint64_t outValue = (uint64_t) * (reinterpret_cast(&timeStamp)); - out->setUintField(outValue, i); - } +void normalizeDateToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) +{ + dataconvert::Date date(in.getUintField(i)); + dataconvert::MySQLTime m_time; + m_time.year = date.year; + m_time.month = date.month; + m_time.day = date.day; + m_time.hour = 0; + m_time.minute = 0; + m_time.second = 0; + m_time.second_part = 0; - void normalizeDateToString(const Row& in, Row* out, uint32_t i) - { - string d = DataConvert::dateToString(in.getUintField(i)); - utils::NullString ns(d); - out->setStringField(ns, i); - } + dataconvert::TimeStamp timeStamp; + bool isValid = true; + int64_t seconds = dataconvert::mySQLTimeToGmtSec(m_time, fTimeZone, isValid); - void normalizeDatetimeToDatetime(const Row& in, Row* out, uint32_t i) + if (!isValid) { - out->setIntField(in.getIntField(i), i); + timeStamp.reset(); } - - void normalizeDatetimeToDate(const Row& in, Row* out, uint32_t i) + else { - uint64_t val = in.getUintField(i); - val >>= 32; - out->setUintField(val, i); + timeStamp.second = seconds; + timeStamp.msecond = m_time.second_part; } - void normalizeDatetimeToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) - { - uint64_t val = in.getUintField(i); - dataconvert::DateTime dtime(val); - dataconvert::MySQLTime m_time; - dataconvert::TimeStamp timeStamp; - - m_time.year = dtime.year; - m_time.month = dtime.month; - m_time.day = dtime.day; - m_time.hour = dtime.hour; - m_time.minute = dtime.minute; - m_time.second = dtime.second; - m_time.second_part = dtime.msecond; - - bool isValid = true; - int64_t seconds = mySQLTimeToGmtSec(m_time, fTimeZone, isValid); - - if (!isValid) - { - timeStamp.reset(); - } - else - { - timeStamp.second = seconds; - timeStamp.msecond = m_time.second_part; - } + uint64_t outValue = (uint64_t)*(reinterpret_cast(&timeStamp)); + out->setUintField(outValue, i); +} - uint64_t outValue = (uint64_t) * (reinterpret_cast(&timeStamp)); - out->setUintField(outValue, i); - } +void normalizeDateToString(const Row& in, Row* out, uint32_t i) +{ + string d = DataConvert::dateToString(in.getUintField(i)); + utils::NullString ns(d); + out->setStringField(ns, i); +} - void normalizeDatetimeToString(const Row& in, Row* out, uint32_t i) - { - string d = DataConvert::datetimeToString(in.getUintField(i)); - utils::NullString ns(d); - out->setStringField(ns, i); - } +void normalizeDatetimeToDatetime(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getIntField(i), i); +} - void normalizeTimestampToTimestamp(const Row& in, Row* out, uint32_t i) - { - out->setIntField(in.getIntField(i), i); - } +void normalizeDatetimeToDate(const Row& in, Row* out, uint32_t i) +{ + uint64_t val = in.getUintField(i); + val >>= 32; + out->setUintField(val, i); +} - void normalizeTimestampToDate(const Row& in, Row* out, uint32_t i, long fTimeZone) - { - uint64_t val = in.getUintField(i); - dataconvert::TimeStamp timestamp(val); - int64_t seconds = timestamp.second; - uint64_t outValue; - - dataconvert::MySQLTime time; - dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone); - - dataconvert::Date date; - date.year = time.year; - date.month = time.month; - date.day = time.day; - date.spare = 0; - outValue = (uint32_t) * (reinterpret_cast(&date)); - - out->setUintField(outValue, i); - } +void normalizeDatetimeToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) +{ + uint64_t val = in.getUintField(i); + dataconvert::DateTime dtime(val); + dataconvert::MySQLTime m_time; + dataconvert::TimeStamp timeStamp; - void normalizeTimestampToDatetime(const Row& in, Row* out, uint32_t i, long fTimeZone) - { - uint64_t val = in.getUintField(i); - dataconvert::TimeStamp timestamp(val); - int64_t seconds = timestamp.second; - uint64_t outValue; - - dataconvert::MySQLTime time; - dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone); - - dataconvert::DateTime datetime; - datetime.year = time.year; - datetime.month = time.month; - datetime.day = time.day; - datetime.hour = time.hour; - datetime.minute = time.minute; - datetime.second = time.second; - datetime.msecond = timestamp.msecond; - outValue = (uint64_t) * (reinterpret_cast(&datetime)); - - out->setUintField(outValue, i); - } + m_time.year = dtime.year; + m_time.month = dtime.month; + m_time.day = dtime.day; + m_time.hour = dtime.hour; + m_time.minute = dtime.minute; + m_time.second = dtime.second; + m_time.second_part = dtime.msecond; - void normalizeTimestampToString(const Row& in, Row* out, uint32_t i, long fTimeZone) - { - string d = DataConvert::timestampToString(in.getUintField(i), fTimeZone); - utils::NullString ns(d); - out->setStringField(ns, i); - } + bool isValid = true; + int64_t seconds = mySQLTimeToGmtSec(m_time, fTimeZone, isValid); - void normalizeTimeToTime(const Row& in, Row* out, uint32_t i) + if (!isValid) { - out->setIntField(in.getIntField(i), i); + timeStamp.reset(); } - - void normalizeTimeToString(const Row& in, Row* out, uint32_t i) + else { - string d = DataConvert::timeToString(in.getIntField(i)); - utils::NullString ns(d); - out->setStringField(ns, i); + timeStamp.second = seconds; + timeStamp.msecond = m_time.second_part; } - void normalizeXFloatToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setInt128Field(pickScaleForDouble(out, i, val), i); - } + uint64_t outValue = (uint64_t)*(reinterpret_cast(&timeStamp)); + out->setUintField(outValue, i); +} - void normalizeXDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setInt128Field(pickScaleForDouble(out, i, val), i); - } +void normalizeDatetimeToString(const Row& in, Row* out, uint32_t i) +{ + string d = DataConvert::datetimeToString(in.getUintField(i)); + utils::NullString ns(d); + out->setStringField(ns, i); +} - void normalizeXFloatToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setIntField(pickScaleForDouble(out, i, val), i); - } +void normalizeTimestampToTimestamp(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getIntField(i), i); +} - void normalizeXDoubleToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setIntField(pickScaleForDouble(out, i, val), i); - } +void normalizeTimestampToDate(const Row& in, Row* out, uint32_t i, long fTimeZone) +{ + uint64_t val = in.getUintField(i); + dataconvert::TimeStamp timestamp(val); + int64_t seconds = timestamp.second; + uint64_t outValue; + + dataconvert::MySQLTime time; + dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone); + + dataconvert::Date date; + date.year = time.year; + date.month = time.month; + date.day = time.day; + date.spare = 0; + outValue = (uint32_t)*(reinterpret_cast(&date)); + + out->setUintField(outValue, i); +} - void normalizeXFloatToIntNoScale(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setIntField((int64_t)val, i); - } +void normalizeTimestampToDatetime(const Row& in, Row* out, uint32_t i, long fTimeZone) +{ + uint64_t val = in.getUintField(i); + dataconvert::TimeStamp timestamp(val); + int64_t seconds = timestamp.second; + uint64_t outValue; + + dataconvert::MySQLTime time; + dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone); + + dataconvert::DateTime datetime; + datetime.year = time.year; + datetime.month = time.month; + datetime.day = time.day; + datetime.hour = time.hour; + datetime.minute = time.minute; + datetime.second = time.second; + datetime.msecond = timestamp.msecond; + outValue = (uint64_t)*(reinterpret_cast(&datetime)); + + out->setUintField(outValue, i); +} - void normalizeXDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setIntField((int64_t)val, i); - } +void normalizeTimestampToString(const Row& in, Row* out, uint32_t i, long fTimeZone) +{ + string d = DataConvert::timestampToString(in.getUintField(i), fTimeZone); + utils::NullString ns(d); + out->setStringField(ns, i); +} - void normalizeXFloatToUint(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setUintField((uint64_t)val, i); - } +void normalizeTimeToTime(const Row& in, Row* out, uint32_t i) +{ + out->setIntField(in.getIntField(i), i); +} - void normalizeXDoubleToUint(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setUintField((uint64_t)val, i); - } +void normalizeTimeToString(const Row& in, Row* out, uint32_t i) +{ + string d = DataConvert::timeToString(in.getIntField(i)); + utils::NullString ns(d); + out->setStringField(ns, i); +} - void normalizeXFloatToXFloat(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setFloatField(val, i); - } +void normalizeXFloatToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setInt128Field(pickScaleForDouble(out, i, val), i); +} - void normalizeXDoubleToXFloat(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setFloatField(val, i); - } +void normalizeXDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setInt128Field(pickScaleForDouble(out, i, val), i); +} - void normalizeXFloatToXDouble(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setDoubleField(val, i); - } +void normalizeXFloatToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setIntField(pickScaleForDouble(out, i, val), i); +} - void normalizeXDoubleToXDouble(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setDoubleField(val, i); - } +void normalizeXDoubleToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setIntField(pickScaleForDouble(out, i, val), i); +} - void normalizeXFloatToLongDouble(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setLongDoubleField(val, i); - } +void normalizeXFloatToIntNoScale(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setIntField((int64_t)val, i); +} - void normalizeXDoubleToLongDouble(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setLongDoubleField(val, i); - } +void normalizeXDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setIntField((int64_t)val, i); +} - void normalizeXFloatToString(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setStringField(formatDouble(val), i); - } +void normalizeXFloatToUint(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setUintField((uint64_t)val, i); +} - void normalizeXDoubleToString(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setStringField(formatDouble(val), i); - } +void normalizeXDoubleToUint(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setUintField((uint64_t)val, i); +} - void normalizeXFloatToWideXDecimal(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setInt128Field(pickScaleForDouble(out, i, val), i); - } +void normalizeXFloatToXFloat(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setFloatField(val, i); +} - void normalizeXDoubleToWideXDecimal(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setInt128Field(pickScaleForDouble(out, i, val), i); - } +void normalizeXDoubleToXFloat(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setFloatField(val, i); +} - void normalizeXFloatToXDecimal(const Row& in, Row* out, uint32_t i) - { - double val = in.getFloatField(i); - out->setIntField(pickScaleForDouble(out, i, val), i); - } +void normalizeXFloatToXDouble(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setDoubleField(val, i); +} - void normalizeXDoubleToXDecimal(const Row& in, Row* out, uint32_t i) - { - double val = in.getDoubleField(i); - out->setIntField(pickScaleForDouble(out, i, val), i); - } +void normalizeXDoubleToXDouble(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setDoubleField(val, i); +} - void normalizeLongDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setIntField((int64_t)val, i); - } +void normalizeXFloatToLongDouble(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setLongDoubleField(val, i); +} - void normalizeLongDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setInt128Field(pickScaleForLongDouble(out, i, val), i); - } +void normalizeXDoubleToLongDouble(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setLongDoubleField(val, i); +} - void normalizeLongDoubleToIntWithScaleInt(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setIntField(pickScaleForLongDouble(out, i, val), i); - } +void normalizeXFloatToString(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setStringField(formatDouble(val), i); +} - void normalizeLongDoubleToUint(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setUintField((uint64_t)val, i); - } +void normalizeXDoubleToString(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setStringField(formatDouble(val), i); +} - void normalizeLongDoubleToXFloat(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setFloatField(val, i); - } +void normalizeXFloatToWideXDecimal(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setInt128Field(pickScaleForDouble(out, i, val), i); +} - void normalizeLongDoubleToXDouble(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setDoubleField(val, i); - } +void normalizeXDoubleToWideXDecimal(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setInt128Field(pickScaleForDouble(out, i, val), i); +} - void normalizeLongDoubleToLongDouble(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setLongDoubleField(val, i); - } +void normalizeXFloatToXDecimal(const Row& in, Row* out, uint32_t i) +{ + double val = in.getFloatField(i); + out->setIntField(pickScaleForDouble(out, i, val), i); +} - void normalizeLongDoubleToString(const Row& in, Row* out, uint32_t i) - { - // FIXME: ostream output looks like '1.234e+56' while MDB output is '1.234e56' - long double val = in.getLongDoubleField(i); - ostringstream os; - os.precision(15); // to match mysql's output - os << val; - utils::NullString ns(os.str()); - out->setStringField(ns, i); - } +void normalizeXDoubleToXDecimal(const Row& in, Row* out, uint32_t i) +{ + double val = in.getDoubleField(i); + out->setIntField(pickScaleForDouble(out, i, val), i); +} - void normalizeLongDoubleToXDecimalInt128(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setInt128Field(pickScaleForLongDouble(out, i, val), i); - } +void normalizeLongDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setIntField((int64_t)val, i); +} - void normalizeLongDoubleToXDecimalInt(const Row& in, Row* out, uint32_t i) - { - long double val = in.getLongDoubleField(i); - out->setIntField(pickScaleForLongDouble(out, i, val), i); - } +void normalizeLongDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setInt128Field(pickScaleForLongDouble(out, i, val), i); +} - void normalizeWideXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) - { - int128_t val128 = 0; - in.getInt128Field(i, val128); - out->setInt128Field(val128, i); - } +void normalizeLongDoubleToIntWithScaleInt(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setIntField(pickScaleForLongDouble(out, i, val), i); +} - void normalizeXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - out->setInt128Field(val, i); - } +void normalizeLongDoubleToUint(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setUintField((uint64_t)val, i); +} - void normalizeWideXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) - { - int128_t val128 = 0; - in.getInt128Field(i, val128); - int128_t temp = datatypes::applySignedScale(val128, out->getScale(i) - in.getScale(i)); - out->setInt128Field(temp, i); - } +void normalizeLongDoubleToXFloat(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setFloatField(val, i); +} - void normalizeXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - int128_t temp = datatypes::applySignedScale(val, out->getScale(i) - in.getScale(i)); - out->setInt128Field(temp, i); - } +void normalizeLongDoubleToXDouble(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setDoubleField(val, i); +} - void normalizeXDecimalToOtherNoScale(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - out->setIntField(val, i); - } +void normalizeLongDoubleToLongDouble(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setLongDoubleField(val, i); +} - void normalizeXDecimalToOtherWithScale(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - int64_t temp = datatypes::applySignedScale(val, out->getScale(i) - in.getScale(i)); - out->setIntField(temp, i); - } +void normalizeLongDoubleToString(const Row& in, Row* out, uint32_t i) +{ + // FIXME: ostream output looks like '1.234e+56' while MDB output is '1.234e56' + long double val = in.getLongDoubleField(i); + ostringstream os; + os.precision(15); // to match mysql's output + os << val; + utils::NullString ns(os.str()); + out->setStringField(ns, i); +} - void normalizeXDecimalToXFloat(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - float fval = ((float)val) / IDB_pow[in.getScale(i)]; - out->setFloatField(fval, i); - } +void normalizeLongDoubleToXDecimalInt128(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setInt128Field(pickScaleForLongDouble(out, i, val), i); +} - void normalizeXDecimalToXDouble(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - double dval = ((double)val) / IDB_pow[in.getScale(i)]; - out->setDoubleField(dval, i); - } +void normalizeLongDoubleToXDecimalInt(const Row& in, Row* out, uint32_t i) +{ + long double val = in.getLongDoubleField(i); + out->setIntField(pickScaleForLongDouble(out, i, val), i); +} - void normalizeXDecimalToLongDouble(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - long double dval = ((long double)val) / IDB_pow[in.getScale(i)]; - out->setLongDoubleField(dval, i); - } +void normalizeWideXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) +{ + int128_t val128 = 0; + in.getInt128Field(i, val128); + out->setInt128Field(val128, i); +} - void normalizeWideXDecimalToString(const Row& in, Row* out, uint32_t i) - { - int128_t val128 = 0; - in.getInt128Field(i, val128); - datatypes::Decimal dec(0, in.getScale(i), in.getPrecision(i), val128); - out->setStringField(dec.toNullString(), i); - } +void normalizeXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + out->setInt128Field(val, i); +} - void normalizeXDecimalToString(const Row& in, Row* out, uint32_t i) - { - int64_t val = in.getIntField(i); - datatypes::Decimal dec(val, in.getScale(i), in.getPrecision(i)); - out->setStringField(dec.toNullString(), i); - } +void normalizeWideXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) +{ + int128_t val128 = 0; + in.getInt128Field(i, val128); + int128_t temp = datatypes::applySignedScale(val128, out->getScale(i) - in.getScale(i)); + out->setInt128Field(temp, i); +} - void normalizeBlobVarbinary(const Row& in, Row* out, uint32_t i) - { - // out->setVarBinaryField(in.getVarBinaryStringField(i), i); // not efficient - out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i); - } +void normalizeXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + int128_t temp = datatypes::applySignedScale(val, out->getScale(i) - in.getScale(i)); + out->setInt128Field(temp, i); +} - joblist::normalizeFunctionsT inferNormalizeFunctions(const Row& in, Row* out, long fTimeZone) - { - uint32_t i; - joblist::normalizeFunctionsT result; +void normalizeXDecimalToOtherNoScale(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + out->setIntField(val, i); +} + +void normalizeXDecimalToOtherWithScale(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + int64_t temp = datatypes::applySignedScale(val, out->getScale(i) - in.getScale(i)); + out->setIntField(temp, i); +} + +void normalizeXDecimalToXFloat(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + float fval = ((float)val) / IDB_pow[in.getScale(i)]; + out->setFloatField(fval, i); +} + +void normalizeXDecimalToXDouble(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + double dval = ((double)val) / IDB_pow[in.getScale(i)]; + out->setDoubleField(dval, i); +} - for (i = 0; i < out->getColumnCount(); i++) +void normalizeXDecimalToLongDouble(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + long double dval = ((long double)val) / IDB_pow[in.getScale(i)]; + out->setLongDoubleField(dval, i); +} + +void normalizeWideXDecimalToString(const Row& in, Row* out, uint32_t i) +{ + int128_t val128 = 0; + in.getInt128Field(i, val128); + datatypes::Decimal dec(0, in.getScale(i), in.getPrecision(i), val128); + out->setStringField(dec.toNullString(), i); +} + +void normalizeXDecimalToString(const Row& in, Row* out, uint32_t i) +{ + int64_t val = in.getIntField(i); + datatypes::Decimal dec(val, in.getScale(i), in.getPrecision(i)); + out->setStringField(dec.toNullString(), i); +} + +void normalizeBlobVarbinary(const Row& in, Row* out, uint32_t i) +{ + // out->setVarBinaryField(in.getVarBinaryStringField(i), i); // not efficient + out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i); +} + +joblist::normalizeFunctionsT inferNormalizeFunctions(const Row& in, Row* out, long fTimeZone) +{ + uint32_t i; + joblist::normalizeFunctionsT result; + + for (i = 0; i < out->getColumnCount(); i++) + { + switch (in.getColTypes()[i]) { - switch (in.getColTypes()[i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - switch (out->getColTypes()[i]) + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: + if (out->getScale(i) || in.getScale(i)) { - if (out->getScale(i) || in.getScale(i)) - { - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeIntToIntWithScaleInt128); - else - result.emplace_back(normalizeIntToIntWithScaleInt64); - } + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeIntToIntWithScaleInt128); else - result.emplace_back(normalizeIntToIntNoScale); - break; + result.emplace_back(normalizeIntToIntWithScaleInt64); } + else + result.emplace_back(normalizeIntToIntNoScale); + break; + } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + { + if (in.getScale(i)) { - if (in.getScale(i)) - { - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeIntToUintWithScaleInt128); - else - result.emplace_back(normalizeIntToUintWithScaleInt64); - } + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeIntToUintWithScaleInt128); else - result.emplace_back(normalizeIntToUintNoScale); - break; + result.emplace_back(normalizeIntToUintWithScaleInt64); } + else + result.emplace_back(normalizeIntToUintNoScale); + break; + } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: - { - if (in.getScale(i)) - result.emplace_back(normalizeIntToStringWithScale); - else - result.emplace_back(normalizeIntToStringNoScale); - break; - } + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + { + if (in.getScale(i)) + result.emplace_back(normalizeIntToStringWithScale); + else + result.emplace_back(normalizeIntToStringNoScale); + break; + } + + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::TIMESTAMP: + throw logic_error( + "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime"); - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIME: - case CalpontSystemCatalog::TIMESTAMP: - throw logic_error( - "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime"); + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeIntToXFloat); break; + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeIntToXDouble); break; + + case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeIntToLongDouble); break; - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeIntToXFloat); break; + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + /* + Signed INT to XDecimal + TODO: + - This code does not handle overflow that may happen on + scale multiplication. Instead of returning a garbage value + we should probably apply saturation here. In long terms we + should implement DECIMAL(65,x) to avoid overflow completely + (so the UNION between DECIMAL and integer can choose a proper + DECIMAL(M,N) result data type to guarantee that any incoming + integer value can fit into it). + */ + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeIntToXDecimalInt128); + else + result.emplace_back(normalizeIntToXDecimalInt64); + break; + } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeIntToXDouble); break; + default: + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: integer to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } - case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeIntToLongDouble); break; + break; - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + { + if (out->getScale(i)) { - /* - Signed INT to XDecimal - TODO: - - This code does not handle overflow that may happen on - scale multiplication. Instead of returning a garbage value - we should probably apply saturation here. In long terms we - should implement DECIMAL(65,x) to avoid overflow completely - (so the UNION between DECIMAL and integer can choose a proper - DECIMAL(M,N) result data type to guarantee that any incoming - integer value can fit into it). - */ if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeIntToXDecimalInt128); + result.emplace_back(normalizeUintToIntWithScaleInt128); else - result.emplace_back(normalizeIntToXDecimalInt64); - break; + result.emplace_back(normalizeUntToIntWithScaleInt64); } + else + result.emplace_back(normalizeUintToIntNoScale); + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeUintToUint); break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + { + if (in.getScale(i)) + result.emplace_back(normalizeUintToStringWithScale); + else + result.emplace_back(normalizeUintToStringNoScale); + break; + } + + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::TIMESTAMP: + throw logic_error( + "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime"); + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizUintToXFloat); break; + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeUintToXDouble); break; + + case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeUintToLongDouble); break; + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + /* + Unsigned INT to XDecimal + TODO: + - The overflow problem mentioned in the code under case "Signed INT to XDecimal:" is + also applicable here. + */ + + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeUintToXDecimalInt128); + else + result.emplace_back(normalizeUintToXDecimalInt64); + break; + } + + default: + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: integer to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } + + break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeStringToString); break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: string to " << out->getColTypes()[i]; + throw logic_error(os.str()); + } + } + + break; + + case CalpontSystemCatalog::DATE: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDateToDate); break; - default: - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: integer to " - << out->getColTypes()[i]; - throw logic_error(os.str()); + case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDateToDatetime); break; + + case CalpontSystemCatalog::TIMESTAMP: + result.emplace_back(std::bind(normalizeDateToTimestamp, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, fTimeZone)); + break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDateToString); break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: date to " << out->getColTypes()[i]; + throw logic_error(os.str()); + } + } + + break; + + case CalpontSystemCatalog::DATETIME: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDatetimeToDatetime); break; + + case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDatetimeToDate); break; + + case CalpontSystemCatalog::TIMESTAMP: + result.emplace_back(std::bind(normalizeDatetimeToTimestamp, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, fTimeZone)); + break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDatetimeToString); break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: datetime to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } + } + + break; + + case CalpontSystemCatalog::TIMESTAMP: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(normalizeTimestampToTimestamp); break; + + case CalpontSystemCatalog::DATE: + result.emplace_back(std::bind(normalizeTimestampToDate, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, fTimeZone)); + break; + + case CalpontSystemCatalog::DATETIME: + result.emplace_back(std::bind(normalizeTimestampToDatetime, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, fTimeZone)); + break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + result.emplace_back(std::bind(normalizeTimestampToString, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, fTimeZone)); + break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: timestamp to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } + } + + break; + + case CalpontSystemCatalog::TIME: + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TIME: result.emplace_back(normalizeTimeToTime); break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeTimeToString); break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: time to " << out->getColTypes()[i]; + throw logic_error(os.str()); } + } - break; + break; - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - switch (out->getColTypes()[i]) + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: + if (out->getScale(i)) { - if (out->getScale(i)) + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) { - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeUintToIntWithScaleInt128); + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToIntWithScaleInt128); else - result.emplace_back(normalizeUntToIntWithScaleInt64); - } + result.emplace_back(normalizeXDoubleToIntWithScaleInt128); + } else - result.emplace_back(normalizeUintToIntNoScale); - break; + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToIntWithScaleInt64); + else + result.emplace_back(normalizeXDoubleToIntWithScaleInt64); + } + } + else + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToIntNoScale); + else + result.emplace_back(normalizeXDoubleToIntNoScale); } + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToUint); + else + result.emplace_back(normalizeXDoubleToUint); + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToXFloat); + else + result.emplace_back(normalizeXDoubleToXFloat); + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToXDouble); + else + result.emplace_back(normalizeXDoubleToXDouble); + break; + } + + case CalpontSystemCatalog::LONGDOUBLE: + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToLongDouble); + else + result.emplace_back(normalizeXDoubleToLongDouble); + break; + } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeUintToUint); break; + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToString); + else + result.emplace_back(normalizeXDoubleToString); + break; + } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + // xFLOAT or xDOUBLE to xDECIMAL conversion. Is it really possible? + // TODO: + // Perhaps we should add an assert here that this combination is not possible + // In the current reduction all problems mentioned in the code under + // case "Signed INT to XDecimal" are also applicable here. + // TODO: isn't overflow possible below? + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) { - if (in.getScale(i)) - result.emplace_back(normalizeUintToStringWithScale); + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToWideXDecimal); else - result.emplace_back(normalizeUintToStringNoScale); + result.emplace_back(normalizeXDoubleToWideXDecimal); break; } - - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIME: - case CalpontSystemCatalog::TIMESTAMP: - throw logic_error( - "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime"); - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizUintToXFloat); break; + else + { + if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || + in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) + result.emplace_back(normalizeXFloatToXDecimal); + else + result.emplace_back(normalizeXDoubleToXDecimal); + break; + } + break; + } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeUintToXDouble); break; + default: + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: floating point to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } - case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeUintToLongDouble); break; + break; + } - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::LONGDOUBLE: + { + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + { + if (out->getScale(i)) { - /* - Unsigned INT to XDecimal - TODO: - - The overflow problem mentioned in the code under case "Signed INT to XDecimal:" is - also applicable here. - */ - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeUintToXDecimalInt128); + result.emplace_back(normalizeLongDoubleToIntWithScaleInt128); else - result.emplace_back(normalizeUintToXDecimalInt64); - break; + result.emplace_back(normalizeLongDoubleToIntWithScaleInt); } + else + result.emplace_back(normalizeLongDoubleToIntNoScale); + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeLongDoubleToUint); break; + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeLongDoubleToXFloat); break; + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeLongDoubleToXDouble); break; + + case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeLongDoubleToLongDouble); break; - default: - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: integer to " - << out->getColTypes()[i]; - throw logic_error(os.str()); + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeLongDoubleToString); break; + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + // LONGDOUBLE to xDECIMAL conversions: is it really possible? + // TODO: + // Perhaps we should add an assert here that this combination is not possible + // In the current reduction all problems mentioned in the code under + // case "Signed INT to XDecimal" are also applicable here. + if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeLongDoubleToXDecimalInt128); + else + result.emplace_back(normalizeLongDoubleToXDecimalInt); + + break; } - break; + default: + ostringstream os; + os << "TupleUnion::normalize(): tried an illegal conversion: floating point to " + << out->getColTypes()[i]; + throw logic_error(os.str()); + } + + break; + } + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + switch (out->getColTypes()[i]) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + if (datatypes::isWideDecimalType(out->getColTypes()[i], out->getColumnWidth(i))) + { + if (out->getScale(i) == in.getScale(i)) + { + if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeWideXDecimalToWideXDecimalNoScale); + else + result.emplace_back(normalizeXDecimalToWideXDecimalNoScale); + } + else if (out->getScale(i) > in.getScale(i)) + { + if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) + result.emplace_back(normalizeWideXDecimalToWideXDecimalWithScale); + else + result.emplace_back(normalizeXDecimalToWideXDecimalWithScale); + } + else // should not happen, the output's scale is the largest + throw logic_error("TupleUnion::normalize(): incorrect scale setting"); + } + // If output type is narrow decimal, input type + // has to be narrow decimal as well. + else + { + if (out->getScale(i) == in.getScale(i)) + result.emplace_back(normalizeXDecimalToOtherNoScale); + else if (out->getScale(i) > in.getScale(i)) + result.emplace_back(normalizeXDecimalToOtherWithScale); + else // should not happen, the output's scale is the largest + throw logic_error("TupleUnion::normalize(): incorrect scale setting"); + } + + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeXDecimalToXFloat); break; + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeXDecimalToXDouble); break; + + case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeXDecimalToLongDouble); break; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + default: + { + if (LIKELY(in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)) + result.emplace_back(normalizeWideXDecimalToString); + else + result.emplace_back(normalizeXDecimalToString); + break; + } + } + + break; + } + + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::VARBINARY: result.emplace_back(normalizeBlobVarbinary); break; + + default: + { + ostringstream os; + os << "TupleUnion::normalize(): unknown input type (" << in.getColTypes()[i] << ")"; + cout << os.str() << endl; + throw logic_error(os.str()); + } + } + } + + idbassert(out->getColumnCount() == result.size()); + return result; +} + +} // namespace + +namespace joblist +{ +inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const +{ + Row& row = ts->row; + + if (p.group & RowPosition::normalizedFlag) + ts->normalizedData[p.group & ~RowPosition::normalizedFlag].getRow(p.row, &row); + else + ts->rowMemory[p.group].getRow(p.row, &row); + + return row.hash(ts->fLastCol); +} + +inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const +{ + Row &r1 = ts->row, &r2 = ts->row2; + + if (d1.group & RowPosition::normalizedFlag) + ts->normalizedData[d1.group & ~RowPosition::normalizedFlag].getRow(d1.row, &r1); + else + ts->rowMemory[d1.group].getRow(d1.row, &r1); + + if (d2.group & RowPosition::normalizedFlag) + ts->normalizedData[d2.group & ~RowPosition::normalizedFlag].getRow(d2.row, &r2); + else + ts->rowMemory[d2.group].getRow(d2.row, &r2); + + return r1.equals(r2, ts->fLastCol); +} + +TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount) + : JobStep(jobInfo) + , fTableOID(tableOID) + , output(NULL) + , outputIt(-1) + , memUsage(0) + , rm(jobInfo.rm) + , runnersDone(0) + , distinctCount(0) + , distinctDone(0) + , fRowsReturned(0) + , runRan(false) + , joinRan(false) + , sessionMemLimit(jobInfo.umMemLimit) + , fTimeZone(jobInfo.timeZone) + , fLastCol(keyCount - 1) +{ + uniquer.reset(new Uniquer_t(10, Hasher(this), Eq(this), allocator)); + fExtendedInfo = "TUN: "; + fQtc.stepParms().stepType = StepTeleStats::T_TUN; +} + +TupleUnion::~TupleUnion() +{ + rm->returnMemory(memUsage, sessionMemLimit); + + if (!runRan && output) + output->endOfInput(); +} + +CalpontSystemCatalog::OID TupleUnion::tableOid() const +{ + return fTableOID; +} + +void TupleUnion::setInputRowGroups(const vector& in) +{ + inputRGs = in; +} + +void TupleUnion::setOutputRowGroup(const rowgroup::RowGroup& out) +{ + outputRG = out; + rowLength = outputRG.getRowSizeWithStrings(); +} + +void TupleUnion::setDistinctFlags(const vector& v) +{ + distinctFlags = v; +} + +void TupleUnion::readInput(uint32_t which) +{ + /* The handling of the output got a little kludgey with the string table enhancement. + * When there is no distinct check, the outputs are all generated independently of + * each other locally in this fcn. When there is a distinct check, threads + * share the output, which is built in the 'rowMemory' vector rather than in + * thread-local memory. Building the result in a common space allows us to + * store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers. + */ + + RowGroupDL* dl = NULL; + bool more = true; + RGData inRGData, outRGData, *tmpRGData; + uint32_t it = numeric_limits::max(); + RowGroup l_inputRG, l_outputRG, l_tmpRG; + Row inRow, outRow, tmpRow; + bool distinct; + uint64_t memUsageBefore, memUsageAfter, memDiff; + l_outputRG = outputRG; + dl = inputs[which]; + l_inputRG = inputRGs[which]; + l_inputRG.initRow(&inRow); + l_outputRG.initRow(&outRow); + distinct = distinctFlags[which]; + + if (distinct) + { + l_tmpRG = outputRG; + tmpRGData = &normalizedData[which]; + l_tmpRG.initRow(&tmpRow); + l_tmpRG.setData(tmpRGData); + l_tmpRG.resetRowGroup(0); + l_tmpRG.getRow(0, &tmpRow); + } + else + { + outRGData = RGData(l_outputRG); + l_outputRG.setData(&outRGData); + l_outputRG.resetRowGroup(0); + l_outputRG.getRow(0, &outRow); + } + + try + { + it = dl->getIterator(); + more = dl->next(it, &inRGData); + + if (dlTimes.FirstReadTime().tv_sec == 0) + dlTimes.setFirstReadTime(); + + if (fStartTime == -1) + { + StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_START, 1); + postStepStartTele(sts); + } + + while (more && !cancelled()) + { + /* + normalize each row + if distinct flag is set + copy the row into the output and test for uniqueness + if unique, increment the row count + else + copy the row into the output & inc row count + */ + l_inputRG.setData(&inRGData); + l_inputRG.getRow(0, &inRow); + + if (distinct) + { + memDiff = 0; + l_tmpRG.resetRowGroup(0); + l_tmpRG.getRow(0, &tmpRow); + l_tmpRG.setRowCount(l_inputRG.getRowCount()); + + const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &tmpRow, fTimeZone); + for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(), tmpRow.nextRow()) + normalize(inRow, &tmpRow, normalizeFunctions); + + l_tmpRG.getRow(0, &tmpRow); + { + boost::mutex::scoped_lock lk(uniquerMutex); + getOutput(&l_outputRG, &outRow, &outRGData); + memUsageBefore = allocator.getMemUsage(); + + uint32_t tmpOutputRowCount = l_outputRG.getRowCount(); + const uint32_t tmpRGRowCount = l_tmpRG.getRowCount(); + for (uint32_t i = 0; i < tmpRGRowCount; i++, tmpRow.nextRow()) + { + pair inserted; + inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i)); + + if (inserted.second) + { + copyRow(tmpRow, &outRow); + const_cast(*(inserted.first)) = + RowPosition(rowMemory.size() - 1, tmpOutputRowCount); + memDiff += outRow.getRealSize(); + addToOutput(&outRow, &l_outputRG, true, outRGData, tmpOutputRowCount); + fRowsReturned++; + } + } + + l_outputRG.setRowCount(tmpOutputRowCount); + + memUsageAfter = allocator.getMemUsage(); + memDiff += (memUsageAfter - memUsageBefore); + } + + if (rm->getMemory(memDiff, sessionMemLimit)) + { + memUsage += memDiff; + } + else + { + fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG); + + if (status() == 0) // preserve existing error code + { + errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_UNION_TOO_BIG)); + status(logging::ERR_UNION_TOO_BIG); + } + + abort(); + } + } + else + { + const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &outRow, fTimeZone); + const uint32_t inputRGRowCount = l_inputRG.getRowCount(); + uint32_t tmpOutputRowCount = l_outputRG.getRowCount(); + + for (uint32_t i = 0; i < inputRGRowCount; i++, inRow.nextRow()) + { + normalize(inRow, &outRow, normalizeFunctions); + addToOutput(&outRow, &l_outputRG, false, outRGData, tmpOutputRowCount); + } + + fRowsReturned += inputRGRowCount; + l_outputRG.setRowCount(tmpOutputRowCount); + } + + more = dl->next(it, &inRGData); + } + } + catch (...) + { + handleException(std::current_exception(), logging::unionStepErr, logging::ERR_UNION_TOO_BIG, + "TupleUnion::readInput()"); + status(logging::unionStepErr); + abort(); + } + + /* make sure that the input was drained before exiting. This can happen if the + query was aborted */ + if (dl && it != numeric_limits::max()) + while (more) + more = dl->next(it, &inRGData); + + { + boost::mutex::scoped_lock lock1(uniquerMutex); + boost::mutex::scoped_lock lock2(sMutex); + + if (!distinct && l_outputRG.getRowCount() > 0) + output->insert(outRGData); + + if (distinct) + { + getOutput(&l_outputRG, &outRow, &outRGData); + + if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0) + output->insert(outRGData); + } + + if (++runnersDone == fInputJobStepAssociation.outSize()) + { + output->endOfInput(); + + StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_SUMMARY, 1, 1, fRowsReturned); + postStepSummaryTele(sts); + + if (traceOn()) + { + dlTimes.setLastReadTime(); + dlTimes.setEndOfInputTime(); + + time_t t = time(0); + char timeString[50]; + ctime_r(&t, timeString); + timeString[strlen(timeString) - 1] = '\0'; + ostringstream logStr; + logStr << "ses:" << fSessionId << " st: " << fStepId << " finished at " << timeString + << "; total rows returned-" << fRowsReturned << endl + << "\t1st read " << dlTimes.FirstReadTimeString() << "; EOI " << dlTimes.EndOfInputTimeString() + << "; runtime-" << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime()) + << "s;\n\tUUID " << uuids::to_string(fStepUuid) << endl + << "\tJob completion status " << status() << endl; + logEnd(logStr.str().c_str()); + fExtendedInfo += logStr.str(); + formatMiniStats(); + } + } + } +} + +uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs) +{ + RGData mem; + bool more; + uint32_t ret = 0; + + bs.restart(); + more = output->next(outputIt, &mem); + + if (more) + outputRG.setData(&mem); + else + { + mem = RGData(outputRG, 0U); + outputRG.setData(&mem); + outputRG.resetRowGroup(0); + outputRG.setStatus(status()); + } + + outputRG.serializeRGData(bs); + ret = outputRG.getRowCount(); + + return ret; +} + +void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data) +{ + if (UNLIKELY(rowMemory.empty())) + { + *data = RGData(*rg); + rg->setData(data); + rg->resetRowGroup(0); + rowMemory.push_back(*data); + } + else + { + *data = rowMemory.back(); + rg->setData(data); + } + + rg->getRow(rg->getRowCount(), row); +} + +void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, uint32_t& tmpOutputRowCount) +{ + r->nextRow(); + tmpOutputRowCount++; + + if (UNLIKELY(tmpOutputRowCount == 8192)) + { + rg->setRowCount(8192); + { + boost::mutex::scoped_lock lock(sMutex); + output->insert(data); + } + data = RGData(*rg); + rg->setData(&data); + rg->resetRowGroup(0); + rg->getRow(0, r); + tmpOutputRowCount = 0; + + if (keepit) + rowMemory.push_back(data); + } +} + +void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions) +{ + uint32_t i; + + out->setRid(0); + + for (i = 0; i < out->getColumnCount(); i++) + { + if (in.isNullValue(i)) + { + TupleUnion::writeNull(out, i); + continue; + } + + /// Call the pre-compiled function. + normalizeFunctions[i](in, out, i); + } +} + +void TupleUnion::run() +{ + uint32_t i; + + boost::mutex::scoped_lock lk(jlLock); + + if (runRan) + return; + + runRan = true; + lk.unlock(); + + for (i = 0; i < fInputJobStepAssociation.outSize(); i++) + inputs.push_back(fInputJobStepAssociation.outAt(i)->rowGroupDL()); - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeStringToString); break; + output = fOutputJobStepAssociation.outAt(0)->rowGroupDL(); - default: - { - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: string to " << out->getColTypes()[i]; - throw logic_error(os.str()); - } - } + if (fDelivery) + { + outputIt = output->getIterator(); + } - break; + outputRG.initRow(&row); + outputRG.initRow(&row2); - case CalpontSystemCatalog::DATE: - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDateToDate); break; + distinctCount = 0; + normalizedData.reset(new RGData[inputs.size()]); - case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDateToDatetime); break; + for (i = 0; i < inputs.size(); i++) + { + if (distinctFlags[i]) + { + distinctCount++; + normalizedData[i].reinit(outputRG); + } + } - case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(std::bind(normalizeDateToTimestamp, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break; - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDateToString); break; + runners.reserve(inputs.size()); - default: - { - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: date to " << out->getColTypes()[i]; - throw logic_error(os.str()); - } - } + for (i = 0; i < inputs.size(); i++) + { + runners.push_back(jobstepThreadPool.invoke(Runner(this, i))); + } +} - break; +void TupleUnion::join() +{ + boost::mutex::scoped_lock lk(jlLock); - case CalpontSystemCatalog::DATETIME: - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDatetimeToDatetime); break; + if (joinRan) + return; - case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDatetimeToDate); break; + joinRan = true; + lk.unlock(); - case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(std::bind(normalizeDatetimeToTimestamp, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break; + jobstepThreadPool.join(runners); + runners.clear(); + uniquer->clear(); + rowMemory.clear(); + rm->returnMemory(memUsage, sessionMemLimit); + memUsage = 0; +} - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDatetimeToString); break; +const string TupleUnion::toString() const +{ + ostringstream oss; + oss << "TupleUnion ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId; + oss << " st:" << fStepId; + oss << " in:"; - default: - { - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: datetime to " - << out->getColTypes()[i]; - throw logic_error(os.str()); - } - } + for (unsigned i = 0; i < fInputJobStepAssociation.outSize(); i++) + oss << ((i == 0) ? " " : ", ") << fInputJobStepAssociation.outAt(i); - break; + oss << " out:"; - case CalpontSystemCatalog::TIMESTAMP: - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(normalizeTimestampToTimestamp); break; + for (unsigned i = 0; i < fOutputJobStepAssociation.outSize(); i++) + oss << ((i == 0) ? " " : ", ") << fOutputJobStepAssociation.outAt(i); - case CalpontSystemCatalog::DATE: result.emplace_back(std::bind(normalizeTimestampToDate, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break; - - case CalpontSystemCatalog::DATETIME: result.emplace_back(std::bind(normalizeTimestampToDatetime, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break; - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(std::bind(normalizeTimestampToString, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break; + oss << endl; - default: - { - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: timestamp to " - << out->getColTypes()[i]; - throw logic_error(os.str()); - } - } + return oss.str(); +} - break; +void TupleUnion::writeNull(Row* out, uint32_t col) +{ + switch (out->getColTypes()[col]) + { + case CalpontSystemCatalog::TINYINT: out->setUintField<1>(joblist::TINYINTNULL, col); break; - case CalpontSystemCatalog::TIME: - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::TIME: result.emplace_back(normalizeTimeToTime); break; + case CalpontSystemCatalog::SMALLINT: out->setUintField<1>(joblist::SMALLINTNULL, col); break; - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeTimeToString); break; + case CalpontSystemCatalog::UTINYINT: out->setUintField<1>(joblist::UTINYINTNULL, col); break; - default: - { - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: time to " << out->getColTypes()[i]; - throw logic_error(os.str()); - } - } + case CalpontSystemCatalog::USMALLINT: out->setUintField<1>(joblist::USMALLINTNULL, col); break; - break; + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + uint32_t len = out->getColumnWidth(col); - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - { - if (out->getScale(i)) - { - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToIntWithScaleInt128); - else - result.emplace_back(normalizeXDoubleToIntWithScaleInt128); - } - else - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToIntWithScaleInt64); - else - result.emplace_back(normalizeXDoubleToIntWithScaleInt64); - } - } - else - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToIntNoScale); - else - result.emplace_back(normalizeXDoubleToIntNoScale); - } - break; - } + switch (len) + { + case 1: out->setUintField<1>(joblist::TINYINTNULL, col); break; - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToUint); - else - result.emplace_back(normalizeXDoubleToUint); - break; - } + case 2: out->setUintField<2>(joblist::SMALLINTNULL, col); break; - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToXFloat); - else - result.emplace_back(normalizeXDoubleToXFloat); - break; - } - - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToXDouble); - else - result.emplace_back(normalizeXDoubleToXDouble); - break; - } - - case CalpontSystemCatalog::LONGDOUBLE: - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToLongDouble); - else - result.emplace_back(normalizeXDoubleToLongDouble); - break; - } - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToString); - else - result.emplace_back(normalizeXDoubleToString); - break; - } - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - // xFLOAT or xDOUBLE to xDECIMAL conversion. Is it really possible? - // TODO: - // Perhaps we should add an assert here that this combination is not possible - // In the current reduction all problems mentioned in the code under - // case "Signed INT to XDecimal" are also applicable here. - // TODO: isn't overflow possible below? - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToWideXDecimal); - else - result.emplace_back(normalizeXDoubleToWideXDecimal); - break; - } - else - { - if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT) - result.emplace_back(normalizeXFloatToXDecimal); - else - result.emplace_back(normalizeXDoubleToXDecimal); - break; - } - break; - } + case 4: out->setUintField<4>(joblist::INTNULL, col); break; - default: - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: floating point to " - << out->getColTypes()[i]; - throw logic_error(os.str()); - } + case 8: out->setUintField<8>(joblist::BIGINTNULL, col); break; - break; - } + case 16: out->setInt128Field(datatypes::Decimal128Null, col); break; - case CalpontSystemCatalog::LONGDOUBLE: + default: { - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - { - if (out->getScale(i)) - { - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeLongDoubleToIntWithScaleInt128); - else - result.emplace_back(normalizeLongDoubleToIntWithScaleInt); - } - else - result.emplace_back(normalizeLongDoubleToIntNoScale); - break; - } + } + } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeLongDoubleToUint); break; + break; + } - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeLongDoubleToXFloat); break; + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: out->setUintField<4>(joblist::INTNULL, col); break; - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeLongDoubleToXDouble); break; + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: out->setUintField<4>(joblist::UINTNULL, col); break; - case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeLongDoubleToLongDouble); break; + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: out->setUintField<4>(joblist::FLOATNULL, col); break; - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeLongDoubleToString); break; + case CalpontSystemCatalog::DATE: out->setUintField<4>(joblist::DATENULL, col); break; - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - // LONGDOUBLE to xDECIMAL conversions: is it really possible? - // TODO: - // Perhaps we should add an assert here that this combination is not possible - // In the current reduction all problems mentioned in the code under - // case "Signed INT to XDecimal" are also applicable here. - if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeLongDoubleToXDecimalInt128); - else - result.emplace_back(normalizeLongDoubleToXDecimalInt); - - break; - } + case CalpontSystemCatalog::BIGINT: out->setUintField<8>(joblist::BIGINTNULL, col); break; - default: - ostringstream os; - os << "TupleUnion::normalize(): tried an illegal conversion: floating point to " - << out->getColTypes()[i]; - throw logic_error(os.str()); - } + case CalpontSystemCatalog::UBIGINT: out->setUintField<8>(joblist::UBIGINTNULL, col); break; - break; - } + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: out->setUintField<8>(joblist::DOUBLENULL, col); break; - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - switch (out->getColTypes()[i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - if (datatypes::isWideDecimalType(out->getColTypes()[i], out->getColumnWidth(i))) - { - if (out->getScale(i) == in.getScale(i)) - { - if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeWideXDecimalToWideXDecimalNoScale); - else - result.emplace_back(normalizeXDecimalToWideXDecimalNoScale); - } - else if (out->getScale(i) > in.getScale(i)) - { - if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH) - result.emplace_back(normalizeWideXDecimalToWideXDecimalWithScale); - else - result.emplace_back(normalizeXDecimalToWideXDecimalWithScale); - } - else // should not happen, the output's scale is the largest - throw logic_error("TupleUnion::normalize(): incorrect scale setting"); - } - // If output type is narrow decimal, input type - // has to be narrow decimal as well. - else - { - if (out->getScale(i) == in.getScale(i)) - result.emplace_back(normalizeXDecimalToOtherNoScale); - else if (out->getScale(i) > in.getScale(i)) - result.emplace_back(normalizeXDecimalToOtherWithScale); - else // should not happen, the output's scale is the largest - throw logic_error("TupleUnion::normalize(): incorrect scale setting"); - } + case CalpontSystemCatalog::DATETIME: out->setUintField<8>(joblist::DATETIMENULL, col); break; - break; - } + case CalpontSystemCatalog::TIMESTAMP: out->setUintField<8>(joblist::TIMESTAMPNULL, col); break; - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeXDecimalToXFloat); break; + case CalpontSystemCatalog::TIME: out->setUintField<8>(joblist::TIMENULL, col); break; - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeXDecimalToXDouble); break; + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::VARCHAR: + { + uint32_t len = out->getColumnWidth(col); - case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeXDecimalToLongDouble); break; + switch (len) + { + case 1: out->setUintField<1>(joblist::CHAR1NULL, col); break; - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::VARCHAR: - default: - { - if (LIKELY(in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)) - result.emplace_back(normalizeWideXDecimalToString); - else - result.emplace_back(normalizeXDecimalToString); - break; - } - } + case 2: out->setUintField<2>(joblist::CHAR2NULL, col); break; - break; - } + case 3: + case 4: out->setUintField<4>(joblist::CHAR4NULL, col); break; - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::VARBINARY: result.emplace_back(normalizeBlobVarbinary); break; + case 5: + case 6: + case 7: + case 8: out->setUintField<8>(joblist::CHAR8NULL, col); break; - default: - { - ostringstream os; - os << "TupleUnion::normalize(): unknown input type (" << in.getColTypes()[i] << ")"; - cout << os.str() << endl; - throw logic_error(os.str()); - } + default: out->setStringField(nullptr, 0, col); break; } + + break; } - idbassert(out->getColumnCount() == result.size()); - return result; - } + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::VARBINARY: + // could use below if zero length and NULL are treated the same + // out->setVarBinaryField("", col); break; + out->setVarBinaryField(nullptr, 0, col); + break; -} // namespace + default: + { + } + } +} -namespace joblist +void TupleUnion::formatMiniStats() { -inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const + ostringstream oss; + oss << "TUS " + << "UM " + << "- " + << "- " + << "- " + << "- " + << "- " + << "- " << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime()) << " " + << fRowsReturned << " "; + fMiniInfo += oss.str(); +} + +inline uint64_t TupleRecursiveUnion::Hasher::operator()(const RowPosition& p) const { Row& row = ts->row; @@ -1300,7 +1894,7 @@ inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const return row.hash(ts->fLastCol); } -inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const +inline bool TupleRecursiveUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const { Row &r1 = ts->row, &r2 = ts->row2; @@ -1317,7 +1911,8 @@ inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& return r1.equals(r2, ts->fLastCol); } -TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount) +TupleRecursiveUnion::TupleRecursiveUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, + uint32_t keyCount) : JobStep(jobInfo) , fTableOID(tableOID) , output(NULL) @@ -1339,7 +1934,7 @@ TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInf fQtc.stepParms().stepType = StepTeleStats::T_TUN; } -TupleUnion::~TupleUnion() +TupleRecursiveUnion::~TupleRecursiveUnion() { rm->returnMemory(memUsage, sessionMemLimit); @@ -1347,28 +1942,28 @@ TupleUnion::~TupleUnion() output->endOfInput(); } -CalpontSystemCatalog::OID TupleUnion::tableOid() const +CalpontSystemCatalog::OID TupleRecursiveUnion::tableOid() const { return fTableOID; } -void TupleUnion::setInputRowGroups(const vector& in) +void TupleRecursiveUnion::setInputRowGroups(const vector& in) { inputRGs = in; } -void TupleUnion::setOutputRowGroup(const rowgroup::RowGroup& out) +void TupleRecursiveUnion::setOutputRowGroup(const rowgroup::RowGroup& out) { outputRG = out; rowLength = outputRG.getRowSizeWithStrings(); } -void TupleUnion::setDistinctFlags(const vector& v) +void TupleRecursiveUnion::setDistinctFlags(const vector& v) { distinctFlags = v; } -void TupleUnion::readInput(uint32_t which) +void TupleRecursiveUnion::readInput(uint32_t which) { /* The handling of the output got a little kludgey with the string table enhancement. * When there is no distinct check, the outputs are all generated independently of @@ -1424,6 +2019,11 @@ void TupleUnion::readInput(uint32_t which) postStepStartTele(sts); } + if (!more) + { + fRecursiveSteps[0]->abort(); + } + while (more && !cancelled()) { /* @@ -1574,7 +2174,223 @@ void TupleUnion::readInput(uint32_t which) } } -uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs) +// bool TupleRecursiveUnion::readInput(uint32_t which) +// { +// /* The handling of the output got a little kludgey with the string table enhancement. +// * When there is no distinct check, the outputs are all generated independently of +// * each other locally in this fcn. When there is a distinct check, threads +// * share the output, which is built in the 'rowMemory' vector rather than in +// * thread-local memory. Building the result in a common space allows us to +// * store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers. +// */ +// +// // recursive union returns a boolean if there's the program should continue with the outer loop +// isStablised = false; +// RowGroupDL* dl = NULL; +// bool more = true; +// RGData inRGData, outRGData, *tmpRGData; +// uint32_t it = numeric_limits::max(); +// RowGroup l_inputRG, l_outputRG, l_tmpRG; +// Row inRow, outRow, tmpRow; +// bool distinct; +// uint64_t memUsageBefore, memUsageAfter, memDiff; +// l_outputRG = outputRG; +// dl = inputs[which]; +// l_inputRG = inputRGs[which]; +// l_inputRG.initRow(&inRow); +// l_outputRG.initRow(&outRow); +// distinct = distinctFlags[which]; +// +// if (distinct) +// { +// l_tmpRG = outputRG; +// tmpRGData = &normalizedData[which]; +// l_tmpRG.initRow(&tmpRow); +// l_tmpRG.setData(tmpRGData); +// l_tmpRG.resetRowGroup(0); +// l_tmpRG.getRow(0, &tmpRow); +// } +// else +// { +// outRGData = RGData(l_outputRG); +// l_outputRG.setData(&outRGData); +// l_outputRG.resetRowGroup(0); +// l_outputRG.getRow(0, &outRow); +// } +// +// try +// { +// it = dl->getIterator(); +// more = dl->next(it, &inRGData); +// +// if (dlTimes.FirstReadTime().tv_sec == 0) +// dlTimes.setFirstReadTime(); +// +// if (!more) +// { +// isStablised = true; +// } +// +// if (fStartTime == -1) +// { +// StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_START, 1); +// postStepStartTele(sts); +// } +// +// while (more && !cancelled()) +// { +// /* +// normalize each row +// if distinct flag is set +// copy the row into the output and test for uniqueness +// if unique, increment the row count +// else +// copy the row into the output & inc row count +// */ +// l_inputRG.setData(&inRGData); +// l_inputRG.getRow(0, &inRow); +// +// if (distinct) +// { +// memDiff = 0; +// l_tmpRG.resetRowGroup(0); +// l_tmpRG.getRow(0, &tmpRow); +// l_tmpRG.setRowCount(l_inputRG.getRowCount()); +// +// const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &tmpRow, fTimeZone); +// for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(), tmpRow.nextRow()) +// normalize(inRow, &tmpRow, normalizeFunctions); +// +// l_tmpRG.getRow(0, &tmpRow); +// { +// boost::mutex::scoped_lock lk(uniquerMutex); +// getOutput(&l_outputRG, &outRow, &outRGData); +// memUsageBefore = allocator.getMemUsage(); +// +// uint32_t tmpOutputRowCount = l_outputRG.getRowCount(); +// const uint32_t tmpRGRowCount = l_tmpRG.getRowCount(); +// for (uint32_t i = 0; i < tmpRGRowCount; i++, tmpRow.nextRow()) +// { +// pair inserted; +// inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i)); +// +// if (inserted.second) +// { +// copyRow(tmpRow, &outRow); +// const_cast(*(inserted.first)) = +// RowPosition(rowMemory.size() - 1, tmpOutputRowCount); +// memDiff += outRow.getRealSize(); +// addToOutput(&outRow, &l_outputRG, true, outRGData, tmpOutputRowCount); +// fRowsReturned++; +// } +// } +// +// l_outputRG.setRowCount(tmpOutputRowCount); +// +// memUsageAfter = allocator.getMemUsage(); +// memDiff += (memUsageAfter - memUsageBefore); +// } +// +// if (rm->getMemory(memDiff, sessionMemLimit)) +// { +// memUsage += memDiff; +// } +// else +// { +// fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG); +// +// if (status() == 0) // preserve existing error code +// { +// errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_UNION_TOO_BIG)); +// status(logging::ERR_UNION_TOO_BIG); +// } +// +// abort(); +// } +// } +// else +// { +// const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &outRow, fTimeZone); +// const uint32_t inputRGRowCount = l_inputRG.getRowCount(); +// uint32_t tmpOutputRowCount = l_outputRG.getRowCount(); +// +// for (uint32_t i = 0; i < inputRGRowCount; i++, inRow.nextRow()) +// { +// normalize(inRow, &outRow, normalizeFunctions); +// addToOutput(&outRow, &l_outputRG, false, outRGData, tmpOutputRowCount); +// } +// +// fRowsReturned += inputRGRowCount; +// l_outputRG.setRowCount(tmpOutputRowCount); +// } +// +// more = dl->next(it, &inRGData); +// } +// } +// catch (...) +// { +// handleException(std::current_exception(), logging::unionStepErr, logging::ERR_UNION_TOO_BIG, +// "TupleRecursiveUnion::readInput()"); +// status(logging::unionStepErr); +// abort(); +// } +// +// /* make sure that the input was drained before exiting. This can happen if the +// query was aborted */ +// if (dl && it != numeric_limits::max()) +// while (more) +// more = dl->next(it, &inRGData); +// +// { +// boost::mutex::scoped_lock lock1(uniquerMutex); +// boost::mutex::scoped_lock lock2(sMutex); +// +// if (!distinct && l_outputRG.getRowCount() > 0) +// output->insert(outRGData); +// +// if (distinct) +// { +// getOutput(&l_outputRG, &outRow, &outRGData); +// +// if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0) +// output->insert(outRGData); +// } +// +// if (++runnersDone == fInputJobStepAssociation.outSize() || isStablised) +// { +// output->endOfInput(); +// +// StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_SUMMARY, 1, 1, fRowsReturned); +// postStepSummaryTele(sts); +// +// if (traceOn()) +// { +// dlTimes.setLastReadTime(); +// dlTimes.setEndOfInputTime(); +// +// time_t t = time(0); +// char timeString[50]; +// ctime_r(&t, timeString); +// timeString[strlen(timeString) - 1] = '\0'; +// ostringstream logStr; +// logStr << "ses:" << fSessionId << " st: " << fStepId << " finished at " << timeString +// << "; total rows returned-" << fRowsReturned << endl +// << "\t1st read " << dlTimes.FirstReadTimeString() << "; EOI " << +// dlTimes.EndOfInputTimeString() +// << "; runtime-" << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime()) +// << "s;\n\tUUID " << uuids::to_string(fStepUuid) << endl +// << "\tJob completion status " << status() << endl; +// logEnd(logStr.str().c_str()); +// fExtendedInfo += logStr.str(); +// formatMiniStats(); +// } +// return false; +// } +// return true; +// } +// } +// +uint32_t TupleRecursiveUnion::nextBand(messageqcpp::ByteStream& bs) { RGData mem; bool more; @@ -1599,7 +2415,7 @@ uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs) return ret; } -void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data) +void TupleRecursiveUnion::getOutput(RowGroup* rg, Row* row, RGData* data) { if (UNLIKELY(rowMemory.empty())) { @@ -1617,7 +2433,8 @@ void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data) rg->getRow(rg->getRowCount(), row); } -void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, uint32_t& tmpOutputRowCount) +void TupleRecursiveUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, + uint32_t& tmpOutputRowCount) { r->nextRow(); tmpOutputRowCount++; @@ -1640,7 +2457,7 @@ void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, ui } } -void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions) +void TupleRecursiveUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions) { uint32_t i; @@ -1650,7 +2467,7 @@ void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& n { if (in.isNullValue(i)) { - TupleUnion::writeNull(out, i); + TupleRecursiveUnion::writeNull(out, i); continue; } @@ -1659,7 +2476,7 @@ void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& n } } -void TupleUnion::run() +void TupleRecursiveUnion::run() { uint32_t i; @@ -1696,6 +2513,31 @@ void TupleUnion::run() } } + // for (i = 0; i < inputs.size(); i++) + // { + // bool more = readInput(i); + // if (!more) + // { + // // Drain all remaining inputs so they don’t block upstream producers + // + // // runners.reserve(inputs.size() - i - 1); + // for (uint32_t j = i + 1; j < inputs.size(); j++) + // { + // // runners.push_back(jobstepThreadPool.invoke(Runner(this, j))); + // + // RowGroupDL* dl = inputs[j]; + // uint32_t it = dl->getIterator(); + // rowgroup::RGData tmp; + // while (dl->next(it, &tmp)) + // { + // // discard rows + // } + // } + // break; // we’re stabilized, stop real work + // } + // + // } + runners.reserve(inputs.size()); for (i = 0; i < inputs.size(); i++) @@ -1704,7 +2546,7 @@ void TupleUnion::run() } } -void TupleUnion::join() +void TupleRecursiveUnion::join() { boost::mutex::scoped_lock lk(jlLock); @@ -1722,10 +2564,10 @@ void TupleUnion::join() memUsage = 0; } -const string TupleUnion::toString() const +const string TupleRecursiveUnion::toString() const { ostringstream oss; - oss << "TupleUnion ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId; + oss << "TupleRecursiveUnion ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId; oss << " st:" << fStepId; oss << " in:"; @@ -1742,7 +2584,7 @@ const string TupleUnion::toString() const return oss.str(); } -void TupleUnion::writeNull(Row* out, uint32_t col) +void TupleRecursiveUnion::writeNull(Row* out, uint32_t col) { switch (out->getColTypes()[col]) { @@ -1842,7 +2684,7 @@ void TupleUnion::writeNull(Row* out, uint32_t col) } } -void TupleUnion::formatMiniStats() +void TupleRecursiveUnion::formatMiniStats() { ostringstream oss; oss << "TUS " @@ -1856,5 +2698,4 @@ void TupleUnion::formatMiniStats() << fRowsReturned << " "; fMiniInfo += oss.str(); } - } // namespace joblist diff --git a/dbcon/joblist/tupleunion.h b/dbcon/joblist/tupleunion.h index 248ae2fd5..e2ec005fc 100644 --- a/dbcon/joblist/tupleunion.h +++ b/dbcon/joblist/tupleunion.h @@ -27,6 +27,7 @@ // // +#include "joblist.h" #include "jobstep.h" #include @@ -202,5 +203,176 @@ class TupleUnion : public JobStep, public TupleDeliveryStep long fTimeZone; uint32_t fLastCol; }; +class TupleRecursiveUnion : public JobStep, public TupleDeliveryStep +{ + public: + TupleRecursiveUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, + uint32_t keyCount); + ~TupleRecursiveUnion() override; + + void run() override; + void join() override; + + const std::string toString() const override; + execplan::CalpontSystemCatalog::OID tableOid() const override; + + void setInputRowGroups(const std::vector&); + void setOutputRowGroup(const rowgroup::RowGroup&) override; + void setDistinctFlags(const std::vector&); + + const rowgroup::RowGroup& getOutputRowGroup() const override + { + return outputRG; + } + const rowgroup::RowGroup& getDeliveredRowGroup() const override + { + return outputRG; + } + void deliverStringTableRowGroup(bool b) override + { + outputRG.setUseStringTable(b); + } + bool deliverStringTableRowGroup() const override + { + return outputRG.usesStringTable(); + } + + // @bug 598 for self-join + std::string alias1() const + { + return fAlias1; + } + void alias1(const std::string& alias) + { + fAlias = fAlias1 = alias; + } + std::string alias2() const + { + return fAlias2; + } + void alias2(const std::string& alias) + { + fAlias2 = alias; + } + + std::string view1() const + { + return fView1; + } + void view1(const std::string& vw) + { + fView = fView1 = vw; + } + std::string view2() const + { + return fView2; + } + void view2(const std::string& vw) + { + fView2 = vw; + } + + uint32_t nextBand(messageqcpp::ByteStream& bs) override; + + void recursiveSteps(JobStepVector jsv) + { + fRecursiveSteps = jsv; + } + + private: + struct RowPosition + { + uint64_t group : 48; + uint64_t row : 16; + + inline explicit RowPosition(uint64_t i = 0, uint64_t j = 0) : group(i), row(j) {}; + static const uint64_t normalizedFlag = 0x800000000000ULL; // 48th bit is set + }; + + void getOutput(rowgroup::RowGroup* rg, rowgroup::Row* row, rowgroup::RGData* data); + void addToOutput(rowgroup::Row* r, rowgroup::RowGroup* rg, bool keepit, rowgroup::RGData& data, + uint32_t& tmpOutputRowCount); + void normalize(const rowgroup::Row& in, rowgroup::Row* out, const normalizeFunctionsT& normalizeFunctions); + void writeNull(rowgroup::Row* out, uint32_t col); + void readInput(uint32_t); + void formatMiniStats(); + + execplan::CalpontSystemCatalog::OID fTableOID; + // @bug 598 for self-join + std::string fAlias1; + std::string fAlias2; + + std::string fView1; + std::string fView2; + + rowgroup::RowGroup outputRG; + std::vector inputRGs; + std::vector inputs; + RowGroupDL* output; + uint32_t outputIt; + + JobStepVector fRecursiveSteps; + + struct Runner + { + TupleRecursiveUnion* tu; + uint32_t index; + Runner(TupleRecursiveUnion* t, uint32_t in) : tu(t), index(in) + { + } + void operator()() + { + utils::setThreadName("TRUSRunner"); + tu->readInput(index); + } + }; + std::vector runners; // thread pool handles + + struct Hasher + { + TupleRecursiveUnion* ts; + utils::Hasher_r h; + explicit Hasher(TupleRecursiveUnion* t) : ts(t) + { + } + uint64_t operator()(const RowPosition&) const; + }; + struct Eq + { + TupleRecursiveUnion* ts; + explicit Eq(TupleRecursiveUnion* t) : ts(t) + { + } + bool operator()(const RowPosition&, const RowPosition&) const; + }; + + typedef std::tr1::unordered_set> Uniquer_t; + boost::scoped_ptr uniquer; + std::vector rowMemory; + boost::mutex sMutex, uniquerMutex; + uint64_t memUsage; + uint32_t rowLength; + rowgroup::Row row, row2; + std::vector distinctFlags; + ResourceManager* rm; + utils::STLPoolAllocator allocator; + boost::scoped_array normalizedData; + + uint32_t runnersDone; + uint32_t distinctCount; + uint32_t distinctDone; + + uint64_t fRowsReturned; + + // temporary hack to make sure JobList only calls run, join once + boost::mutex jlLock; + bool runRan, joinRan; + + boost::shared_ptr sessionMemLimit; + long fTimeZone; + uint32_t fLastCol; + + bool isStablised = false; +}; } // namespace joblist diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp index 8f9da0c76..788e2aca0 100644 --- a/dbcon/mysql/ha_from_sub.cpp +++ b/dbcon/mysql/ha_from_sub.cpp @@ -220,6 +220,7 @@ SCSEP FromSubQuery::transform() gwi.thd = fGwip.thd; gwi.subQuery = this; gwi.viewName = fGwip.viewName; + gwi.isRecursiveWithTable = fGwip.isRecursiveWithTable; csep->derivedTbAlias(fAlias); // always lower case csep->derivedTbView(fGwip.viewName.alias, lower_case_table_names); @@ -243,4 +244,40 @@ SCSEP FromSubQuery::transform() return csep; } +SCSEP FromSubQuery::transform(bool b) +{ + assert(fFromSub); + SCSEP csep(new CalpontSelectExecutionPlan()); + csep->sessionID(fGwip.sessionid); + csep->location(CalpontSelectExecutionPlan::FROM); + csep->subType(CalpontSelectExecutionPlan::FROM_SUBS); + + // gwi for the sub query + gp_walk_info gwi(fGwip.timeZone, fGwip.subQueriesChain); + gwi.thd = fGwip.thd; + gwi.subQuery = this; + gwi.viewName = fGwip.viewName; + gwi.isRecursiveWithTable = fGwip.isRecursiveWithTable; + csep->derivedTbAlias(fAlias); // always lower case + csep->derivedTbView(fGwip.viewName.alias, lower_case_table_names); + + if (getSelectPlan(gwi, *fFromSub, csep, b) != 0) + { + fGwip.fatalParseError = true; + + if (!gwi.parseErrorText.empty()) + fGwip.parseErrorText = gwi.parseErrorText; + else + fGwip.parseErrorText = "Error occurred in FromSubQuery::transform()"; + + csep.reset(); + return csep; + } + + // Insert column statistics + fGwip.mergeTableStatistics(gwi.tableStatistics); + + fGwip.subselectList.push_back(csep); + return csep; +} } // namespace cal_impl_if diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 701aac3b1..d12a00122 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -31,6 +31,7 @@ #include #include #include +#include "idberrorinfo.h" #include "messagelog.h" #include @@ -41,6 +42,7 @@ #include #include "errorids.h" +#include "mysqld_error.h" using namespace logging; #define PREFER_MY_CONFIG_H @@ -5315,22 +5317,95 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& { // Until we handle recursive cte: // Checking here ensures we catch all with clauses in the query. - if (table_ptr->is_recursive_with_table()) - { - gwi.fatalParseError = true; - gwi.parseErrorText = "Recursive CTE"; - setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi); - return ER_CHECK_NOT_IMPLEMENTED; - } + /* + + refer to sql_union.cc, exec_recursive for a sample implementation + + might just work by setting isUnion to true, then calling get select again. + need to set relevant meta data. + needs to write all to the first table, probably can be achieved + */ string viewName = getViewName(table_ptr); if (lower_case_table_names) { boost::algorithm::to_lower(viewName); } + if (table_ptr->is_recursive_with_table()) + { + dynamic_cast(csep.get())->containsRecursiveQuery(true); + SELECT_LEX* start = table_ptr->derived->first_select(); + // SELECT_LEX* end = NULL; + dynamic_cast(csep.get()) + ->maxRecursiveDepth(gwi.thd->variables.max_recursive_iterations); + // CalpontSelectExecutionPlan::SelectList unionVec; + // bool unionSel = true; + // uint8_t distUnionNum = 0; + SCSEP anchor_plan = NULL; + + gwi.isRecursiveWithTable = true; +#ifdef DEBUG_WALK_COND + + if (gwi.recursiveWithTableName == table_ptr->table_name.str) + { + cerr << "RECURSIVE TABLE: " << gwi.recursiveWithTableName << endl; + } + +#endif + + FromSubQuery* fromSub = new FromSubQuery(gwi, start); + string alias(table_ptr->alias.str); + if (lower_case_table_names) + { + boost::algorithm::to_lower(alias); + } + fromSub->alias(alias); + + CalpontSystemCatalog::TableAliasName tn = + make_aliasview("", table_ptr->table_name.str, alias, viewName); + // @bug 3852. check return execplan + anchor_plan = fromSub->transform(isUnion); + if (!anchor_plan) + { + setError(gwi.thd, ER_INTERNAL_ERROR, fromSub->gwip().parseErrorText, gwi); + CalpontSystemCatalog::removeCalpontSystemCatalog(gwi.sessionid); + return ER_INTERNAL_ERROR; + } + dynamic_cast(anchor_plan.get())->isRecursiveWithTable(true); + + gwi.derivedTbList.push_back(anchor_plan); + gwi.tbList.push_back(tn); + CalpontSystemCatalog::TableAliasName tan = make_aliastable("", table_ptr->table_name.str, alias); + gwi.tableMap[tan] = make_pair(0, table_ptr); + // MCOL-2178 isUnion member only assigned, never used + // MIGR::infinidb_vtable.isUnion = true; //by-pass the 2nd pass of rnd_init + start = table_ptr->derived->first_select(); + + // if (with_element->with_anchor) + // end = with_element->first_recursive; + + if (!anchor_plan) + { + setError(gwi.thd, ER_INTERNAL_ERROR, "No Anchor Query", gwi); + CalpontSystemCatalog::removeCalpontSystemCatalog(gwi.sessionid); + return ER_INTERNAL_ERROR; + } + + // if (table_ptr->view) + // { + // gwi.parseErrorText = "Recursive CTE view"; + // } + // else + // { + // gwi.parseErrorText = "Recursive CTE"; + // } + + // setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi); + // return ER_CHECK_NOT_IMPLEMENTED; + } // @todo process from subquery - if (table_ptr->derived) + else if (table_ptr->derived) { SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery* fromSub = new FromSubQuery(gwi, select_cursor); @@ -5341,7 +5416,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& } fromSub->alias(alias); - CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName); + CalpontSystemCatalog::TableAliasName tn = + make_aliasview("", table_ptr->table_name.str, alias, viewName); // @bug 3852. check return execplan SCSEP plan = fromSub->transform(); @@ -5352,10 +5428,17 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& return ER_INTERNAL_ERROR; } + if (plan->containsRecursiveQuery()) + { + csep->containsRecursiveQuery(true); + } + gwi.derivedTbList.push_back(plan); gwi.tbList.push_back(tn); - CalpontSystemCatalog::TableAliasName tan = make_aliastable("", alias, alias); + CalpontSystemCatalog::TableAliasName tan = make_aliastable("", table_ptr->table_name.str, alias); gwi.tableMap[tan] = make_pair(0, table_ptr); + // MCOL-2178 isUnion member only assigned, never used + // MIGR::infinidb_vtable.isUnion = true; //by-pass the 2nd pass of rnd_init } else if (table_ptr->view) { @@ -5392,12 +5475,9 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, columnStore, lower_case_table_names); - execplan::Partitions parts = getPartitions(table_ptr); - tn.partitions = parts; gwi.tbList.push_back(tn); CalpontSystemCatalog::TableAliasName tan = make_aliastable( table_ptr->db.str, table_name, table_ptr->alias.str, columnStore, lower_case_table_names); - tan.partitions = parts; gwi.tableMap[tan] = make_pair(0, table_ptr); #ifdef DEBUG_WALK_COND cerr << tn << endl; @@ -5461,6 +5541,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& if (!isUnion && (!isSelectHandlerTop || isSelectLexUnit) && select_lex.master_unit()->is_unit_op()) { + // MCOL-2178 isUnion member only assigned, never used + // MIGR::infinidb_vtable.isUnion = true; CalpontSelectExecutionPlan::SelectList unionVec; SELECT_LEX* select_cursor = select_lex.master_unit()->first_select(); unionSel = true; @@ -5488,6 +5570,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& // distinct union num if (sl == select_lex.master_unit()->union_distinct) distUnionNum = unionVec.size(); + // if (sl->get_table_list()->is_recursive_with_table()) + // break; } csep->unionVec(unionVec); @@ -5751,6 +5835,14 @@ int processGroupBy(SELECT_LEX& select_lex, gp_walk_info& gwi, const bool withRol gwi.hasWindowFunc = hasWindowFunc; groupcol = static_cast(select_lex.group_list.first); + if (gwi.isRecursiveWithTable && groupcol) + { + gwi.fatalParseError = true; + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_GROUP_BY, "GROUP BY clause"); + setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi); + return ER_CHECK_NOT_IMPLEMENTED; + } + gwi.disableWrapping = true; for (; groupcol; groupcol = groupcol->next) { @@ -7056,9 +7148,16 @@ int processOrderBy(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, { SQL_I_List order_list = select_lex.order_list; ORDER* ordercol = static_cast(order_list.first); - // check if window functions are in order by. InfiniDB process order by list if // window functions are involved, either in order by or projection. + if (gwi.isRecursiveWithTable && ordercol) + { + gwi.fatalParseError = true; + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_ORDER_BY, "WITH RECURSIVE"); + setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi); + return ER_CHECK_NOT_IMPLEMENTED; + } + for (; ordercol; ordercol = ordercol->next) { if ((*(ordercol->item))->type() == Item::WINDOW_FUNC_ITEM) diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 5a4291f65..efdf94359 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -415,6 +415,7 @@ struct gp_walk_info bool aggOnSelect; bool hasWindowFunc; bool hasSubSelect; + bool isRecursiveWithTable = false; SubQuery* lastSub; std::vector viewList; std::map derivedTbFilterMap; @@ -722,9 +723,9 @@ execplan::CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item); execplan::SPTP getIntervalType(gp_walk_info* gwip, int interval_type); uint32_t isPseudoColumn(std::string funcName); void setDerivedTable(execplan::ParseTree* n); -execplan::ParseTree* setDerivedFilter(gp_walk_info* gwip, execplan::ParseTree*& n, - std::map& obj, - execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList); +// execplan::ParseTree* setDerivedFilter(gp_walk_info* gwip, execplan::ParseTree*& n, +// std::map& obj, +// execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList); void derivedTableOptimization(gp_walk_info* gwip, execplan::SCSEP& csep); bool buildEqualityPredicate(execplan::ReturnedColumn* lhs, execplan::ReturnedColumn* rhs, gp_walk_info* gwip, boost::shared_ptr& sop, const Item_func::Functype& funcType, diff --git a/dbcon/mysql/ha_mcs_opt_rewrites.cpp b/dbcon/mysql/ha_mcs_opt_rewrites.cpp index c6bea2401..0e718095a 100644 --- a/dbcon/mysql/ha_mcs_opt_rewrites.cpp +++ b/dbcon/mysql/ha_mcs_opt_rewrites.cpp @@ -106,7 +106,10 @@ void first_cond_optimization_flag_toggle(SELECT_LEX* select_lex, void (*func)(SE { for (SELECT_LEX* sl = unit->first_select(); sl; sl = sl->next_select()) { - first_cond_optimization_flag_toggle(sl, func); + if (sl->get_table_list()) + { + first_cond_optimization_flag_toggle(sl, func); + } } } } diff --git a/dbcon/mysql/ha_mcs_pushdown.cpp b/dbcon/mysql/ha_mcs_pushdown.cpp index 134188dce..080f76f27 100644 --- a/dbcon/mysql/ha_mcs_pushdown.cpp +++ b/dbcon/mysql/ha_mcs_pushdown.cpp @@ -364,7 +364,8 @@ void item_check(Item* item, bool* unsupported_feature) bool check_user_var(SELECT_LEX* select_lex) { - if (!select_lex) { + if (!select_lex) + { // There are definitely no user vars if select_lex is null return false; } @@ -878,7 +879,7 @@ select_handler* create_columnstore_select_handler_(THD* thd, SELECT_LEX* sel_lex // Unset select_lex::first_cond_optimization if (select_lex->first_cond_optimization) { - first_cond_optimization_flag_toggle(select_lex, &first_cond_optimization_flag_unset); + // first_cond_optimization_flag_toggle(select_lex, &first_cond_optimization_flag_unset); } } } diff --git a/dbcon/mysql/ha_subquery.h b/dbcon/mysql/ha_subquery.h index 403881e21..4d961a95d 100644 --- a/dbcon/mysql/ha_subquery.h +++ b/dbcon/mysql/ha_subquery.h @@ -228,6 +228,8 @@ class FromSubQuery : public SubQuery } execplan::SCSEP transform(); + execplan::SCSEP transform(bool b); + private: SELECT_LEX* fFromSub; std::string fAlias; diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 887ebbef3..ee9aec183 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -21,6 +21,7 @@ * * ***********************************************************************/ +#include #define PREFER_MY_CONFIG_H #include #include @@ -307,6 +308,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n // String str; // item->print(&str, QT_INFINIDB_NO_QUOTE); // cout << str.c_ptr() << endl; + if (gwi.isRecursiveWithTable) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Window Functions not supported in recursive CTE"; + return NULL; + } + if (get_fe_conn_info_ptr() == NULL) { set_fe_conn_info_ptr((void*)new cal_connection_info()); @@ -536,7 +544,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n srcp->asc(orderCol->direction == ORDER::ORDER_ASC ? true : false); // srcp->nullsFirst(orderCol->nulls); // nulls 2-default, 1-nulls - //first, 0-nulls last + // first, 0-nulls last srcp->nullsFirst(orderCol->direction == ORDER::ORDER_ASC ? 1 : 0); // WINDOWS TODO: implement NULLS FIRST/LAST in 10.2 front end diff --git a/dbcon/rbo/rbo_predicate_pushdown.h b/dbcon/rbo/rbo_predicate_pushdown.h index de8b5fa57..8a86f1d9e 100644 --- a/dbcon/rbo/rbo_predicate_pushdown.h +++ b/dbcon/rbo/rbo_predicate_pushdown.h @@ -20,11 +20,17 @@ #define PREFER_MY_CONFIG_H #include #include +#include #include "execplan/calpontselectexecutionplan.h" #include "rulebased_optimizer.h" -namespace optimizer { - bool predicatePushdownFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); - bool applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); -} \ No newline at end of file +namespace optimizer +{ +bool predicatePushdownFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); +bool applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); +execplan::ParseTree* setDerivedFilter(cal_impl_if::gp_walk_info* gwip, execplan::ParseTree*& n, + std::map& obj, + execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList); + +} // namespace optimizer diff --git a/mysql-test/columnstore/future/MCOL-5142-additional-cases.result b/mysql-test/columnstore/future/MCOL-5142-additional-cases.result new file mode 100644 index 000000000..0e2ccd371 --- /dev/null +++ b/mysql-test/columnstore/future/MCOL-5142-additional-cases.result @@ -0,0 +1,150 @@ +DROP DATABASE IF EXISTS MCOL5142_COMP; +CREATE DATABASE MCOL5142_COMP; +USE MCOL5142_COMP; +CREATE TABLE t_cycle (id INT, next_id INT) ENGINE=ColumnStore; +INSERT INTO t_cycle VALUES (1, 2), (2, 3), (3, 1); +SET @@max_recursive_iterations = 10; +WITH RECURSIVE cte AS ( +SELECT id, next_id FROM t_cycle WHERE id = 1 +UNION ALL +SELECT t.id, t.next_id +FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT * FROM cte LIMIT 100; +id next_id +1 2 +WITH RECURSIVE cte AS ( +SELECT id, next_id FROM t_cycle WHERE id = 1 +UNION ALL +SELECT t.id, t.next_id +FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT COUNT(*) FROM cte; +ERROR HY000: Internal error: No key found. +WITH RECURSIVE cte AS ( +SELECT id, next_id FROM t_cycle WHERE id = 1 +UNION ALL +SELECT t.id, t.next_id +FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT id FROM cte LIMIT 20; +ERROR HY000: Internal error: No key found. +SET @@max_recursive_iterations = 1000; +CREATE TABLE t_base (id INT, val INT) ENGINE=ColumnStore; +INSERT INTO t_base VALUES (1, 10), (2, 20); +WITH RECURSIVE cte AS ( +SELECT id, val FROM t_base +UNION ALL +SELECT t.id, SUM(t.val) +FROM t_base t JOIN cte c ON t.id = c.id + 1 +GROUP BY t.id +) +SELECT * FROM cte; +ERROR HY000: Restrictions imposed on recursive definitions are violated for table 'cte' +WITH RECURSIVE cte AS ( +SELECT id, val FROM t_base +UNION ALL +SELECT t.id, t.val +FROM t_base t JOIN cte c ON t.id = c.id + 1 +ORDER BY t.val +) +SELECT * FROM cte; +ERROR 42000: Table 't' from one of the SELECTs cannot be used in ORDER BY +WITH RECURSIVE cte AS ( +SELECT id, val FROM t_base +UNION ALL +SELECT t.id, ROW_NUMBER() OVER (ORDER BY t.val) +FROM t_base t JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; +ERROR HY000: Restrictions imposed on recursive definitions are violated for table 'cte' +CREATE TABLE t_empty_1 (id INT) ENGINE=ColumnStore; +CREATE TABLE t_empty_2 (id INT) ENGINE=ColumnStore; +WITH RECURSIVE cte AS ( +SELECT id FROM t_empty_1 +UNION ALL +SELECT t.id FROM t_empty_2 t JOIN cte c ON t.id = c.id +) +SELECT * FROM cte; +id +CREATE TABLE t_linear (id INT) ENGINE=ColumnStore; +INSERT INTO t_linear VALUES (1), (2), (3); +WITH RECURSIVE cte AS ( +SELECT id +FROM t_linear +WHERE id = 1 +UNION ALL +SELECT t.id +FROM t_linear t +JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; +id +1 +2 +3 +WITH RECURSIVE cte AS ( +SELECT id +FROM t_linear +WHERE id = 1 +UNION ALL +SELECT t.id +FROM t_linear t +JOIN cte c ON t.id = c.id + 1 +) +SELECT COUNT(*) FROM cte; +COUNT(*) +3 +CREATE TABLE t_external (id INT, multiplier INT) ENGINE=ColumnStore; +INSERT INTO t_external VALUES (1, 2), (2, 3), (3, 4); +WITH RECURSIVE cte AS ( +SELECT t_linear.id, t_linear.id AS val +FROM t_linear +WHERE id = 1 +UNION ALL +SELECT t_linear.id, t_linear.id * t_external.multiplier +FROM t_linear +JOIN cte c ON t_linear.id = c.id + 1 +JOIN t_external ON t_external.id = t_linear.id +) +SELECT * FROM cte; +id val +1 1 +2 6 +3 12 +CREATE TABLE t_empty_anchor (id INT) ENGINE=ColumnStore; +INSERT INTO t_empty_anchor VALUES (1), (2), (3); +WITH RECURSIVE cte AS ( +SELECT id +FROM t_empty_anchor +WHERE id = 999 +UNION ALL +SELECT t.id +FROM t_empty_anchor t +JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; +id +WITH RECURSIVE +cte1 AS ( +SELECT 1 AS n +UNION ALL +SELECT n + 1 FROM cte1 WHERE n < 3 +), +cte2 AS ( +SELECT 10 AS m +UNION ALL +SELECT m + 10 FROM cte2 WHERE m < 30 +) +SELECT 'cte1' AS source, n AS value FROM cte1 +UNION ALL +SELECT 'cte2' AS source, m AS value FROM cte2 +ORDER BY source, value; +source value +cte1 1 +cte1 2 +cte1 3 +cte2 10 +cte2 20 +cte2 30 +DROP DATABASE MCOL5142_COMP; diff --git a/mysql-test/columnstore/future/MCOL-5142-additional-cases.test b/mysql-test/columnstore/future/MCOL-5142-additional-cases.test new file mode 100644 index 000000000..5db0b4813 --- /dev/null +++ b/mysql-test/columnstore/future/MCOL-5142-additional-cases.test @@ -0,0 +1,178 @@ +--disable_warnings +DROP DATABASE IF EXISTS MCOL5142_COMP; +--enable_warnings + +CREATE DATABASE MCOL5142_COMP; + +USE MCOL5142_COMP; + +# FAILING TEST CASES (bugs) + +# Cyclic recursion tests +CREATE TABLE t_cycle (id INT, next_id INT) ENGINE=ColumnStore; +INSERT INTO t_cycle VALUES (1, 2), (2, 3), (3, 1); + +SET @@max_recursive_iterations = 10; + +# Cyclic recursion with SELECT * - non-deterministic behavior, seems to ignore max_recursive_iterations value +# may hang or return 1000, 1 or other number of rows +WITH RECURSIVE cte AS ( + SELECT id, next_id FROM t_cycle WHERE id = 1 + UNION ALL + SELECT t.id, t.next_id + FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT * FROM cte LIMIT 100; + +# Cyclic recursion with aggregate functions - fails with ERROR 1815 +--error 1815 +WITH RECURSIVE cte AS ( + SELECT id, next_id FROM t_cycle WHERE id = 1 + UNION ALL + SELECT t.id, t.next_id + FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT COUNT(*) FROM cte; + +# Cyclic recursion with column projection - fails with ERROR 1815 +--error 1815 +WITH RECURSIVE cte AS ( + SELECT id, next_id FROM t_cycle WHERE id = 1 + UNION ALL + SELECT t.id, t.next_id + FROM t_cycle t JOIN cte c ON t.id = c.next_id +) +SELECT id FROM cte LIMIT 20; + +SET @@max_recursive_iterations = 1000; + +# ============================================================ +# WORKING TEST CASES +# ============================================================ + +#Prohibited clauses in recursive part - correctly fail with expected errors + +CREATE TABLE t_base (id INT, val INT) ENGINE=ColumnStore; +INSERT INTO t_base VALUES (1, 10), (2, 20); + +# GROUP BY in recursive part -> Should fail +--error 4008 +WITH RECURSIVE cte AS ( + SELECT id, val FROM t_base + UNION ALL + SELECT t.id, SUM(t.val) + FROM t_base t JOIN cte c ON t.id = c.id + 1 + GROUP BY t.id +) +SELECT * FROM cte; + + +#ORDER BY in recursive part -> Should fail +--error 1250 +WITH RECURSIVE cte AS ( + SELECT id, val FROM t_base + UNION ALL + SELECT t.id, t.val + FROM t_base t JOIN cte c ON t.id = c.id + 1 + ORDER BY t.val +) +SELECT * FROM cte; + + +# Window Function in recursive part -> Should fail +--error 4008 +WITH RECURSIVE cte AS ( + SELECT id, val FROM t_base + UNION ALL + SELECT t.id, ROW_NUMBER() OVER (ORDER BY t.val) + FROM t_base t JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; + +#empty tables +CREATE TABLE t_empty_1 (id INT) ENGINE=ColumnStore; +CREATE TABLE t_empty_2 (id INT) ENGINE=ColumnStore; + +WITH RECURSIVE cte AS ( + SELECT id FROM t_empty_1 + UNION ALL + SELECT t.id FROM t_empty_2 t JOIN cte c ON t.id = c.id +) +SELECT * FROM cte; + +# Linear recursion - works +CREATE TABLE t_linear (id INT) ENGINE=ColumnStore; +INSERT INTO t_linear VALUES (1), (2), (3); + +WITH RECURSIVE cte AS ( + SELECT id + FROM t_linear + WHERE id = 1 + UNION ALL + SELECT t.id + FROM t_linear t + JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; + +WITH RECURSIVE cte AS ( + SELECT id + FROM t_linear + WHERE id = 1 + UNION ALL + SELECT t.id + FROM t_linear t + JOIN cte c ON t.id = c.id + 1 +) +SELECT COUNT(*) FROM cte; + +# JOIN with external table inside recursive CTE - works +CREATE TABLE t_external (id INT, multiplier INT) ENGINE=ColumnStore; +INSERT INTO t_external VALUES (1, 2), (2, 3), (3, 4); + +WITH RECURSIVE cte AS ( + SELECT t_linear.id, t_linear.id AS val + FROM t_linear + WHERE id = 1 + UNION ALL + SELECT t_linear.id, t_linear.id * t_external.multiplier + FROM t_linear + JOIN cte c ON t_linear.id = c.id + 1 + JOIN t_external ON t_external.id = t_linear.id +) +SELECT * FROM cte; + +# Empty anchor set +CREATE TABLE t_empty_anchor (id INT) ENGINE=ColumnStore; +INSERT INTO t_empty_anchor VALUES (1), (2), (3); + +WITH RECURSIVE cte AS ( + SELECT id + FROM t_empty_anchor + WHERE id = 999 + UNION ALL + SELECT t.id + FROM t_empty_anchor t + JOIN cte c ON t.id = c.id + 1 +) +SELECT * FROM cte; + +#Multiple independent recursive CTEs + +WITH RECURSIVE + cte1 AS ( + SELECT 1 AS n + UNION ALL + SELECT n + 1 FROM cte1 WHERE n < 3 + ), + cte2 AS ( + SELECT 10 AS m + UNION ALL + SELECT m + 10 FROM cte2 WHERE m < 30 + ) +SELECT 'cte1' AS source, n AS value FROM cte1 +UNION ALL +SELECT 'cte2' AS source, m AS value FROM cte2 +ORDER BY source, value; + +DROP DATABASE MCOL5142_COMP; diff --git a/mysql-test/columnstore/future/MCOL-5142-basic.test b/mysql-test/columnstore/future/MCOL-5142-basic.test new file mode 100644 index 000000000..3ceb4cb38 --- /dev/null +++ b/mysql-test/columnstore/future/MCOL-5142-basic.test @@ -0,0 +1,46 @@ +--disable_warnings +DROP DATABASE IF EXISTS MCOL5142; +--enable_warnings + +CREATE DATABASE MCOL5142; + + +USE MCOL5142; +CREATE TABLE employees ( + id INT NOT NULL, + name VARCHAR(100), + manager_id INT +) ENGINE = ColumnStore; + +INSERT INTO employees (id, name, manager_id) VALUES +(1, 'Alice', NULL), +(2, 'Bob', 1), +(3, 'Charlie', 1), +(4, 'David', 2), +(5, 'Eve', 2), +(6, 'Frank', 3); + +INSERT INTO employees (id, name, manager_id) VALUES +(7, 'Grace', 4), +(8, 'Heidi', 4), +(9, 'Ivan', 5), +(10, 'Judy', 6), +(11, 'Karl', 7), +(12, 'Laura', 11); + + + +WITH RECURSIVE employee_hierarchy AS ( + SELECT id, name, manager_id, 0 AS level + FROM employees + WHERE id = 1 + + UNION ALL + + SELECT e.id, e.name, e.manager_id, eh.level + 1 + FROM employees as e + JOIN employee_hierarchy eh ON e.manager_id = eh.id +) +SELECT * FROM employee_hierarchy; + +DROP DATABASE MCOL5142; diff --git a/mysql-test/columnstore/future/MCOL-5142-multi-table.test b/mysql-test/columnstore/future/MCOL-5142-multi-table.test new file mode 100644 index 000000000..40a5848ea --- /dev/null +++ b/mysql-test/columnstore/future/MCOL-5142-multi-table.test @@ -0,0 +1,73 @@ +--disable_warnings +DROP DATABASE IF EXISTS MULTI_TABLE_TEST; +--enable_warnings + +CREATE DATABASE MULTI_TABLE_TEST; + +USE MULTI_TABLE_TEST; + +CREATE TABLE products ( + product_id INT NOT NULL, + product_name VARCHAR(100) +) ENGINE = ColumnStore; + +CREATE TABLE components ( + component_id INT NOT NULL, + component_name VARCHAR(100), + parent_product_id INT +) ENGINE = ColumnStore; + +CREATE TABLE suppliers ( + supplier_id INT NOT NULL, + supplier_name VARCHAR(100), + component_id INT +) ENGINE = ColumnStore; + +INSERT INTO products (product_id, product_name) VALUES +(10, 'Smartphone'); + +INSERT INTO components (component_id, component_name, parent_product_id) VALUES +(100, 'Screen', 10), +(101, 'Battery', 10), +(200, 'Glass Panel', 100), +(201, 'LCD', 100), +(202, 'Connector', 101); + +INSERT INTO suppliers (supplier_id, supplier_name, component_id) VALUES +(1000, 'Supplier A', 200), +(1001, 'Supplier B', 201), +(1002, 'Supplier C', 202); + +WITH RECURSIVE product_tree AS ( + SELECT + p.product_id AS root_id, + p.product_name AS root_name, + c.component_id, + c.component_name, + c.parent_product_id, + 1 AS level + FROM products p + JOIN components c ON p.product_id = c.parent_product_id + + UNION ALL + + SELECT + pt.root_id, + pt.root_name, + c.component_id, + c.component_name, + c.parent_product_id, + pt.level + 1 + FROM components c + JOIN product_tree pt ON c.parent_product_id = pt.component_id +) +SELECT + pt.root_name AS product, + pt.component_name AS component, + s.supplier_name AS supplier, + pt.level +FROM product_tree pt +JOIN suppliers s ON pt.component_id = s.component_id; + +DROP DATABASE MULTI_TABLE_TEST; + diff --git a/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test b/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test new file mode 100644 index 000000000..9f93de10d --- /dev/null +++ b/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test @@ -0,0 +1,54 @@ +--disable_warnings +DROP DATABASE IF EXISTS NESTED_SUBQUERY_TEST; +--enable_warnings + +CREATE DATABASE NESTED_SUBQUERY_TEST; + +USE NESTED_SUBQUERY_TEST; + +CREATE TABLE employees ( + id INT NOT NULL, + name VARCHAR(100), + manager_id INT, + department_id INT +) ENGINE = ColumnStore; + +CREATE TABLE departments ( + id INT NOT NULL, + name VARCHAR(100) +) ENGINE = ColumnStore; + +INSERT INTO departments (id, name) VALUES +(101, 'Sales'), +(102, 'Engineering'), +(103, 'Marketing'); + +INSERT INTO employees (id, name, manager_id, department_id) VALUES +(1, 'Alice', NULL, 101), +(2, 'Bob', 1, 101), +(3, 'Charlie', 1, 102), +(4, 'David', 2, 101), +(5, 'Eve', 3, 102), +(6, 'Frank', 3, 103); + +WITH RECURSIVE sales_hierarchy AS ( + SELECT id, name, manager_id, 0 AS level, department_id + FROM employees + WHERE department_id IN (SELECT id FROM departments WHERE name = 'Sales') + AND manager_id IS NULL + + UNION ALL + + SELECT + e.id, + e.name, + e.manager_id, + eh.level + 1, + e.department_id + FROM employees AS e + JOIN sales_hierarchy eh ON e.manager_id = eh.id +) +SELECT * FROM sales_hierarchy; + +DROP DATABASE NESTED_SUBQUERY_TEST; + diff --git a/utils/libmarias3/libmarias3 b/utils/libmarias3/libmarias3 index d9cb536a5..f74150b05 160000 --- a/utils/libmarias3/libmarias3 +++ b/utils/libmarias3/libmarias3 @@ -1 +1 @@ -Subproject commit d9cb536a532ef6e71df66d99e95562e1169ec93f +Subproject commit f74150b05693440d35f93c43e2d2411cc66fee19 diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 50baf681c..e6eb2195d 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -27,6 +27,7 @@ #pragma once +#include #include #include #include @@ -377,6 +378,8 @@ class RGData // Need sig to support backward compat. RGData can deserialize both forms. static const uint32_t RGDATA_SIG = 0xffffffff; // won't happen for 'old' Rowgroup data + uint32_t skipSteps = 0; // TEST:: for dealing with recursive CTE + friend class RowGroup; friend class RowGroupStorage; };