diff --git a/dbcon/execplan/calpontselectexecutionplan.cpp b/dbcon/execplan/calpontselectexecutionplan.cpp
index 62e6cfcd5..7e2415a0e 100644
--- a/dbcon/execplan/calpontselectexecutionplan.cpp
+++ b/dbcon/execplan/calpontselectexecutionplan.cpp
@@ -628,6 +628,11 @@ void CalpontSelectExecutionPlan::serialize(messageqcpp::ByteStream& b) const
   b << timeZone;
   b << fPron;
   b << (uint8_t)fWithRollup;
+  b << (uint8_t)fIsRecursiveWithTable;
+  b << (uint8_t)fIsRecursiveQuery;
+  b << (uint8_t)fContainsRecursiveQuery;
+
+  b << fMaxRecursiveDepth;
 }
 
 void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b)
@@ -832,6 +837,13 @@ void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b)
   utils::Pron::instance().pron(fPron);
   b >> tmp8;
   fWithRollup = tmp8;
+  b >> tmp8;
+  fIsRecursiveWithTable = tmp8;
+  b >> tmp8;
+  fIsRecursiveQuery = tmp8;
+  b >> tmp8;
+  fContainsRecursiveQuery = tmp8;
+  b >> fMaxRecursiveDepth;
 }
 
 bool CalpontSelectExecutionPlan::operator==(const CalpontSelectExecutionPlan& t) const
diff --git a/dbcon/execplan/calpontselectexecutionplan.h b/dbcon/execplan/calpontselectexecutionplan.h
index d39b16693..f06ceee89 100644
--- a/dbcon/execplan/calpontselectexecutionplan.h
+++ b/dbcon/execplan/calpontselectexecutionplan.h
@@ -23,6 +23,7 @@
 /** @file */
 
 #pragma once
+#include <cstdint>
 #include <vector>
 #include <map>
 #include <iosfwd>
@@ -496,7 +497,12 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
   {
     return fDerivedTableList;
   }
-  void derivedTableList(const SelectList& derivedTableList)
+
+  SelectList& derivedTableList()  // mutable overload: callers can edit the list entries in place
+  {
+    return fDerivedTableList;
+  }
+  void derivedTableList(const SelectList& derivedTableList)  // only copied from, so keep it const
   {
     fDerivedTableList = derivedTableList;
   }
@@ -523,10 +529,12 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
   {
     fUnionVec = unionVec;
   }
+
   const SelectList& unionVec() const
   {
     return fUnionVec;
   }
+
   SelectList& unionVec()
   {
     return fUnionVec;
@@ -765,6 +773,46 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
     return fTimeZone;
   }
 
+  /** @brief recursive-WITH-table flag; makeUnionJobSteps() takes its recursive branch when it is set */
+  void isRecursiveWithTable(bool b)
+  {
+    fIsRecursiveWithTable = b;
+  }
+  bool isRecursiveWithTable() const
+  {
+    return fIsRecursiveWithTable;
+  }
+
+  /** @brief recursive-query flag; the sub-adapter step built for a plan with it set is marked recursive */
+  void isRecursiveQuery(bool b)
+  {
+    fIsRecursiveQuery = b;
+  }
+  bool isRecursiveQuery() const
+  {
+    return fIsRecursiveQuery;
+  }
+
+  /** @brief flag read when partitioning union members and when descending into derived tables */
+  void containsRecursiveQuery(bool b)
+  {
+    fContainsRecursiveQuery = b;
+  }
+  bool containsRecursiveQuery() const
+  {
+    return fContainsRecursiveQuery;
+  }
+
+  /** @brief depth value makeUnionJobSteps() reads to decide how often the recursive query is expanded */
+  void maxRecursiveDepth(uint32_t i)
+  {
+    fMaxRecursiveDepth = i;
+  }
+  uint32_t maxRecursiveDepth() const
+  {
+    return fMaxRecursiveDepth;
+  }
+
   /**
    * The serialization interface
    */
@@ -985,6 +1033,11 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
    * A flag to compute subtotals, related to GROUP BY operation.
    */
   bool fWithRollup;
+  bool fIsRecursiveWithTable = false;
+  bool fIsRecursiveQuery = false;
+  bool fContainsRecursiveQuery = false;
+  // Default-initialised like the flags above: serialize() streams it even when it was never set.
+  uint32_t fMaxRecursiveDepth = 0;
 };
 
 /**
diff --git a/dbcon/joblist/fifo.h b/dbcon/joblist/fifo.h
index b88bbac75..096f6e8e3 100644
--- a/dbcon/joblist/fifo.h
+++ b/dbcon/joblist/fifo.h
@@ -83,7 +83,7 @@ class FIFO : public DataListImpl<std::vector<element_t>, element_t>
   }
 
   inline void dropToken() {};
-  inline void dropToken(uint32_t){};
+  inline void dropToken(uint32_t) {};
 
-  // Counters that reflect how many many times this FIFO blocked on reads/writes
+  // Counters that reflect how many times this FIFO blocked on reads/writes
   uint64_t blockedWriteCount() const;
diff --git a/dbcon/joblist/jlf_subquery.cpp b/dbcon/joblist/jlf_subquery.cpp
index b94381756..5e662a7eb 100644
--- a/dbcon/joblist/jlf_subquery.cpp
+++ b/dbcon/joblist/jlf_subquery.cpp
@@ -21,7 +21,7 @@
 #include <iostream>
 #include <stack>
 #include <iterator>
-//#define NDEBUG
+// #define NDEBUG
 #include <cassert>
 #include <vector>
 using namespace std;
@@ -748,6 +748,10 @@ int doFromSubquery(CalpontExecutionPlan* ep, const string& alias, const string&
   SJSTEP subQueryStep = transformer.makeSubQueryStep(csep, true);
   subQueryStep->view(view);
   SJSTEP subAd(new SubAdapterStep(subQueryStep, jobInfo));
+  if (csep->isRecursiveQuery())
+  {
+    dynamic_cast<SubAdapterStep*>(subAd.get())->isRecursiveStep(true);
+  }
   jobInfo.selectAndFromSubs.push_back(subAd);
 
   return CNX_VTABLE_ID;
@@ -870,6 +874,10 @@ SJSTEP doUnionSub(CalpontExecutionPlan* ep, JobInfo& jobInfo)
   transformer.setVarbinaryOK();
   SJSTEP subQueryStep = transformer.makeSubQueryStep(csep, false);
   SJSTEP subAd(new SubAdapterStep(subQueryStep, jobInfo));
+  if (csep->isRecursiveQuery())
+  {
+    dynamic_cast<SubAdapterStep*>(subAd.get())->isRecursiveStep(true);
+  }
   return subAd;
 }
 
diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp
index 7ec8f064f..54eeb21c5 100644
--- a/dbcon/joblist/jlf_tuplejoblist.cpp
+++ b/dbcon/joblist/jlf_tuplejoblist.cpp
@@ -5262,7 +5262,203 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
 
   return SJSTEP(unionStep);
 }
+
+/** @brief Build the TupleRecursiveUnion step fed by the non-recursive and the recursive feeder steps.
+ *
+ *  Every feeder's delivered row group becomes one input of the union step. The output row group takes
+ *  its OIDs/keys from the first input and its column types from convertUnionColType().
+ *
+ *  @param queries          non-recursive feeders; each one is given a fresh output datalist
+ *  @param distinctUnionNum an input is flagged distinct when its index is below this value
+ *  @param jobInfo          provides deliveredCols and fifoSize; deliveredCols result types are rewritten
+ *  @param recurQueries     recursive feeders; all but the last keep their existing output datalist
+ *  @param keyCount         passed through to the TupleRecursiveUnion constructor
+ *  @return the configured union step
+ *  @throws runtime_error when a feeder is not a TupleDeliveryStep
+ */
+SJSTEP recursiveUnionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo,
+                             JobStepVector& recurQueries, uint32_t keyCount)
+{
+  const uint64_t colCount = jobInfo.deliveredCols.size();
+  const uint64_t inputCount = queries.size() + recurQueries.size();
+
+  vector<RowGroup> inputRGs;
+  vector<bool> distinct;
+  vector<uint32_t> oids;
+  vector<uint32_t> keys;
+  JobStepAssociation jsaToUnion;
+
+  // bug4388, share code with connector for column type conversion.
+  // queryColTypes[column][input] holds the type of that column as delivered by that input.
+  vector<vector<CalpontSystemCatalog::ColType>> queryColTypes(
+      colCount, vector<CalpontSystemCatalog::ColType>(inputCount));
+
+  // Records one feeder: its row group, its column types (slot inputIdx) and its distinct flag.
+  auto recordInput = [&](const SJSTEP& step, uint64_t inputIdx, uint64_t distinctIdx)
+  {
+    TupleDeliveryStep* tds = dynamic_cast<TupleDeliveryStep*>(step.get());
+
+    if (tds == nullptr)
+      throw runtime_error("Not a deliverable step.");
+
+    const RowGroup& rg = tds->getDeliveredRowGroup();
+    inputRGs.push_back(rg);
+
+    const vector<uint32_t>& scaleIn = rg.getScale();
+    const vector<uint32_t>& precisionIn = rg.getPrecision();
+    const vector<CalpontSystemCatalog::ColDataType>& typesIn = rg.getColTypes();
+    const vector<uint32_t>& csNumsIn = rg.getCharsetNumbers();
+
+    for (uint64_t j = 0; j < colCount; ++j)
+    {
+      CalpontSystemCatalog::ColType& ct = queryColTypes[j][inputIdx];
+      ct.colDataType = typesIn[j];
+      ct.charsetNumber = csNumsIn[j];
+      ct.scale = scaleIn[j];
+      ct.precision = precisionIn[j];
+      ct.colWidth = rg.getColumnWidth(j);
+    }
+
+    // The output row group takes its OIDs and keys from the very first input, whichever list it is in.
+    if (inputIdx == 0)
+    {
+      const vector<uint32_t>& oidsIn = rg.getOIDs();
+      const vector<uint32_t>& keysIn = rg.getKeys();
+      oids.insert(oids.end(), oidsIn.begin(), oidsIn.begin() + colCount);
+      keys.insert(keys.end(), keysIn.begin(), keysIn.begin() + colCount);
+    }
+
+    // if all union types are UNION_ALL, distinctUnionNum is 0.
+    distinct.push_back(distinctUnionNum > distinctIdx);
+  };
+
+  // Creates a RowGroupDL(1, fifoSize) datalist tagged with CNX_VTABLE_ID.
+  auto newDataList = [&jobInfo]()
+  {
+    AnyDataListSPtr spdl(new AnyDataList());
+    RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize);
+    spdl->rowGroupDL(dl);
+    dl->OID(CNX_VTABLE_ID);
+    return spdl;
+  };
 
+  // Non-recursive feeders: each one writes into its own new datalist.
+  for (uint64_t i = 0; i < queries.size(); i++)
+  {
+    SJSTEP& spjs = queries[i];
+    recordInput(spjs, i, i);
+
+    AnyDataListSPtr spdl = newDataList();
+    JobStepAssociation jsa;
+    jsa.outAdd(spdl);
+    spjs->outputAssociation(jsa);
+    jsaToUnion.outAdd(spdl);
+  }
+
+  // Recursive feeders are appended after the non-recursive ones.
+  for (uint64_t i = 0; i < recurQueries.size(); i++)
+  {
+    SJSTEP spjs = recurQueries[i];
+    // NOTE(review): the distinct flag is still compared against the list-local index i, while the
+    // column types use the overall input index - confirm which index distinctUnionNum refers to.
+    recordInput(spjs, i + queries.size(), i);
+
+    if (i < recurQueries.size() - 1)
+    {
+      // reuse the datalist this feeder already outputs to and register two consumers on it
+      AnyDataListSPtr spdl = spjs->outputAssociation().outAt(0);
+      spdl->rowGroupDL()->setNumConsumers(2);
+      jsaToUnion.outAdd(spdl);
+    }
+    else
+    {
+      // the last recursive feeder gets a new datalist, like the non-recursive feeders
+      AnyDataListSPtr spdl = newDataList();
+      JobStepAssociation jsa;
+      jsa.outAdd(spdl);
+      spjs->outputAssociation(jsa);
+      jsaToUnion.outAdd(spdl);
+    }
+  }
+
+  JobStepAssociation jsa;
+  AnyDataListSPtr unionOut = newDataList();
+  jsa.outAdd(unionOut);
+  // Owned by a shared pointer from the start, so the step is released if anything below throws.
+  TupleRecursiveUnion* unionStep = new TupleRecursiveUnion(CNX_VTABLE_ID, jobInfo, keyCount);
+  SJSTEP unionStepOwner(unionStep);
+  unionStep->inputAssociation(jsaToUnion);
+  unionStep->outputAssociation(jsa);
+
+  // The return code of convertUnionColType() is always 0 here: the connector already called it in
+  // getSelectPlan() and errors out on a non-zero code, so such a plan never reaches ExeMgr.
+  unsigned int dummyUnionedTypeRc = 0;
+
+  // get unioned column types
+  vector<uint32_t> scale;
+  vector<uint32_t> precision;
+  vector<uint32_t> width;
+  vector<CalpontSystemCatalog::ColDataType> types;
+  vector<uint32_t> csNums;
+
+  for (uint64_t j = 0; j < colCount; ++j)
+  {
+    CalpontSystemCatalog::ColType colType =
+        CalpontSystemCatalog::ColType::convertUnionColType(queryColTypes[j], dummyUnionedTypeRc);
+    types.push_back(colType.colDataType);
+    csNums.push_back(colType.charsetNumber);
+    scale.push_back(colType.scale);
+    precision.push_back(colType.precision);
+    width.push_back(colType.colWidth);
+  }
+
+  // running offsets: the first entry is 2 and each following entry adds the previous column's width
+  vector<uint32_t> pos;
+  pos.push_back(2);
+
+  for (uint64_t i = 0; i < oids.size(); ++i)
+    pos.push_back(pos[i] + width[i]);
+
+  unionStep->setInputRowGroups(inputRGs);
+  unionStep->setDistinctFlags(distinct);
+  unionStep->setOutputRowGroup(
+      RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold));
+
+  unionStep->recursiveSteps(recurQueries);
+
+  // Fix for bug 4388 adjusts the result type at connector side, this workaround is obsolete.
+  // bug 3067, update the returned column types.
+  // This is a workaround as the connector always uses the first query' returned columns.
+  for (size_t i = 0; i < jobInfo.deliveredCols.size(); i++)
+  {
+    CalpontSystemCatalog::ColType ct = jobInfo.deliveredCols[i]->resultType();
+    // XXX remove after connector change
+    ct.colDataType = types[i];
+    ct.scale = scale[i];
+    ct.colWidth = width[i];
+
+    // varchar/varbinary column width has been fudged, see fudgeWidth in jlf_common.cpp.
+    if (ct.colDataType == CalpontSystemCatalog::VARCHAR)
+      ct.colWidth--;
+    else if (ct.colDataType == CalpontSystemCatalog::VARBINARY)
+      ct.colWidth -= 2;
+
+    jobInfo.deliveredCols[i]->resultType(ct);
+  }
+
+  // optional trace of the input and output row group layouts
+  if (jobInfo.trace)
+  {
+    cout << boldStart << "\ninput RGs: (distinct=" << distinctUnionNum << ")\n" << boldStop;
+
+    for (vector<RowGroup>::iterator i = inputRGs.begin(); i != inputRGs.end(); i++)
+      cout << i->toString() << endl << endl;
+
+    cout << boldStart << "output RG:\n" << boldStop << unionStep->getDeliveredRowGroup().toString() << endl;
+  }
+
+  return unionStepOwner;
+}
 }  // namespace joblist
 
 #ifdef __clang__
diff --git a/dbcon/joblist/jlf_tuplejoblist.h b/dbcon/joblist/jlf_tuplejoblist.h
index 423c9cf1c..0b29ae68c 100644
--- a/dbcon/joblist/jlf_tuplejoblist.h
+++ b/dbcon/joblist/jlf_tuplejoblist.h
@@ -130,7 +130,8 @@ void orExpresssion(const execplan::Operator* op, JobInfo& jobInfo);
 
 // union the queries and return the tuple union step
 SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount);
-
+SJSTEP recursiveUnionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo,
+                             JobStepVector& recurQueries, uint32_t keyCount);
 void addAnnexStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, JobInfo& jobInfo,
                   IDBQueryType queryType = execplan::IDBQueryType::SELECT);
 
diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp
index 37bc73fce..7da63dbf0 100644
--- a/dbcon/joblist/joblistfactory.cpp
+++ b/dbcon/joblist/joblistfactory.cpp
@@ -18,6 +18,7 @@
 
 //   $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $
 
+#include <cstdint>
 #include <iostream>
 #include <stack>
 #include <iterator>
@@ -28,6 +29,7 @@
 #include <set>
 #include <map>
 #include <limits>
+#include "subquerystep.h"
 using namespace std;
 
 #include <boost/scoped_ptr.hpp>
@@ -1332,7 +1334,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
   }
 
   // for dictionary columns not count only, replace the token oid with string oid
-  for (vector<pair<uint32_t, int> >::iterator it = jobInfo.returnedColVec.begin();
+  for (vector<pair<uint32_t, int>>::iterator it = jobInfo.returnedColVec.begin();
        it != jobInfo.returnedColVec.end(); it++)
   {
     // if the column is a dictionary column and not count only
@@ -1578,7 +1580,7 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobS
   set<uint32_t> seenTableIds;
 
   // Stack of seenTables to make sure the left-hand side and right-hand have the same content
-  stack<set<uint32_t> > seenTableStack;
+  stack<set<uint32_t>> seenTableStack;
 
   if (!querySteps.empty())
   {
@@ -2015,126 +2017,385 @@ void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVec
   parseExecutionPlan(csep, jobInfo, querySteps, projectSteps, deliverySteps);
   makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
 }
-
-void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
-                       JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps)
+void findRecursiveSubSteps(const SJSTEP& root, JobStepVector& result)
 {
-  CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec();
-  uint8_t distinctUnionNum = csep->distinctUnionNum();
-  uint32_t unionRetColsCount = csep->returnedCols().size();
-  JobStepVector unionFeeders;
+  // Collects, into result, every SubAdapterStep flagged recursive that is reachable from root.
+  if (!root)
+    return;
 
-  std::remove_cv_t<std::remove_reference_t<decltype(csep->orderByCols())>> expOrderByCols;
-  for (auto& obc : csep->orderByCols())
+  // explicit last-in-first-out work list instead of recursion
+  std::vector<SJSTEP> pending(1, root);
+
+  while (!pending.empty())
   {
-    if (obc->orderPos() != -1ull)
+    const SJSTEP current = pending.back();
+    pending.pop_back();
+
+    if (!current)
+      continue;
+
+    if (auto* asAdapter = dynamic_cast<SubAdapterStep*>(current.get()))
     {
+      // a recursive adapter is recorded; its wrapped step is explored either way
+      if (asAdapter->isRecursiveStep())
+        result.push_back(current);
+
+      pending.push_back(asAdapter->subStep());
+      continue;
+    }
+
+    auto* asSubQuery = dynamic_cast<SubQueryStep*>(current.get());
+    if (asSubQuery == nullptr)
+      continue;
+
+    // a sub-query step contributes all query steps of its joblist, when it has one
+    const STJLP& joblist = asSubQuery->subJoblist();
+    if (!joblist)
+      continue;
+
+    for (const auto& queryStep : joblist->querySteps())
+      pending.push_back(queryStep);
+  }
+
+  // NOTE: result is reversed as a whole, including anything the caller had already put in it
+  std::reverse(result.begin(), result.end());
+}
+
+void replaceDerivedTableList(CalpontSelectExecutionPlan::SelectList& list, const SCSEP& replacementScep)
+{
+  // Swaps every entry flagged isRecursiveWithTable() for replacementScep; entries that only contain
+  // the recursive query are searched through their own derived-table list instead.
+  for (auto& entry : list)
+  {
+    auto* nested = dynamic_cast<CalpontSelectExecutionPlan*>(entry.get());
+
+    if (nested == nullptr)
+      continue;  // not a select plan, nothing to inspect
+
+    const bool swapEntry = nested->isRecursiveWithTable();
+    if (swapEntry)
+      entry = replacementScep;
+    else if (nested->containsRecursiveQuery())
+      replaceDerivedTableList(nested->derivedTableList(), replacementScep);
-      continue;
-    }
-    if (dynamic_cast<SimpleColumn*>(obc.get()) == nullptr &&
-        dynamic_cast<ConstantColumn*>(obc.get()) == nullptr)
-    {
-      // Arithmetic & function columns need special processing
-      expOrderByCols.push_back(obc);
-    }
   }
+}
 
-  for (auto& unionSub : unionVec)
+// Builds the job steps for a UNION plan. A plan flagged isRecursiveWithTable() takes the recursive
+// branch: members without a recursive reference become plain feeders, the recursive member is expanded
+// up to the depth bound, and recursiveUnionQueries() joins them. Other plans go through unionQueries().
+void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
+                       JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps)
+{
+  if (csep->isRecursiveWithTable())
   {
-    auto* unionCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(unionSub.get());
-    for (auto& obc : expOrderByCols)
+    CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec();
+    uint8_t distinctUnionNum = csep->distinctUnionNum();
+    uint32_t unionRetColsCount = csep->returnedCols().size();
+    JobStepVector unionFeeders;
+
+    std::remove_cv_t<std::remove_reference_t<decltype(csep->orderByCols())>> expOrderByCols;
+    for (auto& obc : csep->orderByCols())
     {
-      // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table in
-      // the UNION, and add the expression to the returned columns.
-      auto* col = obc->clone();
-      auto* ac = dynamic_cast<ArithmeticColumn*>(col);
-      auto* fc = dynamic_cast<FunctionColumn*>(col);
-      if (ac)
+      if (obc->orderPos() != -1ull)
+        continue;
+      if (dynamic_cast<SimpleColumn*>(obc.get()) == nullptr &&
+          dynamic_cast<ConstantColumn*>(obc.get()) == nullptr)
       {
-        ac->expression()->walk(fixUnionExpressionCol, unionCSEP);
-        ac->setSimpleColumnList();
+        // Arithmetic & function columns need special processing
+        expOrderByCols.push_back(obc);
       }
-      else if (fc)
+    }
+    // Members without a recursive reference go first, in their original order (distinctUnionNum is
+    // positional); everything from partitionPoint on is treated as recursive.
+    auto partitionPoint = std::stable_partition(
+        unionVec.begin(), unionVec.end(),
+        [](const SCEP& scep)
+        {
+          auto* plan = dynamic_cast<CalpontSelectExecutionPlan*>(scep.get());
+          return plan != nullptr && !plan->containsRecursiveQuery();
+        });
+
+    CalpontSelectExecutionPlan* baseRecur = nullptr;
+    CalpontSelectExecutionPlan* currRecur = nullptr;
+
+    SJSTEP sub;
+
+    // iterate up to the non recursive queries
+    for (auto it = unionVec.begin(); it != partitionPoint; ++it)
+    {
+      auto& unionSub = *it;
+      auto* unionCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(unionSub.get());
+      for (auto& obc : expOrderByCols)
       {
-        for (auto& parm : fc->functionParms())
+        // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table
+        // in the UNION, and add the expression to the returned columns.
+        auto* col = obc->clone();
+        auto* ac = dynamic_cast<ArithmeticColumn*>(col);
+        auto* fc = dynamic_cast<FunctionColumn*>(col);
+        if (ac)
         {
-          parm->walk(fixUnionExpressionCol, unionCSEP);
+          ac->expression()->walk(fixUnionExpressionCol, unionCSEP);
+          ac->setSimpleColumnList();
         }
-        fc->setSimpleColumnList();
+        else if (fc)
+        {
+          for (auto& parm : fc->functionParms())
+            parm->walk(fixUnionExpressionCol, unionCSEP);
+          fc->setSimpleColumnList();
+        }
+        unionCSEP->returnedCols().emplace_back(col);
       }
-      unionCSEP->returnedCols().emplace_back(col);
+      SJSTEP sub = doUnionSub(unionSub.get(), jobInfo);
+      querySteps.push_back(sub);
+      unionFeeders.push_back(sub);
     }
-    SJSTEP sub = doUnionSub(unionSub.get(), jobInfo);
-    querySteps.push_back(sub);
-    unionFeeders.push_back(sub);
-  }
 
-  for (auto& obc : expOrderByCols)
-  {
-    // Add a SimpleColumn to the outer query for the every ORDER BY expression
-    auto* sc = new SimpleColumn(*obc.get());
-    csep->returnedCols().emplace_back(sc);
-    sc->colPosition(csep->returnedCols().size() - 1);
-    sc->orderPos(csep->returnedCols().size() - 1);
-    obc->orderPos(csep->returnedCols().size() - 1);
-  }
+    for (auto cit = partitionPoint; cit != unionVec.end(); ++cit)
+    {
+      currRecur = dynamic_cast<CalpontSelectExecutionPlan*>(cit->get());
+      idbassert(currRecur);  // the cast result is dereferenced on the next line
+      currRecur->isRecursiveQuery(true);
+    }
+    idbassert(currRecur);  // without a recursive member there is nothing to copy below
+    baseRecur = new CalpontSelectExecutionPlan(*currRecur);
+    // NOTE(review): the bound is tested on currRecur but taken from csep - confirm they always agree.
+    uint32_t depth = (currRecur->maxRecursiveDepth() <= 100) ? csep->maxRecursiveDepth() : 100;
+    // Each pass copies baseRecur and replaces the entries flagged isRecursiveWithTable() with a copy
+    // of the previous pass's plan; only the plan of the final pass is turned into a job step.
+    for (uint32_t i = 0; i < depth; ++i)
+    {
+      CalpontSelectExecutionPlan* workingRecur = new CalpontSelectExecutionPlan(*baseRecur);
+      CalpontSelectExecutionPlan::SelectList& currDerivedTbList = workingRecur->derivedTableList();
+      CalpontSelectExecutionPlan::SelectList& currUnionVec = workingRecur->unionVec();
 
-  jobInfo.deliveredCols = csep->returnedCols();
-  SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount));
-  querySteps.push_back(unionStep);
-  uint16_t stepNo = jobInfo.subId * 10000;
-  numberSteps(querySteps, stepNo, jobInfo.traceFlags);
-  deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
+      currRecur->isRecursiveWithTable(true);
+      workingRecur->isRecursiveQuery(true);
+
+      SCSEP replacement = boost::make_shared<CalpontSelectExecutionPlan>(*currRecur);
+      replaceDerivedTableList(currDerivedTbList, replacement);
+      replaceDerivedTableList(currUnionVec, replacement);
+
+      for (auto& obc : expOrderByCols)
+      {
+        // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table
+        // in the UNION, and add the expression to the returned columns.
+        auto* col = obc->clone();
+        auto* ac = dynamic_cast<ArithmeticColumn*>(col);
+        auto* fc = dynamic_cast<FunctionColumn*>(col);
+        if (ac)
+        {
+          ac->expression()->walk(fixUnionExpressionCol, workingRecur);
+          ac->setSimpleColumnList();
+        }
+        else if (fc)
+        {
+          for (auto& parm : fc->functionParms())
+            parm->walk(fixUnionExpressionCol, workingRecur);
+          fc->setSimpleColumnList();
+        }
+        workingRecur->returnedCols().emplace_back(col);
+      }
+      if (i == depth - 1)
+      {
+        sub = doUnionSub(workingRecur, jobInfo);
+        querySteps.push_back(sub);
+      }
+      // NOTE(review): none of the plan copies made with new here are ever deleted - confirm ownership.
+      currRecur = new CalpontSelectExecutionPlan(*workingRecur);
+    }
+
+    JobStepVector recursiveUnionFeeders;
+    findRecursiveSubSteps(sub, recursiveUnionFeeders);
+
+    for (auto& obc : expOrderByCols)
+    {
+      // Add a SimpleColumn to the outer query for the every ORDER BY expression
+      auto* sc = new SimpleColumn(*obc.get());
+      csep->returnedCols().emplace_back(sc);
+      sc->colPosition(csep->returnedCols().size() - 1);
+      sc->orderPos(csep->returnedCols().size() - 1);
+      obc->orderPos(csep->returnedCols().size() - 1);
+    }
+
+    jobInfo.deliveredCols = csep->returnedCols();
+    SJSTEP unionStep(recursiveUnionQueries(unionFeeders, distinctUnionNum, jobInfo, recursiveUnionFeeders,
+                                           unionRetColsCount));
+    querySteps.push_back(unionStep);
+    uint16_t stepNo = jobInfo.subId * 10000;
+    numberSteps(querySteps, stepNo, jobInfo.traceFlags);
+    deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
+
-  if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull)
+    if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull)
+    {
+      jobInfo.limitStart = csep->limitStart();
+      jobInfo.limitCount = csep->limitNum();
+      jobInfo.orderByThreads = csep->orderByThreads();
+      for (auto& obc : csep->orderByCols())
+      {
+        auto* osc = dynamic_cast<SimpleColumn*>(obc.get());
+        if (osc)
+        {
+          auto* sc = dynamic_cast<SimpleColumn*>(jobInfo.deliveredCols[obc->orderPos()].get());
+          idbassert(sc);
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->colPosition(obc->orderPos());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos());
+          jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc());
+        }
+        else
+        {
+          auto* tus = dynamic_cast<TupleUnion*>(unionStep.get());
+          idbassert(tus);
+          auto& keys = tus->getOutputRowGroup().getKeys();
+          idbassert(obc->orderPos() < keys.size());
+          jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc());
+        }
+      }
+
+      for (auto& rc : csep->returnedCols())
+      {
+        // Replace ConstantColumns with SimpleColumns and fix OIDs
+        auto* sc = dynamic_cast<SimpleColumn*>(rc.get());
+        if (sc)
+        {
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        }
+        else
+        {
+          sc = new SimpleColumn(*rc.get());
+          rc.reset(sc);
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        }
+      }
+      doProject(csep->returnedCols(), jobInfo);
+      checkReturnedColumns(csep, jobInfo);
+      addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION);
+    }
+  }
+  else
   {
-    jobInfo.limitStart = csep->limitStart();
-    jobInfo.limitCount = csep->limitNum();
-    jobInfo.orderByThreads = csep->orderByThreads();
+    CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec();
+    uint8_t distinctUnionNum = csep->distinctUnionNum();
+    uint32_t unionRetColsCount = csep->returnedCols().size();
+    JobStepVector unionFeeders;
+
+    std::remove_cv_t<std::remove_reference_t<decltype(csep->orderByCols())>> expOrderByCols;
     for (auto& obc : csep->orderByCols())
     {
-      auto* osc = dynamic_cast<SimpleColumn*>(obc.get());
-      if (osc)
+      if (obc->orderPos() != -1ull)
       {
-        auto* sc = dynamic_cast<SimpleColumn*>(jobInfo.deliveredCols[obc->orderPos()].get());
-        idbassert(sc);
-        sc->schemaName("");
-        sc->tableAlias(querySteps[0]->alias());
-        sc->colPosition(obc->orderPos());
-        sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos());
-        jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc());
+        continue;
       }
-      else
+      if (dynamic_cast<SimpleColumn*>(obc.get()) == nullptr &&
+          dynamic_cast<ConstantColumn*>(obc.get()) == nullptr)
       {
-        auto* tus = dynamic_cast<TupleUnion*>(unionStep.get());
-        auto& keys = tus->getOutputRowGroup().getKeys();
-        idbassert(obc->orderPos() < keys.size());
-        jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc());
+        // Arithmetic & function columns need special processing
+        expOrderByCols.push_back(obc);
       }
     }
 
-    for (auto& rc : csep->returnedCols())
+    for (auto& unionSub : unionVec)
     {
-      // Replace ConstantColumns with SimpleColumns and fix OIDs
-      auto* sc = dynamic_cast<SimpleColumn*>(rc.get());
-      if (sc)
+      auto* unionCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(unionSub.get());
+      for (auto& obc : expOrderByCols)
       {
-        sc->schemaName("");
-        sc->tableAlias(querySteps[0]->alias());
-        sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table
+        // in the UNION, and add the expression to the returned columns.
+        auto* col = obc->clone();
+        auto* ac = dynamic_cast<ArithmeticColumn*>(col);
+        auto* fc = dynamic_cast<FunctionColumn*>(col);
+        if (ac)
+        {
+          ac->expression()->walk(fixUnionExpressionCol, unionCSEP);
+          ac->setSimpleColumnList();
+        }
+        else if (fc)
+        {
+          for (auto& parm : fc->functionParms())
+          {
+            parm->walk(fixUnionExpressionCol, unionCSEP);
+          }
+          fc->setSimpleColumnList();
+        }
+        unionCSEP->returnedCols().emplace_back(col);
       }
-      else
+      SJSTEP sub = doUnionSub(unionSub.get(), jobInfo);
+      querySteps.push_back(sub);
+      unionFeeders.push_back(sub);
+    }
+
+    for (auto& obc : expOrderByCols)
+    {
+      // Add a SimpleColumn to the outer query for the every ORDER BY expression
+      auto* sc = new SimpleColumn(*obc.get());
+      csep->returnedCols().emplace_back(sc);
+      sc->colPosition(csep->returnedCols().size() - 1);
+      sc->orderPos(csep->returnedCols().size() - 1);
+      obc->orderPos(csep->returnedCols().size() - 1);
+    }
+
+    jobInfo.deliveredCols = csep->returnedCols();
+    SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount));
+    querySteps.push_back(unionStep);
+    uint16_t stepNo = jobInfo.subId * 10000;
+    numberSteps(querySteps, stepNo, jobInfo.traceFlags);
+    deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
+
+    if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull)
+    {
+      jobInfo.limitStart = csep->limitStart();
+      jobInfo.limitCount = csep->limitNum();
+      jobInfo.orderByThreads = csep->orderByThreads();
+      for (auto& obc : csep->orderByCols())
       {
-        sc = new SimpleColumn(*rc.get());
-        rc.reset(sc);
-        sc->schemaName("");
-        sc->tableAlias(querySteps[0]->alias());
-        sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        auto* osc = dynamic_cast<SimpleColumn*>(obc.get());
+        if (osc)
+        {
+          auto* sc = dynamic_cast<SimpleColumn*>(jobInfo.deliveredCols[obc->orderPos()].get());
+          idbassert(sc);
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->colPosition(obc->orderPos());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos());
+          jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc());
+        }
+        else
+        {
+          auto* tus = dynamic_cast<TupleUnion*>(unionStep.get());
+          auto& keys = tus->getOutputRowGroup().getKeys();
+          idbassert(obc->orderPos() < keys.size());
+          jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc());
+        }
+      }
+
+      for (auto& rc : csep->returnedCols())
+      {
+        // Replace ConstantColumns with SimpleColumns and fix OIDs
+        auto* sc = dynamic_cast<SimpleColumn*>(rc.get());
+        if (sc)
+        {
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        }
+        else
+        {
+          sc = new SimpleColumn(*rc.get());
+          rc.reset(sc);
+          sc->schemaName("");
+          sc->tableAlias(querySteps[0]->alias());
+          sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
+        }
       }
+      doProject(csep->returnedCols(), jobInfo);
+      checkReturnedColumns(csep, jobInfo);
+      addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION);
     }
-    doProject(csep->returnedCols(), jobInfo);
-    checkReturnedColumns(csep, jobInfo);
-    addAnnexStep(querySteps, deliverySteps, jobInfo, IDBQueryType::UNION);
   }
 }
 }  // namespace joblist
diff --git a/dbcon/joblist/subquerystep.h b/dbcon/joblist/subquerystep.h
index ad81ca360..7ce1921c4 100644
--- a/dbcon/joblist/subquerystep.h
+++ b/dbcon/joblist/subquerystep.h
@@ -215,6 +215,16 @@ class SubAdapterStep : public JobStep, public TupleDeliveryStep
     return fSubStep;
   }
 
+  void isRecursiveStep(bool b)
+  {  // Stores b in fIsRecursiveStep.
+    fIsRecursiveStep = b;
+  }
+
+  bool isRecursiveStep() const
+  {  // Returns fIsRecursiveStep; const-qualified so it is callable through a const SubAdapterStep.
+    return fIsRecursiveStep;
+  }
+
   /** @brief add filters (expression steps)
    */
   void addExpression(const JobStepVector&, JobInfo&);
@@ -252,6 +262,8 @@ class SubAdapterStep : public JobStep, public TupleDeliveryStep
   uint64_t fInputIterator;
   uint64_t fOutputIterator;
 
+  bool fIsRecursiveStep = false;
+
   class Runner
   {
    public:
diff --git a/dbcon/joblist/tupleunion.cpp b/dbcon/joblist/tupleunion.cpp
index 655454497..a9cd17d0e 100644
--- a/dbcon/joblist/tupleunion.cpp
+++ b/dbcon/joblist/tupleunion.cpp
@@ -60,1235 +60,1829 @@ inline double exp10(double x)
 
 namespace
 {
-  // union helper functions.
+// union helper functions.
 
-  inline uint64_t pickScaleForDouble(Row* out, uint32_t i, double val)
-  {
-    /* have to pick a scale to use for the double. using 5... */
-    uint32_t scale = 5;
-    uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor<double>(scale));
-    const int diff = out->getScale(i) - scale;
-    ival = datatypes::applySignedScale<uint64_t>(ival, diff);
-    return ival;
-  }
+inline uint64_t pickScaleForDouble(Row* out, uint32_t i, double val)
+{  // Multiplies val by scaleDivisor(5), casts to an integer, then applies column i's scale difference.
+  /* have to pick a scale to use for the double. using 5... */
+  uint32_t scale = 5;
+  uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor<double>(scale));
+  const int diff = out->getScale(i) - scale;  // scale of column i in out minus the intermediate scale
+  ival = datatypes::applySignedScale<uint64_t>(ival, diff);  // NOTE(review): val < 0 was cast to unsigned
+  return ival;
+}
 
-  inline uint64_t pickScaleForLongDouble(Row* out, uint32_t i, long double val)
-  {
-    /* have to pick a scale to use for the double. using 5... */
-    uint32_t scale = 5;
-    uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor<double>(scale));
-    int diff = out->getScale(i) - scale;
-    ival = datatypes::applySignedScale<uint64_t>(ival, diff);
-    return ival;
-  }
+inline uint64_t pickScaleForLongDouble(Row* out, uint32_t i, long double val)
+{  // Long double input: the product below is narrowed to double before the integer cast.
+  /* have to pick a scale to use for the double. using 5... */
+  uint32_t scale = 5;
+  uint64_t ival = (uint64_t)(double)(val * datatypes::scaleDivisor<double>(scale));
+  int diff = out->getScale(i) - scale;  // scale of column i in out minus the intermediate scale
+  ival = datatypes::applySignedScale<uint64_t>(ival, diff);  // NOTE(review): val < 0 was cast to unsigned
+  return ival;
+}
 
-  NullString formatDouble(double val)
-  {
-    char buf[datatypes::INT128MAXPRECISION + 1];
-    my_bool error = 0;
-    auto len = my_gcvt(val, MY_GCVT_ARG_DOUBLE, sizeof(buf) - 1, buf, &error);
-    idbassert(error == 0 && len <= sizeof(buf));
-    return {buf, len};
-  }
+NullString formatDouble(double val)
+{  // Formats val into a stack buffer with my_gcvt() and returns the text as a NullString.
+  char buf[datatypes::INT128MAXPRECISION + 1];
+  my_bool error = 0;  // written by my_gcvt(); checked by the assert below
+  auto len = my_gcvt(val, MY_GCVT_ARG_DOUBLE, sizeof(buf) - 1, buf, &error);
+  idbassert(error == 0 && len <= sizeof(buf));
+  return {buf, len};
+}
 
-  void normalizeIntToIntNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setIntField(in.getIntField(i), i); 
-  }
+void normalizeIntToIntNoScale(const Row& in, Row* out, uint32_t i)
+{  // Copies integer field i from in to out unchanged.
+  out->setIntField(in.getIntField(i), i);
+}
 
-  void normalizeIntToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
-    out->setInt128Field(val, i);
-  }
+void normalizeIntToIntWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i and stores the result as int128.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
+  out->setInt128Field(val, i);
+}
 
-  void normalizeIntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
-    out->setIntField(val, i);
-  }
-  
-  void normalizeIntToUintNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setUintField(in.getIntField(i), i); 
-  }
+void normalizeIntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i and stores the result as int64.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
+  out->setIntField(val, i);
+}
 
-  void normalizeIntToUintWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
-    out->setInt128Field(val, i);
-  }
+void normalizeIntToUintNoScale(const Row& in, Row* out, uint32_t i)
+{  // Reads field i with the signed getter and stores it with the unsigned setter.
+  out->setUintField(in.getIntField(i), i);
+}
 
-  void normalizeIntToUintWithScaleInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
-    out->setIntField(val, i);
-  }
+void normalizeIntToUintWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i and stores the result as int128.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
+  out->setInt128Field(val, i);
+}
 
-  void normalizeIntToStringWithScale(const Row& in, Row* out, uint32_t i) 
-  {
-    double d = in.getIntField(i);
-    d /= exp10(in.getScale(i));
-    out->setStringField(formatDouble(d), i);
-  }
+void normalizeIntToUintWithScaleInt64(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i; the result is stored via setIntField().
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
+  out->setIntField(val, i);
+}
 
-  void normalizeIntToStringNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    utils::NullString ns(std::to_string(in.getIntField(i)));
-    out->setStringField(ns, i);
-  }
+void normalizeIntToStringWithScale(const Row& in, Row* out, uint32_t i)
+{  // Converts integer field i to double, divides by exp10(scale), stores the formatDouble() text.
+  double d = in.getIntField(i);
+  d /= exp10(in.getScale(i));
+  out->setStringField(formatDouble(d), i);
+}
 
-  void normalizeIntToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
-    out->setFloatField((float)d, i);
-  }
+void normalizeIntToStringNoScale(const Row& in, Row* out, uint32_t i)
+{  // Stores the std::to_string() text of integer field i as a string field.
+  utils::NullString ns(std::to_string(in.getIntField(i)));
+  out->setStringField(ns, i);
+}
 
-  void normalizeIntToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
-    out->setDoubleField(d, i);
-  }
+void normalizeIntToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledSInt64FieldAsXFloat<double>() and stores it narrowed to float.
+  auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
+  out->setFloatField((float)d, i);
+}
 
-  void normalizeIntToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledSInt64FieldAsXFloat<long double>(i);
-    out->setLongDoubleField(d, i);
-  }
+void normalizeIntToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledSInt64FieldAsXFloat<double>() and stores it as a double field.
+  auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
+  out->setDoubleField(d, i);
+}
 
-  void normalizeIntToXDecimalInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
-    out->setInt128Field(val, i);
-  }
+void normalizeIntToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledSInt64FieldAsXFloat<long double>() and stores it as long double.
+  auto d = in.getScaledSInt64FieldAsXFloat<long double>(i);
+  out->setLongDoubleField(d, i);
+}
 
-  void normalizeIntToXDecimalInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
-    out->setIntField(val, i);
-  }
+void normalizeIntToXDecimalInt128(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i and stores the result as int128.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
+  out->setInt128Field(val, i);
+}
 
-  void normalizeUintToIntNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setIntField(in.getUintField(i), i); 
-  }
+void normalizeIntToXDecimalInt64(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to integer field i and stores the result as int64.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
+  out->setIntField(val, i);
+}
 
-  void normalizeUintToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int128_t val = datatypes::applySignedScale<int128_t>(in.getUintField(i), diff);
-    out->setInt128Field(val, i);
-  }
+void normalizeUintToIntNoScale(const Row& in, Row* out, uint32_t i)
+{  // Reads field i with the unsigned getter and stores it with the signed setter.
+  out->setIntField(in.getUintField(i), i);
+}
 
-  void normalizeUntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    uint64_t val = datatypes::applySignedScale<uint64_t>(in.getUintField(i), diff);
-    out->setIntField(val, i);
-  }
+void normalizeUintToIntWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to unsigned field i and stores the result as int128.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int128_t val = datatypes::applySignedScale<int128_t>(in.getUintField(i), diff);
+  out->setInt128Field(val, i);
+}
 
-  void normalizeUintToUint(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setUintField(in.getUintField(i), i); 
-  }
+void normalizeUntToIntWithScaleInt64(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to unsigned field i; the uint64 result goes to setIntField().
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  uint64_t val = datatypes::applySignedScale<uint64_t>(in.getUintField(i), diff);
+  out->setIntField(val, i);
+}
 
-  void normalizeUintToStringWithScale(const Row& in, Row* out, uint32_t i) 
-  {
-    double d = in.getUintField(i);
-    d /= exp10(in.getScale(i));
-    out->setStringField(formatDouble(d), i);
-  }
+void normalizeUintToUint(const Row& in, Row* out, uint32_t i)
+{  // Copies unsigned field i from in to out unchanged.
+  out->setUintField(in.getUintField(i), i);
+}
 
-  void normalizeUintToStringNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    utils::NullString ns(std::to_string(in.getUintField(i)));
-    out->setStringField(ns, i);
-  }
+void normalizeUintToStringWithScale(const Row& in, Row* out, uint32_t i)
+{  // Converts unsigned field i to double, divides by exp10(scale), stores the formatDouble() text.
+  double d = in.getUintField(i);
+  d /= exp10(in.getScale(i));
+  out->setStringField(formatDouble(d), i);
+}
 
-  void normalizUintToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
-    out->setFloatField((float)d, i);
-  }
+void normalizeUintToStringNoScale(const Row& in, Row* out, uint32_t i)
+{  // Stores the std::to_string() text of unsigned field i as a string field.
+  utils::NullString ns(std::to_string(in.getUintField(i)));
+  out->setStringField(ns, i);
+}
 
-  void normalizeUintToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
-    out->setDoubleField(d, i);
-  }
+void normalizUintToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledUInt64FieldAsXFloat<double>() and stores it narrowed to float.
+  auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
+  out->setFloatField((float)d, i);
+}
 
-  void normalizeUintToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    auto d = in.getScaledUInt64FieldAsXFloat<long double>(i);
-    out->setLongDoubleField(d, i);
-  }
+void normalizeUintToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledUInt64FieldAsXFloat<double>() and stores it as a double field.
+  auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
+  out->setDoubleField(d, i);
+}
 
-  void normalizeUintToXDecimalInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    int128_t val = datatypes::applySignedScale<int128_t>(in.getUintField(i), diff);
-    out->setInt128Field(val, i);
-  }
+void normalizeUintToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads field i via getScaledUInt64FieldAsXFloat<long double>() and stores it as long double.
+  auto d = in.getScaledUInt64FieldAsXFloat<long double>(i);
+  out->setLongDoubleField(d, i);
+}
 
-  void normalizeUintToXDecimalInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    const int diff = out->getScale(i) - in.getScale(i);
-    idbassert(diff >= 0);
-    uint64_t val = datatypes::applySignedScale<uint64_t>(in.getUintField(i), diff);
-    out->setIntField(val, i);
-  }
+void normalizeUintToXDecimalInt128(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to unsigned field i and stores the result as int128.
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  int128_t val = datatypes::applySignedScale<int128_t>(in.getUintField(i), diff);
+  out->setInt128Field(val, i);
+}
 
-  void normalizeStringToString(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setStringField(in.getStringField(i), i);
-  }
+void normalizeUintToXDecimalInt64(const Row& in, Row* out, uint32_t i)
+{  // Applies the out/in scale difference to unsigned field i; the uint64 result goes to setIntField().
+  const int diff = out->getScale(i) - in.getScale(i);
+  idbassert(diff >= 0);  // output scale must be >= input scale
+  uint64_t val = datatypes::applySignedScale<uint64_t>(in.getUintField(i), diff);
+  out->setIntField(val, i);
+}
 
-  void normalizeDateToDate(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setIntField(in.getIntField(i), i);
-  }
+void normalizeStringToString(const Row& in, Row* out, uint32_t i)
+{  // Copies string field i from in to out unchanged.
+  out->setStringField(in.getStringField(i), i);
+}
 
-  void normalizeDateToDatetime(const Row& in, Row* out, uint32_t i) 
-  {
-    uint64_t date = in.getUintField(i);
-    date &= ~0x3f;  // zero the 'spare' field
-    date <<= 32;
-    out->setUintField(date, i);
-  }
+void normalizeDateToDate(const Row& in, Row* out, uint32_t i)
+{  // Copies field i from in to out through the integer accessors.
+  out->setIntField(in.getIntField(i), i);
+}
 
-  void normalizeDateToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) 
-  {
-    dataconvert::Date date(in.getUintField(i));
-    dataconvert::MySQLTime m_time;
-    m_time.year = date.year;
-    m_time.month = date.month;
-    m_time.day = date.day;
-    m_time.hour = 0;
-    m_time.minute = 0;
-    m_time.second = 0;
-    m_time.second_part = 0;
-
-    dataconvert::TimeStamp timeStamp;
-    bool isValid = true;
-    int64_t seconds = dataconvert::mySQLTimeToGmtSec(m_time, fTimeZone, isValid);
-
-    if (!isValid)
-    {
-      timeStamp.reset();
-    }
-    else
-    {
-      timeStamp.second = seconds;
-      timeStamp.msecond = m_time.second_part;
-    }
+void normalizeDateToDatetime(const Row& in, Row* out, uint32_t i)
+{  // Clears the low 6 bits of field i and moves the value into the upper 32 bits of the output.
+  uint64_t date = in.getUintField(i);
+  date &= ~0x3f;  // zero the 'spare' field
+  date <<= 32;  // the low 32 bits of the stored value end up zero
+  out->setUintField(date, i);
+}
 
-    uint64_t outValue = (uint64_t) * (reinterpret_cast<uint64_t*>(&timeStamp));
-    out->setUintField(outValue, i);
-  }
+void normalizeDateToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone)
+{
+  dataconvert::Date date(in.getUintField(i));
+  dataconvert::MySQLTime m_time;
+  m_time.year = date.year;
+  m_time.month = date.month;
+  m_time.day = date.day;
+  m_time.hour = 0;
+  m_time.minute = 0;
+  m_time.second = 0;
+  m_time.second_part = 0;
 
-  void normalizeDateToString(const Row& in, Row* out, uint32_t i) 
-  {
-    string d = DataConvert::dateToString(in.getUintField(i));
-    utils::NullString ns(d);
-    out->setStringField(ns, i);
-  }
+  dataconvert::TimeStamp timeStamp;
+  bool isValid = true;
+  int64_t seconds = dataconvert::mySQLTimeToGmtSec(m_time, fTimeZone, isValid);
 
-  void normalizeDatetimeToDatetime(const Row& in, Row* out, uint32_t i) 
+  if (!isValid)
   {
-    out->setIntField(in.getIntField(i), i);
+    timeStamp.reset();
   }
-
-  void normalizeDatetimeToDate(const Row& in, Row* out, uint32_t i) 
+  else
   {
-    uint64_t val = in.getUintField(i);
-    val >>= 32;
-    out->setUintField(val, i);
+    timeStamp.second = seconds;
+    timeStamp.msecond = m_time.second_part;
   }
 
-  void normalizeDatetimeToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone) 
-  {
-    uint64_t val = in.getUintField(i);
-    dataconvert::DateTime dtime(val);
-    dataconvert::MySQLTime m_time;
-    dataconvert::TimeStamp timeStamp;
-
-    m_time.year = dtime.year;
-    m_time.month = dtime.month;
-    m_time.day = dtime.day;
-    m_time.hour = dtime.hour;
-    m_time.minute = dtime.minute;
-    m_time.second = dtime.second;
-    m_time.second_part = dtime.msecond;
-
-    bool isValid = true;
-    int64_t seconds = mySQLTimeToGmtSec(m_time, fTimeZone, isValid);
-
-    if (!isValid)
-    {
-      timeStamp.reset();
-    }
-    else
-    {
-      timeStamp.second = seconds;
-      timeStamp.msecond = m_time.second_part;
-    }
+  uint64_t outValue = (uint64_t)*(reinterpret_cast<uint64_t*>(&timeStamp));
+  out->setUintField(outValue, i);
+}
 
-    uint64_t outValue = (uint64_t) * (reinterpret_cast<uint64_t*>(&timeStamp));
-    out->setUintField(outValue, i);
-  }
+void normalizeDateToString(const Row& in, Row* out, uint32_t i)
+{  // Stores the DataConvert::dateToString() text of field i as a string field.
+  string d = DataConvert::dateToString(in.getUintField(i));
+  utils::NullString ns(d);
+  out->setStringField(ns, i);
+}
 
-  void normalizeDatetimeToString(const Row& in, Row* out, uint32_t i) 
-  {
-    string d = DataConvert::datetimeToString(in.getUintField(i));
-    utils::NullString ns(d);
-    out->setStringField(ns, i);
-  }
+void normalizeDatetimeToDatetime(const Row& in, Row* out, uint32_t i)
+{  // Copies field i from in to out through the integer accessors.
+  out->setIntField(in.getIntField(i), i);
+}
 
-  void normalizeTimestampToTimestamp(const Row& in, Row* out, uint32_t i) 
-  {
-    out->setIntField(in.getIntField(i), i);
-  }
+void normalizeDatetimeToDate(const Row& in, Row* out, uint32_t i)
+{  // Keeps only the upper 32 bits of field i and stores them as the output value.
+  uint64_t val = in.getUintField(i);
+  val >>= 32;  // discards the low 32 bits
+  out->setUintField(val, i);
+}
 
-  void normalizeTimestampToDate(const Row& in, Row* out, uint32_t i, long fTimeZone) 
-  {
-    uint64_t val = in.getUintField(i);
-    dataconvert::TimeStamp timestamp(val);
-    int64_t seconds = timestamp.second;
-    uint64_t outValue;
-
-    dataconvert::MySQLTime time;
-    dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone);
-
-    dataconvert::Date date;
-    date.year = time.year;
-    date.month = time.month;
-    date.day = time.day;
-    date.spare = 0;
-    outValue = (uint32_t) * (reinterpret_cast<uint32_t*>(&date));
-
-    out->setUintField(outValue, i);
-  }
+void normalizeDatetimeToTimestamp(const Row& in, Row* out, uint32_t i, long fTimeZone)
+{
+  uint64_t val = in.getUintField(i);
+  dataconvert::DateTime dtime(val);
+  dataconvert::MySQLTime m_time;
+  dataconvert::TimeStamp timeStamp;
 
-  void normalizeTimestampToDatetime(const Row& in, Row* out, uint32_t i, long fTimeZone) 
-  {
-    uint64_t val = in.getUintField(i);
-    dataconvert::TimeStamp timestamp(val);
-    int64_t seconds = timestamp.second;
-    uint64_t outValue;
-
-    dataconvert::MySQLTime time;
-    dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone);
-
-    dataconvert::DateTime datetime;
-    datetime.year = time.year;
-    datetime.month = time.month;
-    datetime.day = time.day;
-    datetime.hour = time.hour;
-    datetime.minute = time.minute;
-    datetime.second = time.second;
-    datetime.msecond = timestamp.msecond;
-    outValue = (uint64_t) * (reinterpret_cast<uint64_t*>(&datetime));
-
-    out->setUintField(outValue, i);
-  }
+  m_time.year = dtime.year;
+  m_time.month = dtime.month;
+  m_time.day = dtime.day;
+  m_time.hour = dtime.hour;
+  m_time.minute = dtime.minute;
+  m_time.second = dtime.second;
+  m_time.second_part = dtime.msecond;
 
-  void normalizeTimestampToString(const Row& in, Row* out, uint32_t i, long fTimeZone) 
-  {
-    string d = DataConvert::timestampToString(in.getUintField(i), fTimeZone);
-    utils::NullString ns(d);
-    out->setStringField(ns, i);
-  }
+  bool isValid = true;
+  int64_t seconds = mySQLTimeToGmtSec(m_time, fTimeZone, isValid);
 
-  void normalizeTimeToTime(const Row& in, Row* out, uint32_t i) 
+  if (!isValid)
   {
-    out->setIntField(in.getIntField(i), i);
+    timeStamp.reset();
   }
-
-  void normalizeTimeToString(const Row& in, Row* out, uint32_t i) 
+  else
   {
-    string d = DataConvert::timeToString(in.getIntField(i));
-    utils::NullString ns(d);
-    out->setStringField(ns, i);
+    timeStamp.second = seconds;
+    timeStamp.msecond = m_time.second_part;
   }
 
-  void normalizeXFloatToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setInt128Field(pickScaleForDouble(out, i, val), i);
-  }
+  uint64_t outValue = (uint64_t)*(reinterpret_cast<uint64_t*>(&timeStamp));
+  out->setUintField(outValue, i);
+}
 
-  void normalizeXDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setInt128Field(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeDatetimeToString(const Row& in, Row* out, uint32_t i)
+{  // Stores the DataConvert::datetimeToString() text of field i as a string field.
+  string d = DataConvert::datetimeToString(in.getUintField(i));
+  utils::NullString ns(d);
+  out->setStringField(ns, i);
+}
 
-  void normalizeXFloatToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setIntField(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeTimestampToTimestamp(const Row& in, Row* out, uint32_t i)
+{  // Copies field i from in to out through the integer accessors.
+  out->setIntField(in.getIntField(i), i);
+}
 
-  void normalizeXDoubleToIntWithScaleInt64(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setIntField(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeTimestampToDate(const Row& in, Row* out, uint32_t i, long fTimeZone)
+{  // Builds a Date from the seconds of timestamp field i (using fTimeZone) and stores its 32-bit image.
+  uint64_t val = in.getUintField(i);
+  dataconvert::TimeStamp timestamp(val);
+  int64_t seconds = timestamp.second;
+  uint64_t outValue;
+
+  dataconvert::MySQLTime time;
+  dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone);  // presumably fills time; helper not shown
+
+  dataconvert::Date date;
+  date.year = time.year;
+  date.month = time.month;
+  date.day = time.day;
+  date.spare = 0;
+  outValue = (uint32_t)*(reinterpret_cast<uint32_t*>(&date));  // reads the Date object as a raw 32-bit word
+
+  out->setUintField(outValue, i);
+}
 
-  void normalizeXFloatToIntNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setIntField((int64_t)val, i);
-  }
+void normalizeTimestampToDatetime(const Row& in, Row* out, uint32_t i, long fTimeZone)
+{  // Builds a DateTime from timestamp field i (using fTimeZone) and stores its 64-bit image.
+  uint64_t val = in.getUintField(i);
+  dataconvert::TimeStamp timestamp(val);
+  int64_t seconds = timestamp.second;
+  uint64_t outValue;
+
+  dataconvert::MySQLTime time;
+  dataconvert::gmtSecToMySQLTime(seconds, time, fTimeZone);  // presumably fills time; helper not shown
+
+  dataconvert::DateTime datetime;
+  datetime.year = time.year;
+  datetime.month = time.month;
+  datetime.day = time.day;
+  datetime.hour = time.hour;
+  datetime.minute = time.minute;
+  datetime.second = time.second;
+  datetime.msecond = timestamp.msecond;  // sub-second part is taken from the timestamp, not from time
+  outValue = (uint64_t)*(reinterpret_cast<uint64_t*>(&datetime));  // reads the DateTime as a raw 64-bit word
+
+  out->setUintField(outValue, i);
+}
 
-  void normalizeXDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setIntField((int64_t)val, i);
-  }
+void normalizeTimestampToString(const Row& in, Row* out, uint32_t i, long fTimeZone)
+{  // Stores the DataConvert::timestampToString() text of field i (with fTimeZone) as a string field.
+  string d = DataConvert::timestampToString(in.getUintField(i), fTimeZone);
+  utils::NullString ns(d);
+  out->setStringField(ns, i);
+}
 
-  void normalizeXFloatToUint(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setUintField((uint64_t)val, i);
-  }
+void normalizeTimeToTime(const Row& in, Row* out, uint32_t i)
+{  // Copies field i from in to out through the integer accessors.
+  out->setIntField(in.getIntField(i), i);
+}
 
-  void normalizeXDoubleToUint(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setUintField((uint64_t)val, i);
-  }
+void normalizeTimeToString(const Row& in, Row* out, uint32_t i)
+{  // Stores the DataConvert::timeToString() text of field i as a string field.
+  string d = DataConvert::timeToString(in.getIntField(i));
+  utils::NullString ns(d);
+  out->setStringField(ns, i);
+}
 
-  void normalizeXFloatToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setFloatField(val, i);
-  }
+void normalizeXFloatToIntWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Scales float field i with pickScaleForDouble() and stores the result via setInt128Field().
+  double val = in.getFloatField(i);
+  out->setInt128Field(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeXDoubleToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setFloatField(val, i);
-  }
+void normalizeXDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Scales double field i with pickScaleForDouble() and stores the result via setInt128Field().
+  double val = in.getDoubleField(i);
+  out->setInt128Field(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeXFloatToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setDoubleField(val, i);
-  }
+void normalizeXFloatToIntWithScaleInt64(const Row& in, Row* out, uint32_t i)
+{  // Scales float field i with pickScaleForDouble() and stores the result via setIntField().
+  double val = in.getFloatField(i);
+  out->setIntField(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeXDoubleToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setDoubleField(val, i);
-  }
+void normalizeXDoubleToIntWithScaleInt64(const Row& in, Row* out, uint32_t i)
+{  // Scales double field i with pickScaleForDouble() and stores the result via setIntField().
+  double val = in.getDoubleField(i);
+  out->setIntField(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeXFloatToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setLongDoubleField(val, i);
-  }
+void normalizeXFloatToIntNoScale(const Row& in, Row* out, uint32_t i)
+{  // Casts float field i to int64_t (fraction dropped) and stores it as an integer field.
+  double val = in.getFloatField(i);
+  out->setIntField((int64_t)val, i);
+}
 
-  void normalizeXDoubleToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setLongDoubleField(val, i);
-  }
+void normalizeXDoubleToIntNoScale(const Row& in, Row* out, uint32_t i)
+{  // Casts double field i to int64_t (fraction dropped) and stores it as an integer field.
+  double val = in.getDoubleField(i);
+  out->setIntField((int64_t)val, i);
+}
 
-  void normalizeXFloatToString(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setStringField(formatDouble(val), i);
-  }
+void normalizeXFloatToUint(const Row& in, Row* out, uint32_t i)
+{  // Casts float field i to uint64_t and stores it as an unsigned field.
+  double val = in.getFloatField(i);
+  out->setUintField((uint64_t)val, i);
+}
 
-  void normalizeXDoubleToString(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setStringField(formatDouble(val), i);
-  }
+void normalizeXDoubleToUint(const Row& in, Row* out, uint32_t i)
+{  // Casts double field i to uint64_t and stores it as an unsigned field.
+  double val = in.getDoubleField(i);
+  out->setUintField((uint64_t)val, i);
+}
 
-  void normalizeXFloatToWideXDecimal(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setInt128Field(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeXFloatToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Copies float field i from in to out, passing through a double local.
+  double val = in.getFloatField(i);
+  out->setFloatField(val, i);
+}
 
-  void normalizeXDoubleToWideXDecimal(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setInt128Field(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeXDoubleToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Reads double field i and stores it through setFloatField().
+  double val = in.getDoubleField(i);
+  out->setFloatField(val, i);
+}
 
-  void normalizeXFloatToXDecimal(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getFloatField(i);
-    out->setIntField(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeXFloatToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads float field i and stores it as a double field.
+  double val = in.getFloatField(i);
+  out->setDoubleField(val, i);
+}
 
-  void normalizeXDoubleToXDecimal(const Row& in, Row* out, uint32_t i) 
-  {
-    double val = in.getDoubleField(i);
-    out->setIntField(pickScaleForDouble(out, i, val), i);
-  }
+void normalizeXDoubleToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Copies double field i from in to out unchanged.
+  double val = in.getDoubleField(i);
+  out->setDoubleField(val, i);
+}
 
-  void normalizeLongDoubleToIntNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setIntField((int64_t)val, i);
-  }
+void normalizeXFloatToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads float field i and stores it as a long double field.
+  double val = in.getFloatField(i);
+  out->setLongDoubleField(val, i);
+}
 
-  void normalizeLongDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setInt128Field(pickScaleForLongDouble(out, i, val), i);
-  }
+void normalizeXDoubleToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Reads double field i and stores it as a long double field.
+  double val = in.getDoubleField(i);
+  out->setLongDoubleField(val, i);
+}
 
-  void normalizeLongDoubleToIntWithScaleInt(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setIntField(pickScaleForLongDouble(out, i, val), i);
-  }
+void normalizeXFloatToString(const Row& in, Row* out, uint32_t i)
+{  // Stores the formatDouble() text of float field i as a string field.
+  double val = in.getFloatField(i);
+  out->setStringField(formatDouble(val), i);
+}
 
-  void normalizeLongDoubleToUint(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setUintField((uint64_t)val, i);
-  }
+void normalizeXDoubleToString(const Row& in, Row* out, uint32_t i)
+{  // Stores the formatDouble() text of double field i as a string field.
+  double val = in.getDoubleField(i);
+  out->setStringField(formatDouble(val), i);
+}
 
-  void normalizeLongDoubleToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setFloatField(val, i);
-  }
+void normalizeXFloatToWideXDecimal(const Row& in, Row* out, uint32_t i)
+{  // Scales float field i with pickScaleForDouble() and stores the result via setInt128Field().
+  double val = in.getFloatField(i);
+  out->setInt128Field(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeLongDoubleToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setDoubleField(val, i);
-  }
+void normalizeXDoubleToWideXDecimal(const Row& in, Row* out, uint32_t i)
+{  // Scales double field i with pickScaleForDouble() and stores the result via setInt128Field().
+  double val = in.getDoubleField(i);
+  out->setInt128Field(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeLongDoubleToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setLongDoubleField(val, i);
-  }
+void normalizeXFloatToXDecimal(const Row& in, Row* out, uint32_t i)
+{  // Scales float field i with pickScaleForDouble() and stores the result via setIntField().
+  double val = in.getFloatField(i);
+  out->setIntField(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeLongDoubleToString(const Row& in, Row* out, uint32_t i) 
-  {
-    // FIXME: ostream output looks like '1.234e+56' while MDB output is '1.234e56'
-    long double val = in.getLongDoubleField(i);
-    ostringstream os;
-    os.precision(15);  // to match mysql's output
-    os << val;
-    utils::NullString ns(os.str());
-    out->setStringField(ns, i);
-  }
+void normalizeXDoubleToXDecimal(const Row& in, Row* out, uint32_t i)
+{  // Scales double field i with pickScaleForDouble() and stores the result via setIntField().
+  double val = in.getDoubleField(i);
+  out->setIntField(pickScaleForDouble(out, i, val), i);
+}
 
-  void normalizeLongDoubleToXDecimalInt128(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setInt128Field(pickScaleForLongDouble(out, i, val), i);
-  }
+void normalizeLongDoubleToIntNoScale(const Row& in, Row* out, uint32_t i)
+{  // Casts long double field i to int64_t (fraction dropped) and stores it as an integer field.
+  long double val = in.getLongDoubleField(i);
+  out->setIntField((int64_t)val, i);
+}
 
-  void normalizeLongDoubleToXDecimalInt(const Row& in, Row* out, uint32_t i) 
-  {
-    long double val = in.getLongDoubleField(i);
-    out->setIntField(pickScaleForLongDouble(out, i, val), i);
-  }
+void normalizeLongDoubleToIntWithScaleInt128(const Row& in, Row* out, uint32_t i)
+{  // Scales long double field i with pickScaleForLongDouble(); result stored via setInt128Field().
+  long double val = in.getLongDoubleField(i);
+  out->setInt128Field(pickScaleForLongDouble(out, i, val), i);
+}
 
-  void normalizeWideXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int128_t val128 = 0;
-    in.getInt128Field(i, val128);
-    out->setInt128Field(val128, i);
-  }
+void normalizeLongDoubleToIntWithScaleInt(const Row& in, Row* out, uint32_t i)
+{  // Scales long double field i with pickScaleForLongDouble(); result stored via setIntField().
+  long double val = in.getLongDoubleField(i);
+  out->setIntField(pickScaleForLongDouble(out, i, val), i);
+}
 
-  void normalizeXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);      
-    out->setInt128Field(val, i);
-  }
+void normalizeLongDoubleToUint(const Row& in, Row* out, uint32_t i)
+{  // Converts long double column i of `in` to an unsigned integer in column i of `out`.
+  long double val = in.getLongDoubleField(i);
+  out->setUintField((uint64_t)val, i);  // cast drops the fraction; no sign or range check here
+}
 
-  void normalizeWideXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int128_t val128 = 0;
-    in.getInt128Field(i, val128);
-    int128_t temp = datatypes::applySignedScale<int128_t>(val128, out->getScale(i) - in.getScale(i));
-    out->setInt128Field(temp, i);
-  }
+void normalizeLongDoubleToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Copies long double column i of `in` into column i of `out` through setFloatField().
+  long double val = in.getLongDoubleField(i);
+  out->setFloatField(val, i);  // presumably narrowed to float by the setter's parameter - confirm
+}
 
-  void normalizeXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    int128_t temp = datatypes::applySignedScale<int128_t>(val, out->getScale(i) - in.getScale(i));
-    out->setInt128Field(temp, i);
-  }
+void normalizeLongDoubleToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Copies long double column i of `in` into column i of `out` through setDoubleField().
+  long double val = in.getLongDoubleField(i);
+  out->setDoubleField(val, i);  // presumably narrowed to double by the setter's parameter - confirm
+}
 
-  void normalizeXDecimalToOtherNoScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    out->setIntField(val, i);
-  }
+void normalizeLongDoubleToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Copies long double column i from `in` to `out`; the value itself is not modified.
+  long double val = in.getLongDoubleField(i);
+  out->setLongDoubleField(val, i);
+}
 
-  void normalizeXDecimalToOtherWithScale(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    int64_t temp = datatypes::applySignedScale<int64_t>(val, out->getScale(i) - in.getScale(i));
-    out->setIntField(temp, i);
-  }
+void normalizeLongDoubleToString(const Row& in, Row* out, uint32_t i)
+{  // Formats long double column i of `in` as text and stores it in string column i of `out`.
+  // FIXME: ostream output looks like '1.234e+56' while MDB output is '1.234e56'
+  long double val = in.getLongDoubleField(i);
+  ostringstream os;
+  os.precision(15);  // to match mysql's output
+  os << val;  // default float notation, so precision(15) caps the significant digits
+  utils::NullString ns(os.str());  // wrapper type accepted by setStringField() below
+  out->setStringField(ns, i);
+}
 
-  void normalizeXDecimalToXFloat(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    float fval = ((float)val) / IDB_pow[in.getScale(i)];
-    out->setFloatField(fval, i);
-  }
+void normalizeLongDoubleToXDecimalInt128(const Row& in, Row* out, uint32_t i)
+{  // Converts long double column i of `in` into column i of `out` via setInt128Field().
+  long double val = in.getLongDoubleField(i);
+  out->setInt128Field(pickScaleForLongDouble(out, i, val), i);  // presumably scaled by helper - confirm
+}
 
-  void normalizeXDecimalToXDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    double dval = ((double)val) / IDB_pow[in.getScale(i)];
-    out->setDoubleField(dval, i);
-  }
+void normalizeLongDoubleToXDecimalInt(const Row& in, Row* out, uint32_t i)
+{  // Converts long double column i of `in` into column i of `out` via setIntField().
+  long double val = in.getLongDoubleField(i);
+  out->setIntField(pickScaleForLongDouble(out, i, val), i);  // presumably scaled by helper - confirm
+}
 
-  void normalizeXDecimalToLongDouble(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    long double dval = ((long double)val) / IDB_pow[in.getScale(i)];
-    out->setLongDoubleField(dval, i);
-  }
+void normalizeWideXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i)
+{  // Copies the 128-bit value of column i from `in` to `out`; no scale adjustment is made.
+  int128_t val128 = 0;
+  in.getInt128Field(i, val128);  // val128 receives the field value (out-parameter)
+  out->setInt128Field(val128, i);
+}
 
-  void normalizeWideXDecimalToString(const Row& in, Row* out, uint32_t i) 
-  {
-    int128_t val128 = 0;
-    in.getInt128Field(i, val128);
-    datatypes::Decimal dec(0, in.getScale(i), in.getPrecision(i), val128);
-    out->setStringField(dec.toNullString(), i);
-  }
+void normalizeXDecimalToWideXDecimalNoScale(const Row& in, Row* out, uint32_t i)
+{  // Reads column i of `in` as int64_t and stores it in `out` via setInt128Field(); no rescaling.
+  int64_t val = in.getIntField(i);
+  out->setInt128Field(val, i);  // 64-bit value handed to the 128-bit setter
+}
 
-  void normalizeXDecimalToString(const Row& in, Row* out, uint32_t i) 
-  {
-    int64_t val = in.getIntField(i);
-    datatypes::Decimal dec(val, in.getScale(i), in.getPrecision(i));
-    out->setStringField(dec.toNullString(), i);
-  }
+void normalizeWideXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i)
+{  // Adjusts 128-bit column i by (out scale - in scale) and stores the result in `out`.
+  int128_t val128 = 0;
+  in.getInt128Field(i, val128);  // val128 receives the field value (out-parameter)
+  int128_t temp = datatypes::applySignedScale<int128_t>(val128, out->getScale(i) - in.getScale(i));
+  out->setInt128Field(temp, i);  // NOTE(review): no overflow check is visible here - confirm helper
+}
 
-  void normalizeBlobVarbinary(const Row& in, Row* out, uint32_t i) 
-  {
-    // out->setVarBinaryField(in.getVarBinaryStringField(i), i);  // not efficient
-    out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);
-  }
+void normalizeXDecimalToWideXDecimalWithScale(const Row& in, Row* out, uint32_t i)
+{  // Adjusts int64_t column i by (out scale - in scale) in 128-bit math and stores it in `out`.
+  int64_t val = in.getIntField(i);
+  int128_t temp = datatypes::applySignedScale<int128_t>(val, out->getScale(i) - in.getScale(i));
+  out->setInt128Field(temp, i);
+}
 
-  joblist::normalizeFunctionsT inferNormalizeFunctions(const Row& in, Row* out, long fTimeZone)
-  {
-    uint32_t i;
-    joblist::normalizeFunctionsT result;
+void normalizeXDecimalToOtherNoScale(const Row& in, Row* out, uint32_t i)
+{  // Copies the int64_t value of column i from `in` to `out`; no scale adjustment is made.
+  int64_t val = in.getIntField(i);
+  out->setIntField(val, i);
+}
+
+void normalizeXDecimalToOtherWithScale(const Row& in, Row* out, uint32_t i)
+{  // Adjusts int64_t column i by (out scale - in scale) and stores the result in `out`.
+  int64_t val = in.getIntField(i);
+  int64_t temp = datatypes::applySignedScale<int64_t>(val, out->getScale(i) - in.getScale(i));
+  out->setIntField(temp, i);  // NOTE(review): 64-bit math, no overflow check visible - confirm
+}
+
+void normalizeXDecimalToXFloat(const Row& in, Row* out, uint32_t i)
+{  // Converts integer-stored column i of `in` to float, dividing by IDB_pow[in scale].
+  int64_t val = in.getIntField(i);
+  float fval = ((float)val) / IDB_pow[in.getScale(i)];  // IDB_pow[s] presumably 10^s - confirm
+  out->setFloatField(fval, i);
+}
+
+void normalizeXDecimalToXDouble(const Row& in, Row* out, uint32_t i)
+{  // Converts integer-stored column i of `in` to double, dividing by IDB_pow[in scale].
+  int64_t val = in.getIntField(i);
+  double dval = ((double)val) / IDB_pow[in.getScale(i)];  // IDB_pow[s] presumably 10^s - confirm
+  out->setDoubleField(dval, i);
+}
 
-    for (i = 0; i < out->getColumnCount(); i++)
+void normalizeXDecimalToLongDouble(const Row& in, Row* out, uint32_t i)
+{  // Converts integer-stored column i of `in` to long double, dividing by IDB_pow[in scale].
+  int64_t val = in.getIntField(i);
+  long double dval = ((long double)val) / IDB_pow[in.getScale(i)];  // presumably 10^scale - confirm
+  out->setLongDoubleField(dval, i);
+}
+
+void normalizeWideXDecimalToString(const Row& in, Row* out, uint32_t i)
+{  // Renders 128-bit column i of `in` as text, using in's scale and precision, into `out`.
+  int128_t val128 = 0;
+  in.getInt128Field(i, val128);  // val128 receives the field value (out-parameter)
+  datatypes::Decimal dec(0, in.getScale(i), in.getPrecision(i), val128);
+  out->setStringField(dec.toNullString(), i);  // text form is produced by Decimal
+}
+
+void normalizeXDecimalToString(const Row& in, Row* out, uint32_t i)
+{  // Renders int64_t column i of `in` as text, using in's scale and precision, into `out`.
+  int64_t val = in.getIntField(i);
+  datatypes::Decimal dec(val, in.getScale(i), in.getPrecision(i));
+  out->setStringField(dec.toNullString(), i);  // text form is produced by Decimal
+}
+
+void normalizeBlobVarbinary(const Row& in, Row* out, uint32_t i)
+{  // Copies the variable-length binary value of column i from `in` to `out`.
+  // out->setVarBinaryField(in.getVarBinaryStringField(i), i);  // not efficient
+  out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);  // data + length
+}
+
+joblist::normalizeFunctionsT inferNormalizeFunctions(const Row& in, Row* out, long fTimeZone)
+{
+  uint32_t i;
+  joblist::normalizeFunctionsT result;
+
+  for (i = 0; i < out->getColumnCount(); i++)
+  {
+    switch (in.getColTypes()[i])
     {
-      switch (in.getColTypes()[i])
-      {
-        case CalpontSystemCatalog::TINYINT:
-        case CalpontSystemCatalog::SMALLINT:
-        case CalpontSystemCatalog::MEDINT:
-        case CalpontSystemCatalog::INT:
-        case CalpontSystemCatalog::BIGINT:
-          switch (out->getColTypes()[i])
+      case CalpontSystemCatalog::TINYINT:
+      case CalpontSystemCatalog::SMALLINT:
+      case CalpontSystemCatalog::MEDINT:
+      case CalpontSystemCatalog::INT:
+      case CalpontSystemCatalog::BIGINT:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TINYINT:
+          case CalpontSystemCatalog::SMALLINT:
+          case CalpontSystemCatalog::MEDINT:
+          case CalpontSystemCatalog::INT:
+          case CalpontSystemCatalog::BIGINT:
           {
-            case CalpontSystemCatalog::TINYINT:
-            case CalpontSystemCatalog::SMALLINT:
-            case CalpontSystemCatalog::MEDINT:
-            case CalpontSystemCatalog::INT:
-            case CalpontSystemCatalog::BIGINT:
+            if (out->getScale(i) || in.getScale(i))
             {
-              if (out->getScale(i) || in.getScale(i)) 
-              {
-                if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                  result.emplace_back(normalizeIntToIntWithScaleInt128);
-                else
-                  result.emplace_back(normalizeIntToIntWithScaleInt64);
-              } 
+              if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+                result.emplace_back(normalizeIntToIntWithScaleInt128);
               else
-                result.emplace_back(normalizeIntToIntNoScale); 
-              break;
+                result.emplace_back(normalizeIntToIntWithScaleInt64);
             }
+            else
+              result.emplace_back(normalizeIntToIntNoScale);
+            break;
+          }
 
-            case CalpontSystemCatalog::UTINYINT:
-            case CalpontSystemCatalog::USMALLINT:
-            case CalpontSystemCatalog::UMEDINT:
-            case CalpontSystemCatalog::UINT:
-            case CalpontSystemCatalog::UBIGINT:
+          case CalpontSystemCatalog::UTINYINT:
+          case CalpontSystemCatalog::USMALLINT:
+          case CalpontSystemCatalog::UMEDINT:
+          case CalpontSystemCatalog::UINT:
+          case CalpontSystemCatalog::UBIGINT:
+          {
+            if (in.getScale(i))
             {
-              if (in.getScale(i))
-              {
-                if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                  result.emplace_back(normalizeIntToUintWithScaleInt128);
-                else
-                  result.emplace_back(normalizeIntToUintWithScaleInt64);
-              } 
+              if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+                result.emplace_back(normalizeIntToUintWithScaleInt128);
               else
-                result.emplace_back(normalizeIntToUintNoScale); 
-              break;
+                result.emplace_back(normalizeIntToUintWithScaleInt64);
             }
+            else
+              result.emplace_back(normalizeIntToUintNoScale);
+            break;
+          }
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: 
-            {
-              if (in.getScale(i))
-                result.emplace_back(normalizeIntToStringWithScale);
-              else
-                result.emplace_back(normalizeIntToStringNoScale);
-              break;
-            }
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR:
+          {
+            if (in.getScale(i))
+              result.emplace_back(normalizeIntToStringWithScale);
+            else
+              result.emplace_back(normalizeIntToStringNoScale);
+            break;
+          }
+
+          case CalpontSystemCatalog::DATE:
+          case CalpontSystemCatalog::DATETIME:
+          case CalpontSystemCatalog::TIME:
+          case CalpontSystemCatalog::TIMESTAMP:
+            throw logic_error(
+                "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime");
 
-            case CalpontSystemCatalog::DATE:
-            case CalpontSystemCatalog::DATETIME:
-            case CalpontSystemCatalog::TIME:
-            case CalpontSystemCatalog::TIMESTAMP:
-              throw logic_error(
-                  "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime");
+          case CalpontSystemCatalog::FLOAT:
+          case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeIntToXFloat); break;
+
+          case CalpontSystemCatalog::DOUBLE:
+          case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeIntToXDouble); break;
+
+          case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeIntToLongDouble); break;
 
-            case CalpontSystemCatalog::FLOAT:
-            case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeIntToXFloat); break;
+          case CalpontSystemCatalog::DECIMAL:
+          case CalpontSystemCatalog::UDECIMAL:
+          {
+            /*
+              Signed INT to XDecimal
+              TODO:
+              - This code does not handle overflow that may happen on
+                scale multiplication. Instead of returning a garbage value
+                we should probably apply saturation here. In long terms we
+                should implement DECIMAL(65,x) to avoid overflow completely
+                (so the UNION between DECIMAL and integer can choose a proper
+                  DECIMAL(M,N) result data type to guarantee that any incoming
+                  integer value can fit into it).
+            */
+            if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+              result.emplace_back(normalizeIntToXDecimalInt128);
+            else
+              result.emplace_back(normalizeIntToXDecimalInt64);
+            break;
+          }
 
-            case CalpontSystemCatalog::DOUBLE:
-            case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeIntToXDouble); break;
+          default:
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: integer to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+        }
 
-            case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeIntToLongDouble); break;
+        break;
 
-            case CalpontSystemCatalog::DECIMAL:
-            case CalpontSystemCatalog::UDECIMAL:
+      case CalpontSystemCatalog::UTINYINT:
+      case CalpontSystemCatalog::USMALLINT:
+      case CalpontSystemCatalog::UMEDINT:
+      case CalpontSystemCatalog::UINT:
+      case CalpontSystemCatalog::UBIGINT:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TINYINT:
+          case CalpontSystemCatalog::SMALLINT:
+          case CalpontSystemCatalog::MEDINT:
+          case CalpontSystemCatalog::INT:
+          case CalpontSystemCatalog::BIGINT:
+          {
+            if (out->getScale(i))
             {
-              /*
-                Signed INT to XDecimal
-                TODO:
-                - This code does not handle overflow that may happen on
-                  scale multiplication. Instead of returning a garbage value
-                  we should probably apply saturation here. In long terms we
-                  should implement DECIMAL(65,x) to avoid overflow completely
-                  (so the UNION between DECIMAL and integer can choose a proper
-                    DECIMAL(M,N) result data type to guarantee that any incoming
-                    integer value can fit into it).
-              */
               if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                result.emplace_back(normalizeIntToXDecimalInt128);
+                result.emplace_back(normalizeUintToIntWithScaleInt128);
               else
-                result.emplace_back(normalizeIntToXDecimalInt64);
-              break;
+                result.emplace_back(normalizeUntToIntWithScaleInt64);
             }
+            else
+              result.emplace_back(normalizeUintToIntNoScale);
+            break;
+          }
+
+          case CalpontSystemCatalog::UTINYINT:
+          case CalpontSystemCatalog::USMALLINT:
+          case CalpontSystemCatalog::UMEDINT:
+          case CalpontSystemCatalog::UINT:
+          case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeUintToUint); break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR:
+          {
+            if (in.getScale(i))
+              result.emplace_back(normalizeUintToStringWithScale);
+            else
+              result.emplace_back(normalizeUintToStringNoScale);
+            break;
+          }
+
+          case CalpontSystemCatalog::DATE:
+          case CalpontSystemCatalog::DATETIME:
+          case CalpontSystemCatalog::TIME:
+          case CalpontSystemCatalog::TIMESTAMP:
+            throw logic_error(
+                "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime");
+
+          case CalpontSystemCatalog::FLOAT:
+          case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizUintToXFloat); break;
+
+          case CalpontSystemCatalog::DOUBLE:
+          case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeUintToXDouble); break;
+
+          case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeUintToLongDouble); break;
+
+          case CalpontSystemCatalog::DECIMAL:
+          case CalpontSystemCatalog::UDECIMAL:
+          {
+            /*
+              Unsigned INT to XDecimal
+              TODO:
+              - The overflow problem mentioned in the code under case "Signed INT to XDecimal:" is
+                also applicable here.
+            */
+
+            if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+              result.emplace_back(normalizeUintToXDecimalInt128);
+            else
+              result.emplace_back(normalizeUintToXDecimalInt64);
+            break;
+          }
+
+          default:
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: integer to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+        }
+
+        break;
+
+      case CalpontSystemCatalog::CHAR:
+      case CalpontSystemCatalog::TEXT:
+      case CalpontSystemCatalog::VARCHAR:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeStringToString); break;
+
+          default:
+          {
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: string to " << out->getColTypes()[i];
+            throw logic_error(os.str());
+          }
+        }
+
+        break;
+
+      case CalpontSystemCatalog::DATE:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDateToDate); break;
 
-            default:
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: integer to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
+          case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDateToDatetime); break;
+
+          case CalpontSystemCatalog::TIMESTAMP:
+            result.emplace_back(std::bind(normalizeDateToTimestamp, std::placeholders::_1,
+                                          std::placeholders::_2, std::placeholders::_3, fTimeZone));
+            break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDateToString); break;
+
+          default:
+          {
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: date to " << out->getColTypes()[i];
+            throw logic_error(os.str());
+          }
+        }
+
+        break;
+
+      case CalpontSystemCatalog::DATETIME:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDatetimeToDatetime); break;
+
+          case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDatetimeToDate); break;
+
+          case CalpontSystemCatalog::TIMESTAMP:
+            result.emplace_back(std::bind(normalizeDatetimeToTimestamp, std::placeholders::_1,
+                                          std::placeholders::_2, std::placeholders::_3, fTimeZone));
+            break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDatetimeToString); break;
+
+          default:
+          {
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: datetime to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+          }
+        }
+
+        break;
+
+      case CalpontSystemCatalog::TIMESTAMP:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(normalizeTimestampToTimestamp); break;
+
+          case CalpontSystemCatalog::DATE:
+            result.emplace_back(std::bind(normalizeTimestampToDate, std::placeholders::_1,
+                                          std::placeholders::_2, std::placeholders::_3, fTimeZone));
+            break;
+
+          case CalpontSystemCatalog::DATETIME:
+            result.emplace_back(std::bind(normalizeTimestampToDatetime, std::placeholders::_1,
+                                          std::placeholders::_2, std::placeholders::_3, fTimeZone));
+            break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR:
+            result.emplace_back(std::bind(normalizeTimestampToString, std::placeholders::_1,
+                                          std::placeholders::_2, std::placeholders::_3, fTimeZone));
+            break;
+
+          default:
+          {
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: timestamp to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+          }
+        }
+
+        break;
+
+      case CalpontSystemCatalog::TIME:
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TIME: result.emplace_back(normalizeTimeToTime); break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeTimeToString); break;
+
+          default:
+          {
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: time to " << out->getColTypes()[i];
+            throw logic_error(os.str());
           }
+        }
 
-          break;
+        break;
 
-        case CalpontSystemCatalog::UTINYINT:
-        case CalpontSystemCatalog::USMALLINT:
-        case CalpontSystemCatalog::UMEDINT:
-        case CalpontSystemCatalog::UINT:
-        case CalpontSystemCatalog::UBIGINT:
-          switch (out->getColTypes()[i])
+      case CalpontSystemCatalog::FLOAT:
+      case CalpontSystemCatalog::UFLOAT:
+      case CalpontSystemCatalog::DOUBLE:
+      case CalpontSystemCatalog::UDOUBLE:
+      {
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TINYINT:
+          case CalpontSystemCatalog::SMALLINT:
+          case CalpontSystemCatalog::MEDINT:
+          case CalpontSystemCatalog::INT:
+          case CalpontSystemCatalog::BIGINT:
           {
-            case CalpontSystemCatalog::TINYINT:
-            case CalpontSystemCatalog::SMALLINT:
-            case CalpontSystemCatalog::MEDINT:
-            case CalpontSystemCatalog::INT:
-            case CalpontSystemCatalog::BIGINT:
+            if (out->getScale(i))
             {
-              if (out->getScale(i))
+              if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
               {
-                if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                  result.emplace_back(normalizeUintToIntWithScaleInt128);
+                if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                    in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+                  result.emplace_back(normalizeXFloatToIntWithScaleInt128);
                 else
-                  result.emplace_back(normalizeUntToIntWithScaleInt64);
-              } 
+                  result.emplace_back(normalizeXDoubleToIntWithScaleInt128);
+              }
               else
-                result.emplace_back(normalizeUintToIntNoScale); 
-              break;
+              {
+                if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                    in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+                  result.emplace_back(normalizeXFloatToIntWithScaleInt64);
+                else
+                  result.emplace_back(normalizeXDoubleToIntWithScaleInt64);
+              }
+            }
+            else
+            {
+              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                  in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+                result.emplace_back(normalizeXFloatToIntNoScale);
+              else
+                result.emplace_back(normalizeXDoubleToIntNoScale);
             }
+            break;
+          }
+
+          case CalpontSystemCatalog::UTINYINT:
+          case CalpontSystemCatalog::USMALLINT:
+          case CalpontSystemCatalog::UMEDINT:
+          case CalpontSystemCatalog::UINT:
+          case CalpontSystemCatalog::UBIGINT:
+          {
+            if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+              result.emplace_back(normalizeXFloatToUint);
+            else
+              result.emplace_back(normalizeXDoubleToUint);
+            break;
+          }
+
+          case CalpontSystemCatalog::FLOAT:
+          case CalpontSystemCatalog::UFLOAT:
+          {
+            if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+              result.emplace_back(normalizeXFloatToXFloat);
+            else
+              result.emplace_back(normalizeXDoubleToXFloat);
+            break;
+          }
+
+          case CalpontSystemCatalog::DOUBLE:
+          case CalpontSystemCatalog::UDOUBLE:
+          {
+            if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+              result.emplace_back(normalizeXFloatToXDouble);
+            else
+              result.emplace_back(normalizeXDoubleToXDouble);
+            break;
+          }
+
+          case CalpontSystemCatalog::LONGDOUBLE:
+          {
+            if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+              result.emplace_back(normalizeXFloatToLongDouble);
+            else
+              result.emplace_back(normalizeXDoubleToLongDouble);
+            break;
+          }
 
-            case CalpontSystemCatalog::UTINYINT:
-            case CalpontSystemCatalog::USMALLINT:
-            case CalpontSystemCatalog::UMEDINT:
-            case CalpontSystemCatalog::UINT:
-            case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeUintToUint); break;
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR:
+          {
+            if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+              result.emplace_back(normalizeXFloatToString);
+            else
+              result.emplace_back(normalizeXDoubleToString);
+            break;
+          }
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: 
+          case CalpontSystemCatalog::DECIMAL:
+          case CalpontSystemCatalog::UDECIMAL:
+          {
+            // xFLOAT or xDOUBLE to xDECIMAL conversion. Is it really possible?
+            // TODO:
+            // Perhaps we should add an assert here that this combination is not possible
+            // In the current reduction all problems mentioned in the code under
+            //  case "Signed INT to XDecimal" are also applicable here.
+            // TODO: isn't overflow possible below?
+            if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
             {
-              if (in.getScale(i))
-                result.emplace_back(normalizeUintToStringWithScale);
+              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                  in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+                result.emplace_back(normalizeXFloatToWideXDecimal);
               else
-                result.emplace_back(normalizeUintToStringNoScale);
+                result.emplace_back(normalizeXDoubleToWideXDecimal);
               break;
             }
-            
-            case CalpontSystemCatalog::DATE:
-            case CalpontSystemCatalog::DATETIME:
-            case CalpontSystemCatalog::TIME:
-            case CalpontSystemCatalog::TIMESTAMP:
-              throw logic_error(
-                  "TupleUnion::normalize(): tried to normalize an int to a timestamp, time, date or datetime");
-
-            case CalpontSystemCatalog::FLOAT:
-            case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizUintToXFloat); break;
+            else
+            {
+              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT ||
+                  in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
+                result.emplace_back(normalizeXFloatToXDecimal);
+              else
+                result.emplace_back(normalizeXDoubleToXDecimal);
+              break;
+            }
+            break;
+          }
 
-            case CalpontSystemCatalog::DOUBLE:
-            case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeUintToXDouble); break;
+          default:
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: floating point to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+        }
 
-            case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeUintToLongDouble); break;
+        break;
+      }
 
-            case CalpontSystemCatalog::DECIMAL:
-            case CalpontSystemCatalog::UDECIMAL:
+      case CalpontSystemCatalog::LONGDOUBLE:
+      {
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TINYINT:
+          case CalpontSystemCatalog::SMALLINT:
+          case CalpontSystemCatalog::MEDINT:
+          case CalpontSystemCatalog::INT:
+          case CalpontSystemCatalog::BIGINT:
+          {
+            if (out->getScale(i))
             {
-              /*
-                Unsigned INT to XDecimal
-                TODO:
-                - The overflow problem mentioned in the code under case "Signed INT to XDecimal:" is
-                  also applicable here.
-              */
-
               if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                result.emplace_back(normalizeUintToXDecimalInt128);
+                result.emplace_back(normalizeLongDoubleToIntWithScaleInt128);
               else
-                result.emplace_back(normalizeUintToXDecimalInt64);
-              break;
+                result.emplace_back(normalizeLongDoubleToIntWithScaleInt);
             }
+            else
+              result.emplace_back(normalizeLongDoubleToIntNoScale);
+            break;
+          }
+
+          case CalpontSystemCatalog::UTINYINT:
+          case CalpontSystemCatalog::USMALLINT:
+          case CalpontSystemCatalog::UMEDINT:
+          case CalpontSystemCatalog::UINT:
+          case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeLongDoubleToUint); break;
+
+          case CalpontSystemCatalog::FLOAT:
+          case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeLongDoubleToXFloat); break;
+
+          case CalpontSystemCatalog::DOUBLE:
+          case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeLongDoubleToXDouble); break;
+
+          case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeLongDoubleToLongDouble); break;
 
-            default:
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: integer to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeLongDoubleToString); break;
+
+          case CalpontSystemCatalog::DECIMAL:
+          case CalpontSystemCatalog::UDECIMAL:
+          {
+            // LONGDOUBLE to xDECIMAL conversions: is it really possible?
+            // TODO:
+            // Perhaps we should add an assert here that this combination is not possible
+            // In the current reduction all problems mentioned in the code under
+            //  case "Signed INT to XDecimal" are also applicable here.
+            if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+              result.emplace_back(normalizeLongDoubleToXDecimalInt128);
+            else
+              result.emplace_back(normalizeLongDoubleToXDecimalInt);
+
+            break;
           }
 
-          break;
+          default:
+            ostringstream os;
+            os << "TupleUnion::normalize(): tried an illegal conversion: floating point to "
+               << out->getColTypes()[i];
+            throw logic_error(os.str());
+        }
+
+        break;
+      }
+
+      case CalpontSystemCatalog::DECIMAL:
+      case CalpontSystemCatalog::UDECIMAL:
+      {
+        switch (out->getColTypes()[i])
+        {
+          case CalpontSystemCatalog::TINYINT:
+          case CalpontSystemCatalog::SMALLINT:
+          case CalpontSystemCatalog::MEDINT:
+          case CalpontSystemCatalog::INT:
+          case CalpontSystemCatalog::BIGINT:
+          case CalpontSystemCatalog::UTINYINT:
+          case CalpontSystemCatalog::USMALLINT:
+          case CalpontSystemCatalog::UMEDINT:
+          case CalpontSystemCatalog::UINT:
+          case CalpontSystemCatalog::UBIGINT:
+          case CalpontSystemCatalog::DECIMAL:
+          case CalpontSystemCatalog::UDECIMAL:
+          {
+            if (datatypes::isWideDecimalType(out->getColTypes()[i], out->getColumnWidth(i)))
+            {
+              if (out->getScale(i) == in.getScale(i))
+              {
+                if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+                  result.emplace_back(normalizeWideXDecimalToWideXDecimalNoScale);
+                else
+                  result.emplace_back(normalizeXDecimalToWideXDecimalNoScale);
+              }
+              else if (out->getScale(i) > in.getScale(i))
+              {
+                if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
+                  result.emplace_back(normalizeWideXDecimalToWideXDecimalWithScale);
+                else
+                  result.emplace_back(normalizeXDecimalToWideXDecimalWithScale);
+              }
+              else  // should not happen, the output's scale is the largest
+                throw logic_error("TupleUnion::normalize(): incorrect scale setting");
+            }
+            // If output type is narrow decimal, input type
+            // has to be narrow decimal as well.
+            else
+            {
+              if (out->getScale(i) == in.getScale(i))
+                result.emplace_back(normalizeXDecimalToOtherNoScale);
+              else if (out->getScale(i) > in.getScale(i))
+                result.emplace_back(normalizeXDecimalToOtherWithScale);
+              else  // should not happen, the output's scale is the largest
+                throw logic_error("TupleUnion::normalize(): incorrect scale setting");
+            }
+
+            break;
+          }
+
+          case CalpontSystemCatalog::FLOAT:
+          case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeXDecimalToXFloat); break;
+
+          case CalpontSystemCatalog::DOUBLE:
+          case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeXDecimalToXDouble); break;
+
+          case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeXDecimalToLongDouble); break;
+
+          case CalpontSystemCatalog::CHAR:
+          case CalpontSystemCatalog::TEXT:
+          case CalpontSystemCatalog::VARCHAR:
+          default:
+          {
+            if (LIKELY(in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH))
+              result.emplace_back(normalizeWideXDecimalToString);
+            else
+              result.emplace_back(normalizeXDecimalToString);
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case CalpontSystemCatalog::BLOB:
+      case CalpontSystemCatalog::VARBINARY: result.emplace_back(normalizeBlobVarbinary); break;
+
+      default:
+      {
+        ostringstream os;
+        os << "TupleUnion::normalize(): unknown input type (" << in.getColTypes()[i] << ")";
+        cout << os.str() << endl;
+        throw logic_error(os.str());
+      }
+    }
+  }
+
+  idbassert(out->getColumnCount() == result.size());
+  return result;
+}
+
+}  // namespace
+
+namespace joblist
+{
+inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const
+{
+  // Seat the owning step's scratch row on the row that `p` addresses, then hash that row.
+  Row& scratch = ts->row;
+  const bool fromNormalized = (p.group & RowPosition::normalizedFlag) != 0;
+  if (!fromNormalized)
+    ts->rowMemory[p.group].getRow(p.row, &scratch);
+  else
+    ts->normalizedData[p.group & ~RowPosition::normalizedFlag].getRow(p.row, &scratch);
+  return scratch.hash(ts->fLastCol);
+}
+
+inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const
+{
+  // Seats `dest` on the row `pos` addresses; the normalized flag selects the source buffer.
+  const auto seat = [this](const RowPosition& pos, Row& dest)
+  {
+    if (pos.group & RowPosition::normalizedFlag)
+      ts->normalizedData[pos.group & ~RowPosition::normalizedFlag].getRow(pos.row, &dest);
+    else
+      ts->rowMemory[pos.group].getRow(pos.row, &dest);
+  };
+  Row& lhs = ts->row;
+  Row& rhs = ts->row2;
+  seat(d1, lhs);
+  seat(d2, rhs);
+  return lhs.equals(rhs, ts->fLastCol);
+}
+/// Builds a union step for `tableOID`; `keyCount` determines fLastCol, which Hasher and Eq pass on.
+TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount)
+ : JobStep(jobInfo)
+ , fTableOID(tableOID)
+ , output(NULL)  // assigned in run() from the output job step association
+ , outputIt(-1)
+ , memUsage(0)
+ , rm(jobInfo.rm)
+ , runnersDone(0)
+ , distinctCount(0)
+ , distinctDone(0)
+ , fRowsReturned(0)
+ , runRan(false)
+ , joinRan(false)
+ , sessionMemLimit(jobInfo.umMemLimit)
+ , fTimeZone(jobInfo.timeZone)
+ , fLastCol(keyCount - 1)  // NOTE(review): keyCount == 0 would wrap in uint32_t arithmetic - confirm callers
+{
+  uniquer.reset(new Uniquer_t(10, Hasher(this), Eq(this), allocator));  // RowPosition set for the distinct check
+  fExtendedInfo = "TUN: ";
+  fQtc.stepParms().stepType = StepTeleStats::T_TUN;
+}
+
+TupleUnion::~TupleUnion()
+{
+  rm->returnMemory(memUsage, sessionMemLimit);
+  // run() never started, so no runner will close the output datalist: do it here.
+  if (output && !runRan)
+    output->endOfInput();
+}
+/// Returns the table OID this step was constructed with.
+CalpontSystemCatalog::OID TupleUnion::tableOid() const
+{
+  return fTableOID;
+}
+/// Stores the row group layouts of the inputs; readInput(which) reads inputRGs[which].
+void TupleUnion::setInputRowGroups(const vector<rowgroup::RowGroup>& in)
+{
+  inputRGs = in;
+}
+/// Stores the output row group layout and caches its getRowSizeWithStrings() in rowLength.
+void TupleUnion::setOutputRowGroup(const rowgroup::RowGroup& out)
+{
+  outputRG = out;
+  rowLength = outputRG.getRowSizeWithStrings();
+}
+/// Stores one flag per input; a set flag routes that input through the distinct check in readInput().
+void TupleUnion::setDistinctFlags(const vector<bool>& v)
+{
+  distinctFlags = v;
+}
+
+void TupleUnion::readInput(uint32_t which)
+{
+  /* Drains input `which`, normalizes every row to the output layout and emits it.
+   * The handling of the output got a little kludgey with the string table enhancement.
+   * Without a distinct check, the outputs are generated independently, locally in this fcn.
+   * With a distinct check, threads share the output, which is built in the 'rowMemory'
+   * vector rather than in thread-local memory.  Building the result in a common space
+   * allows us to store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers.
+   */
+
+  RowGroupDL* dl = NULL;
+  bool more = true;
+  RGData inRGData, outRGData, *tmpRGData;
+  uint32_t it = numeric_limits<uint32_t>::max();
+  RowGroup l_inputRG, l_outputRG, l_tmpRG;
+  Row inRow, outRow, tmpRow;
+  bool distinct;
+  uint64_t memUsageBefore, memUsageAfter, memDiff;
+  l_outputRG = outputRG;
+  dl = inputs[which];
+  l_inputRG = inputRGs[which];
+  l_inputRG.initRow(&inRow);
+  l_outputRG.initRow(&outRow);
+  distinct = distinctFlags[which];
+  // Distinct inputs normalize into normalizedData[which]; the others write into a local band.
+  if (distinct)
+  {
+    l_tmpRG = outputRG;
+    tmpRGData = &normalizedData[which];
+    l_tmpRG.initRow(&tmpRow);
+    l_tmpRG.setData(tmpRGData);
+    l_tmpRG.resetRowGroup(0);
+    l_tmpRG.getRow(0, &tmpRow);
+  }
+  else
+  {
+    outRGData = RGData(l_outputRG);
+    l_outputRG.setData(&outRGData);
+    l_outputRG.resetRowGroup(0);
+    l_outputRG.getRow(0, &outRow);
+  }
+
+  try
+  {
+    it = dl->getIterator();
+    more = dl->next(it, &inRGData);
+
+    if (dlTimes.FirstReadTime().tv_sec == 0)
+      dlTimes.setFirstReadTime();
+
+    if (fStartTime == -1)
+    {
+      StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_START, 1);
+      postStepStartTele(sts);
+    }
+
+    while (more && !cancelled())
+    {
+      /*
+          normalize each row
+            if distinct flag is set
+                  copy the row into the output and test for uniqueness
+                    if unique, increment the row count
+            else
+              copy the row into the output & inc row count
+      */
+      l_inputRG.setData(&inRGData);
+      l_inputRG.getRow(0, &inRow);
+
+      if (distinct)
+      {
+        memDiff = 0;
+        l_tmpRG.resetRowGroup(0);
+        l_tmpRG.getRow(0, &tmpRow);
+        l_tmpRG.setRowCount(l_inputRG.getRowCount());
+        // Pick the per-column converters once per band, then normalize the whole band via tmpRow.
+        const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &tmpRow, fTimeZone);
+        for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(), tmpRow.nextRow())
+          normalize(inRow, &tmpRow, normalizeFunctions);
+
+        l_tmpRG.getRow(0, &tmpRow);
+        {
+          boost::mutex::scoped_lock lk(uniquerMutex);
+          getOutput(&l_outputRG, &outRow, &outRGData);
+          memUsageBefore = allocator.getMemUsage();
+          // Insert each normalized row by position; only newly inserted rows are copied to the output.
+          uint32_t tmpOutputRowCount = l_outputRG.getRowCount();
+          const uint32_t tmpRGRowCount = l_tmpRG.getRowCount();
+          for (uint32_t i = 0; i < tmpRGRowCount; i++, tmpRow.nextRow())
+          {
+            pair<Uniquer_t::iterator, bool> inserted;
+            inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i));
+
+            if (inserted.second)
+            {
+              copyRow(tmpRow, &outRow);
+              const_cast<RowPosition&>(*(inserted.first)) =
+                  RowPosition(rowMemory.size() - 1, tmpOutputRowCount);  // entry now refers to the rowMemory copy
+              memDiff += outRow.getRealSize();
+              addToOutput(&outRow, &l_outputRG, true, outRGData, tmpOutputRowCount);
+              fRowsReturned++;
+            }
+          }
+
+          l_outputRG.setRowCount(tmpOutputRowCount);
+
+          memUsageAfter = allocator.getMemUsage();
+          memDiff += (memUsageAfter - memUsageBefore);
+        }
+        // Account for the added memory with the resource manager; on refusal flag ERR_UNION_TOO_BIG and abort().
+        if (rm->getMemory(memDiff, sessionMemLimit))
+        {
+          memUsage += memDiff;
+        }
+        else
+        {
+          fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG);
+
+          if (status() == 0)  // preserve existing error code
+          {
+            errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_UNION_TOO_BIG));
+            status(logging::ERR_UNION_TOO_BIG);
+          }
+
+          abort();
+        }
+      }
+      else
+      {
+        const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &outRow, fTimeZone);
+        const uint32_t inputRGRowCount = l_inputRG.getRowCount();
+        uint32_t tmpOutputRowCount = l_outputRG.getRowCount();
+
+        for (uint32_t i = 0; i < inputRGRowCount; i++, inRow.nextRow())
+        {
+          normalize(inRow, &outRow, normalizeFunctions);
+          addToOutput(&outRow, &l_outputRG, false, outRGData, tmpOutputRowCount);
+        }
+
+        fRowsReturned += inputRGRowCount;
+        l_outputRG.setRowCount(tmpOutputRowCount);
+      }
+
+      more = dl->next(it, &inRGData);
+    }
+  }
+  catch (...)
+  {
+    handleException(std::current_exception(), logging::unionStepErr, logging::ERR_UNION_TOO_BIG,
+                    "TupleUnion::readInput()");
+    status(logging::unionStepErr);
+    abort();
+  }
+
+  /* make sure that the input was drained before exiting.  This can happen if the
+  query was aborted */
+  if (dl && it != numeric_limits<uint32_t>::max())
+    while (more)
+      more = dl->next(it, &inRGData);
+  // Final bookkeeping under both locks: flush leftover rows; the last runner also closes the output.
+  {
+    boost::mutex::scoped_lock lock1(uniquerMutex);
+    boost::mutex::scoped_lock lock2(sMutex);
+
+    if (!distinct && l_outputRG.getRowCount() > 0)
+      output->insert(outRGData);
+
+    if (distinct)
+    {
+      getOutput(&l_outputRG, &outRow, &outRGData);
+
+      if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0)
+        output->insert(outRGData);
+    }
+
+    if (++runnersDone == fInputJobStepAssociation.outSize())
+    {
+      output->endOfInput();
+
+      StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_SUMMARY, 1, 1, fRowsReturned);
+      postStepSummaryTele(sts);
+
+      if (traceOn())
+      {
+        dlTimes.setLastReadTime();
+        dlTimes.setEndOfInputTime();
+
+        time_t t = time(0);
+        char timeString[50];
+        ctime_r(&t, timeString);
+        timeString[strlen(timeString) - 1] = '\0';
+        ostringstream logStr;
+        logStr << "ses:" << fSessionId << " st: " << fStepId << " finished at " << timeString
+               << "; total rows returned-" << fRowsReturned << endl
+               << "\t1st read " << dlTimes.FirstReadTimeString() << "; EOI " << dlTimes.EndOfInputTimeString()
+               << "; runtime-" << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime())
+               << "s;\n\tUUID " << uuids::to_string(fStepUuid) << endl
+               << "\tJob completion status " << status() << endl;
+        logEnd(logStr.str().c_str());
+        fExtendedInfo += logStr.str();
+        formatMiniStats();
+      }
+    }
+  }
+}
+
+uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs)
+{
+  // Serializes the next output band into `bs` and returns its row count.  When the output
+  // datalist yields nothing more, a band built by RGData(outputRG, 0U) carrying status() is sent.
+  RGData band;
+
+  bs.restart();
+  const bool haveBand = output->next(outputIt, &band);
+
+  if (!haveBand)
+    band = RGData(outputRG, 0U);
+
+  outputRG.setData(&band);
+
+  if (!haveBand)
+  {
+    outputRG.resetRowGroup(0);
+    outputRG.setStatus(status());
+  }
+
+  outputRG.serializeRGData(bs);
+
+  return outputRG.getRowCount();
+}
+
+void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data)
+{
+  // Bind rg/data to the newest rowMemory buffer (creating the first one on demand), then seat row.
+  if (LIKELY(!rowMemory.empty()))
+  {
+    *data = rowMemory.back();
+    rg->setData(data);
+  }
+  else
+  {
+    *data = RGData(*rg);
+    rg->setData(data);
+    rg->resetRowGroup(0);
+    rowMemory.push_back(*data);
+  }
+  rg->getRow(rg->getRowCount(), row);
+}
+
+void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, uint32_t& tmpOutputRowCount)
+{
+  // Steps past the row just written.  A band that reaches kRowsPerBand rows is inserted into
+  // `output` and replaced by a fresh buffer, which is also kept in rowMemory when keepit is set.
+  const uint32_t kRowsPerBand = 8192;
+  r->nextRow();
+  if (LIKELY(++tmpOutputRowCount != kRowsPerBand))
+    return;
+
+  rg->setRowCount(kRowsPerBand);
+  {
+    boost::mutex::scoped_lock guard(sMutex);
+    output->insert(data);
+  }
+  data = RGData(*rg);
+  rg->setData(&data);
+  rg->resetRowGroup(0);
+  rg->getRow(0, r);
+  tmpOutputRowCount = 0;
+  if (keepit)
+    rowMemory.push_back(data);
+}
+
+void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions)
+{
+  // Fills *out from `in` column by column: NULLs via writeNull(), the rest via normalizeFunctions[col].
+  out->setRid(0);
+
+  for (uint32_t col = 0; col < out->getColumnCount(); ++col)
+  {
+    if (!in.isNullValue(col))
+    {
+      /// Call the pre-compiled function.
+      normalizeFunctions[col](in, out, col);
+    }
+    else
+    {
+      TupleUnion::writeNull(out, col);
+    }
+  }
+}
+
+void TupleUnion::run()
+{
+  uint32_t i;
+  // Runs once (guarded by runRan under jlLock): wires inputs/output, then invokes one Runner per input.
+  boost::mutex::scoped_lock lk(jlLock);
+
+  if (runRan)
+    return;
+
+  runRan = true;
+  lk.unlock();
+  // Gather the row-group datalist of every input association.
+  for (i = 0; i < fInputJobStepAssociation.outSize(); i++)
+    inputs.push_back(fInputJobStepAssociation.outAt(i)->rowGroupDL());
 
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::VARCHAR:
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeStringToString); break;
+  output = fOutputJobStepAssociation.outAt(0)->rowGroupDL();
 
-            default:
-            {
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: string to " << out->getColTypes()[i];
-              throw logic_error(os.str());
-            }
-          }
+  if (fDelivery)
+  {
+    outputIt = output->getIterator();
+  }
 
-          break;
+  outputRG.initRow(&row);  // row and row2 are the scratch rows used by Hasher and Eq
+  outputRG.initRow(&row2);
 
-        case CalpontSystemCatalog::DATE:
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDateToDate); break;
+  distinctCount = 0;
+  normalizedData.reset(new RGData[inputs.size()]);  // one slot per input; only distinct inputs are reinit'ed below
 
-            case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDateToDatetime); break;
+  for (i = 0; i < inputs.size(); i++)
+  {
+    if (distinctFlags[i])
+    {
+      distinctCount++;
+      normalizedData[i].reinit(outputRG);
+    }
+  }
 
-            case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(std::bind(normalizeDateToTimestamp, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break;
-            
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDateToString); break;
+  runners.reserve(inputs.size());
 
-            default:
-            {
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: date to " << out->getColTypes()[i];
-              throw logic_error(os.str());
-            }
-          }
+  for (i = 0; i < inputs.size(); i++)
+  {
+    runners.push_back(jobstepThreadPool.invoke(Runner(this, i)));
+  }
+}
 
-          break;
+void TupleUnion::join()  // waits for the runners once (guarded by joinRan), then frees working state
+{
+  boost::mutex::scoped_lock lk(jlLock);
 
-        case CalpontSystemCatalog::DATETIME:
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::DATETIME: result.emplace_back(normalizeDatetimeToDatetime); break;
+  if (joinRan)
+    return;
 
-            case CalpontSystemCatalog::DATE: result.emplace_back(normalizeDatetimeToDate); break;
+  joinRan = true;
+  lk.unlock();
 
-            case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(std::bind(normalizeDatetimeToTimestamp, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break;
+  jobstepThreadPool.join(runners);
+  runners.clear();
+  uniquer->clear();
+  rowMemory.clear();
+  rm->returnMemory(memUsage, sessionMemLimit);
+  memUsage = 0;  // the destructor calls returnMemory(memUsage, ...) again; zero avoids returning it twice
+}
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeDatetimeToString); break;
+const string TupleUnion::toString() const  // describes the step: ids, then input and output associations
+{
+  ostringstream oss;
+  oss << "TupleUnion       ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId;
+  oss << " st:" << fStepId;
+  oss << " in:";
 
-            default:
-            {
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: datetime to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
-            }
-          }
+  for (unsigned i = 0; i < fInputJobStepAssociation.outSize(); i++)
+    oss << ((i == 0) ? " " : ", ") << fInputJobStepAssociation.outAt(i);
 
-          break;
+  oss << " out:";
 
-        case CalpontSystemCatalog::TIMESTAMP:
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::TIMESTAMP: result.emplace_back(normalizeTimestampToTimestamp); break;
+  for (unsigned i = 0; i < fOutputJobStepAssociation.outSize(); i++)
+    oss << ((i == 0) ? " " : ", ") << fOutputJobStepAssociation.outAt(i);
 
-            case CalpontSystemCatalog::DATE: result.emplace_back(std::bind(normalizeTimestampToDate, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break;
-          
-            case CalpontSystemCatalog::DATETIME: result.emplace_back(std::bind(normalizeTimestampToDatetime, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break;
-            
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(std::bind(normalizeTimestampToString, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, fTimeZone)); break;
+  oss << endl;
 
-            default:
-            {
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: timestamp to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
-            }
-          }
+  return oss.str();
+}
 
-          break;
+void TupleUnion::writeNull(Row* out, uint32_t col)
+{
+  switch (out->getColTypes()[col])
+  {
+    case CalpontSystemCatalog::TINYINT: out->setUintField<1>(joblist::TINYINTNULL, col); break;
 
-        case CalpontSystemCatalog::TIME:
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::TIME: result.emplace_back(normalizeTimeToTime); break;
+    case CalpontSystemCatalog::SMALLINT: out->setUintField<1>(joblist::SMALLINTNULL, col); break;
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeTimeToString); break;
+    case CalpontSystemCatalog::UTINYINT: out->setUintField<1>(joblist::UTINYINTNULL, col); break;
 
-            default:
-            {
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: time to " << out->getColTypes()[i];
-              throw logic_error(os.str());
-            }
-          }
+    case CalpontSystemCatalog::USMALLINT: out->setUintField<1>(joblist::USMALLINTNULL, col); break;
 
-          break;
+    case CalpontSystemCatalog::DECIMAL:
+    case CalpontSystemCatalog::UDECIMAL:
+    {
+      uint32_t len = out->getColumnWidth(col);
 
-          case CalpontSystemCatalog::FLOAT:
-          case CalpontSystemCatalog::UFLOAT:
-          case CalpontSystemCatalog::DOUBLE:
-          case CalpontSystemCatalog::UDOUBLE:
-          {
-            switch (out->getColTypes()[i])
-            {
-              case CalpontSystemCatalog::TINYINT:
-              case CalpontSystemCatalog::SMALLINT:
-              case CalpontSystemCatalog::MEDINT:
-              case CalpontSystemCatalog::INT:
-              case CalpontSystemCatalog::BIGINT:
-              {
-                if (out->getScale(i))
-                {
-                  if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                  {
-                    if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                      result.emplace_back(normalizeXFloatToIntWithScaleInt128);
-                    else
-                      result.emplace_back(normalizeXDoubleToIntWithScaleInt128);
-                  }
-                  else
-                  {
-                    if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                      result.emplace_back(normalizeXFloatToIntWithScaleInt64);
-                    else
-                      result.emplace_back(normalizeXDoubleToIntWithScaleInt64);
-                  }
-                } 
-                else
-                {
-                  if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                    result.emplace_back(normalizeXFloatToIntNoScale);
-                  else
-                    result.emplace_back(normalizeXDoubleToIntNoScale);
-                }
-                break;
-              }
+      switch (len)
+      {
+        case 1: out->setUintField<1>(joblist::TINYINTNULL, col); break;
 
-            case CalpontSystemCatalog::UTINYINT:
-            case CalpontSystemCatalog::USMALLINT:
-            case CalpontSystemCatalog::UMEDINT:
-            case CalpontSystemCatalog::UINT:
-            case CalpontSystemCatalog::UBIGINT: 
-            {
-              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                result.emplace_back(normalizeXFloatToUint);
-              else
-                result.emplace_back(normalizeXDoubleToUint);
-              break;
-            }
+        case 2: out->setUintField<2>(joblist::SMALLINTNULL, col); break;
 
-            case CalpontSystemCatalog::FLOAT:
-            case CalpontSystemCatalog::UFLOAT: 
-            {
-              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                result.emplace_back(normalizeXFloatToXFloat);
-              else
-                result.emplace_back(normalizeXDoubleToXFloat);
-              break;
-            }
-            
-            case CalpontSystemCatalog::DOUBLE:
-            case CalpontSystemCatalog::UDOUBLE: 
-            {
-              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                result.emplace_back(normalizeXFloatToXDouble);
-              else
-                result.emplace_back(normalizeXDoubleToXDouble);
-              break;
-            }
-            
-            case CalpontSystemCatalog::LONGDOUBLE: 
-            {
-              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                result.emplace_back(normalizeXFloatToLongDouble);
-              else
-                result.emplace_back(normalizeXDoubleToLongDouble);
-              break;
-            }
-            
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: 
-            {
-              if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                result.emplace_back(normalizeXFloatToString);
-              else
-                result.emplace_back(normalizeXDoubleToString);
-              break;
-            }            
-                        
-            case CalpontSystemCatalog::DECIMAL:
-            case CalpontSystemCatalog::UDECIMAL:
-            {
-              // xFLOAT or xDOUBLE to xDECIMAL conversion. Is it really possible?
-              // TODO:
-              // Perhaps we should add an assert here that this combination is not possible
-              // In the current reduction all problems mentioned in the code under
-              //  case "Signed INT to XDecimal" are also applicable here.
-              // TODO: isn't overflow possible below?
-              if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-              {
-                if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                  result.emplace_back(normalizeXFloatToWideXDecimal);
-                else
-                  result.emplace_back(normalizeXDoubleToWideXDecimal);
-                break;
-              }
-              else              
-              {
-                if (in.getColTypes()[i] == CalpontSystemCatalog::FLOAT || in.getColTypes()[i] == CalpontSystemCatalog::UFLOAT)
-                  result.emplace_back(normalizeXFloatToXDecimal);
-                else
-                  result.emplace_back(normalizeXDoubleToXDecimal);
-                break;
-              }              
-              break;
-            }
+        case 4: out->setUintField<4>(joblist::INTNULL, col); break;
 
-            default:
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: floating point to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
-          }
+        case 8: out->setUintField<8>(joblist::BIGINTNULL, col); break;
 
-          break;
-        }
+        case 16: out->setInt128Field(datatypes::Decimal128Null, col); break;
 
-        case CalpontSystemCatalog::LONGDOUBLE:
+        default:
         {
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::TINYINT:
-            case CalpontSystemCatalog::SMALLINT:
-            case CalpontSystemCatalog::MEDINT:
-            case CalpontSystemCatalog::INT:
-            case CalpontSystemCatalog::BIGINT:
-            {
-              if (out->getScale(i))
-              {
-                if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                  result.emplace_back(normalizeLongDoubleToIntWithScaleInt128);
-                else
-                  result.emplace_back(normalizeLongDoubleToIntWithScaleInt);
-              } 
-              else
-                result.emplace_back(normalizeLongDoubleToIntNoScale); 
-              break;
-            }
+        }
+      }
 
-            case CalpontSystemCatalog::UTINYINT:
-            case CalpontSystemCatalog::USMALLINT:
-            case CalpontSystemCatalog::UMEDINT:
-            case CalpontSystemCatalog::UINT:
-            case CalpontSystemCatalog::UBIGINT: result.emplace_back(normalizeLongDoubleToUint); break;
+      break;
+    }
 
-            case CalpontSystemCatalog::FLOAT:
-            case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeLongDoubleToXFloat); break;
+    case CalpontSystemCatalog::MEDINT:
+    case CalpontSystemCatalog::INT: out->setUintField<4>(joblist::INTNULL, col); break;
 
-            case CalpontSystemCatalog::DOUBLE:
-            case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeLongDoubleToXDouble); break;
+    case CalpontSystemCatalog::UMEDINT:
+    case CalpontSystemCatalog::UINT: out->setUintField<4>(joblist::UINTNULL, col); break;
 
-            case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeLongDoubleToLongDouble); break;
+    case CalpontSystemCatalog::FLOAT:
+    case CalpontSystemCatalog::UFLOAT: out->setUintField<4>(joblist::FLOATNULL, col); break;
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR: result.emplace_back(normalizeLongDoubleToString); break;
+    case CalpontSystemCatalog::DATE: out->setUintField<4>(joblist::DATENULL, col); break;
 
-            case CalpontSystemCatalog::DECIMAL:
-            case CalpontSystemCatalog::UDECIMAL:
-            {
-              // LONGDOUBLE to xDECIMAL conversions: is it really possible?
-              // TODO:
-              // Perhaps we should add an assert here that this combination is not possible
-              // In the current reduction all problems mentioned in the code under
-              //  case "Signed INT to XDecimal" are also applicable here.
-              if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                result.emplace_back(normalizeLongDoubleToXDecimalInt128);
-              else
-                result.emplace_back(normalizeLongDoubleToXDecimalInt);
-              
-              break;
-            }
+    case CalpontSystemCatalog::BIGINT: out->setUintField<8>(joblist::BIGINTNULL, col); break;
 
-            default:
-              ostringstream os;
-              os << "TupleUnion::normalize(): tried an illegal conversion: floating point to "
-                << out->getColTypes()[i];
-              throw logic_error(os.str());
-          }
+    case CalpontSystemCatalog::UBIGINT: out->setUintField<8>(joblist::UBIGINTNULL, col); break;
 
-          break;
-        }
+    case CalpontSystemCatalog::DOUBLE:
+    case CalpontSystemCatalog::UDOUBLE: out->setUintField<8>(joblist::DOUBLENULL, col); break;
 
-        case CalpontSystemCatalog::DECIMAL:
-        case CalpontSystemCatalog::UDECIMAL:
-        {
-          switch (out->getColTypes()[i])
-          {
-            case CalpontSystemCatalog::TINYINT:
-            case CalpontSystemCatalog::SMALLINT:
-            case CalpontSystemCatalog::MEDINT:
-            case CalpontSystemCatalog::INT:
-            case CalpontSystemCatalog::BIGINT:
-            case CalpontSystemCatalog::UTINYINT:
-            case CalpontSystemCatalog::USMALLINT:
-            case CalpontSystemCatalog::UMEDINT:
-            case CalpontSystemCatalog::UINT:
-            case CalpontSystemCatalog::UBIGINT:
-            case CalpontSystemCatalog::DECIMAL:
-            case CalpontSystemCatalog::UDECIMAL:
-            {
-              if (datatypes::isWideDecimalType(out->getColTypes()[i], out->getColumnWidth(i)))
-              {
-                if (out->getScale(i) == in.getScale(i))
-                {
-                  if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                    result.emplace_back(normalizeWideXDecimalToWideXDecimalNoScale);
-                  else
-                    result.emplace_back(normalizeXDecimalToWideXDecimalNoScale);
-                }
-                else if (out->getScale(i) > in.getScale(i))
-                {
-                  if (in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
-                    result.emplace_back(normalizeWideXDecimalToWideXDecimalWithScale);
-                  else
-                    result.emplace_back(normalizeXDecimalToWideXDecimalWithScale);
-                }
-                else  // should not happen, the output's scale is the largest
-                  throw logic_error("TupleUnion::normalize(): incorrect scale setting");
-              }
-              // If output type is narrow decimal, input type
-              // has to be narrow decimal as well.
-              else
-              {
-                if (out->getScale(i) == in.getScale(i))
-                  result.emplace_back(normalizeXDecimalToOtherNoScale);
-                else if (out->getScale(i) > in.getScale(i))
-                  result.emplace_back(normalizeXDecimalToOtherWithScale);
-                else  // should not happen, the output's scale is the largest
-                  throw logic_error("TupleUnion::normalize(): incorrect scale setting");
-              }
+    case CalpontSystemCatalog::DATETIME: out->setUintField<8>(joblist::DATETIMENULL, col); break;
 
-              break;
-            }
+    case CalpontSystemCatalog::TIMESTAMP: out->setUintField<8>(joblist::TIMESTAMPNULL, col); break;
 
-            case CalpontSystemCatalog::FLOAT:
-            case CalpontSystemCatalog::UFLOAT: result.emplace_back(normalizeXDecimalToXFloat); break; 
+    case CalpontSystemCatalog::TIME: out->setUintField<8>(joblist::TIMENULL, col); break;
 
-            case CalpontSystemCatalog::DOUBLE:
-            case CalpontSystemCatalog::UDOUBLE: result.emplace_back(normalizeXDecimalToXDouble); break;
+    case CalpontSystemCatalog::CHAR:
+    case CalpontSystemCatalog::TEXT:
+    case CalpontSystemCatalog::VARCHAR:
+    {
+      uint32_t len = out->getColumnWidth(col);
 
-            case CalpontSystemCatalog::LONGDOUBLE: result.emplace_back(normalizeXDecimalToLongDouble); break;
+      switch (len)
+      {
+        case 1: out->setUintField<1>(joblist::CHAR1NULL, col); break;
 
-            case CalpontSystemCatalog::CHAR:
-            case CalpontSystemCatalog::TEXT:
-            case CalpontSystemCatalog::VARCHAR:
-            default:
-            {
-              if (LIKELY(in.getColumnWidth(i) == datatypes::MAXDECIMALWIDTH))
-                result.emplace_back(normalizeWideXDecimalToString);
-              else
-                result.emplace_back(normalizeXDecimalToString);
-              break;
-            }
-          }
+        case 2: out->setUintField<2>(joblist::CHAR2NULL, col); break;
 
-          break;
-        }
+        case 3:
+        case 4: out->setUintField<4>(joblist::CHAR4NULL, col); break;
 
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::VARBINARY: result.emplace_back(normalizeBlobVarbinary); break;
+        case 5:
+        case 6:
+        case 7:
+        case 8: out->setUintField<8>(joblist::CHAR8NULL, col); break;
 
-        default:
-        {
-          ostringstream os;
-          os << "TupleUnion::normalize(): unknown input type (" << in.getColTypes()[i] << ")";
-          cout << os.str() << endl;
-          throw logic_error(os.str());
-        }
+        default: out->setStringField(nullptr, 0, col); break;
       }
+
+      break;
     }
 
-    idbassert(out->getColumnCount() == result.size());
-    return result;
-  }
+    case CalpontSystemCatalog::BLOB:
+    case CalpontSystemCatalog::VARBINARY:
+      // could use below if zero length and NULL are treated the same
+      // out->setVarBinaryField("", col); break;
+      out->setVarBinaryField(nullptr, 0, col);
+      break;
 
-}  // namespace
+    default:
+    {
+    }
+  }
+}
 
-namespace joblist
+void TupleUnion::formatMiniStats()
 {
-inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const
+  ostringstream oss;
+  oss << "TUS "
+      << "UM "
+      << "- "
+      << "- "
+      << "- "
+      << "- "
+      << "- "
+      << "- " << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime()) << " "
+      << fRowsReturned << " ";
+  fMiniInfo += oss.str();
+}
+
+inline uint64_t TupleRecursiveUnion::Hasher::operator()(const RowPosition& p) const
 {
   Row& row = ts->row;
 
@@ -1300,7 +1894,7 @@ inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const
   return row.hash(ts->fLastCol);
 }
 
-inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const
+inline bool TupleRecursiveUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const
 {
   Row &r1 = ts->row, &r2 = ts->row2;
 
@@ -1317,7 +1911,8 @@ inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition&
   return r1.equals(r2, ts->fLastCol);
 }
 
-TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount)
+TupleRecursiveUnion::TupleRecursiveUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo,
+                                         uint32_t keyCount)
  : JobStep(jobInfo)
  , fTableOID(tableOID)
  , output(NULL)
@@ -1339,7 +1934,7 @@ TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInf
   fQtc.stepParms().stepType = StepTeleStats::T_TUN;
 }
 
-TupleUnion::~TupleUnion()
+TupleRecursiveUnion::~TupleRecursiveUnion()
 {
   rm->returnMemory(memUsage, sessionMemLimit);
 
@@ -1347,28 +1942,28 @@ TupleUnion::~TupleUnion()
     output->endOfInput();
 }
 
-CalpontSystemCatalog::OID TupleUnion::tableOid() const
+CalpontSystemCatalog::OID TupleRecursiveUnion::tableOid() const
 {
   return fTableOID;
 }
 
-void TupleUnion::setInputRowGroups(const vector<rowgroup::RowGroup>& in)
+void TupleRecursiveUnion::setInputRowGroups(const vector<rowgroup::RowGroup>& in)
 {
   inputRGs = in;
 }
 
-void TupleUnion::setOutputRowGroup(const rowgroup::RowGroup& out)
+void TupleRecursiveUnion::setOutputRowGroup(const rowgroup::RowGroup& out)
 {
   outputRG = out;
   rowLength = outputRG.getRowSizeWithStrings();
 }
 
-void TupleUnion::setDistinctFlags(const vector<bool>& v)
+void TupleRecursiveUnion::setDistinctFlags(const vector<bool>& v)
 {
   distinctFlags = v;
 }
 
-void TupleUnion::readInput(uint32_t which)
+void TupleRecursiveUnion::readInput(uint32_t which)
 {
   /* The handling of the output got a little kludgey with the string table enhancement.
    * When there is no distinct check, the outputs are all generated independently of
@@ -1424,6 +2019,11 @@ void TupleUnion::readInput(uint32_t which)
       postStepStartTele(sts);
     }
 
+    // No data to process on this input (more is false before the read loop starts):
+    // abort the first recursive step. fRecursiveSteps is only filled by
+    // recursiveSteps(), so guard against an empty vector or a null entry.
+    if (!more && !fRecursiveSteps.empty() && fRecursiveSteps[0])
+      fRecursiveSteps[0]->abort();
     while (more && !cancelled())
     {
       /*
@@ -1574,7 +2174,223 @@ void TupleUnion::readInput(uint32_t which)
   }
 }
 
-uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs)
+// bool TupleRecursiveUnion::readInput(uint32_t which)
+// {
+//   /* The handling of the output got a little kludgey with the string table enhancement.
+//    * When there is no distinct check, the outputs are all generated independently of
+//    * each other locally in this fcn.  When there is a distinct check, threads
+//    * share the output, which is built in the 'rowMemory' vector rather than in
+//    * thread-local memory.  Building the result in a common space allows us to
+//    * store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers.
+//    */
+//
+//   // recursive union returns a boolean indicating whether the program should continue with the outer loop
+//   isStablised = false;
+//   RowGroupDL* dl = NULL;
+//   bool more = true;
+//   RGData inRGData, outRGData, *tmpRGData;
+//   uint32_t it = numeric_limits<uint32_t>::max();
+//   RowGroup l_inputRG, l_outputRG, l_tmpRG;
+//   Row inRow, outRow, tmpRow;
+//   bool distinct;
+//   uint64_t memUsageBefore, memUsageAfter, memDiff;
+//   l_outputRG = outputRG;
+//   dl = inputs[which];
+//   l_inputRG = inputRGs[which];
+//   l_inputRG.initRow(&inRow);
+//   l_outputRG.initRow(&outRow);
+//   distinct = distinctFlags[which];
+//
+//   if (distinct)
+//   {
+//     l_tmpRG = outputRG;
+//     tmpRGData = &normalizedData[which];
+//     l_tmpRG.initRow(&tmpRow);
+//     l_tmpRG.setData(tmpRGData);
+//     l_tmpRG.resetRowGroup(0);
+//     l_tmpRG.getRow(0, &tmpRow);
+//   }
+//   else
+//   {
+//     outRGData = RGData(l_outputRG);
+//     l_outputRG.setData(&outRGData);
+//     l_outputRG.resetRowGroup(0);
+//     l_outputRG.getRow(0, &outRow);
+//   }
+//
+//   try
+//   {
+//     it = dl->getIterator();
+//     more = dl->next(it, &inRGData);
+//
+//     if (dlTimes.FirstReadTime().tv_sec == 0)
+//       dlTimes.setFirstReadTime();
+//
+//     if (!more)
+//     {
+//       isStablised = true;
+//     }
+//
+//     if (fStartTime == -1)
+//     {
+//       StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_START, 1);
+//       postStepStartTele(sts);
+//     }
+//
+//     while (more && !cancelled())
+//     {
+//       /*
+//           normalize each row
+//             if distinct flag is set
+//                   copy the row into the output and test for uniqueness
+//                     if unique, increment the row count
+//             else
+//               copy the row into the output & inc row count
+//       */
+//       l_inputRG.setData(&inRGData);
+//       l_inputRG.getRow(0, &inRow);
+//
+//       if (distinct)
+//       {
+//         memDiff = 0;
+//         l_tmpRG.resetRowGroup(0);
+//         l_tmpRG.getRow(0, &tmpRow);
+//         l_tmpRG.setRowCount(l_inputRG.getRowCount());
+//
+//         const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &tmpRow, fTimeZone);
+//         for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(), tmpRow.nextRow())
+//           normalize(inRow, &tmpRow, normalizeFunctions);
+//
+//         l_tmpRG.getRow(0, &tmpRow);
+//         {
+//           boost::mutex::scoped_lock lk(uniquerMutex);
+//           getOutput(&l_outputRG, &outRow, &outRGData);
+//           memUsageBefore = allocator.getMemUsage();
+//
+//           uint32_t tmpOutputRowCount = l_outputRG.getRowCount();
+//           const uint32_t tmpRGRowCount = l_tmpRG.getRowCount();
+//           for (uint32_t i = 0; i < tmpRGRowCount; i++, tmpRow.nextRow())
+//           {
+//             pair<Uniquer_t::iterator, bool> inserted;
+//             inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i));
+//
+//             if (inserted.second)
+//             {
+//               copyRow(tmpRow, &outRow);
+//               const_cast<RowPosition&>(*(inserted.first)) =
+//                   RowPosition(rowMemory.size() - 1, tmpOutputRowCount);
+//               memDiff += outRow.getRealSize();
+//               addToOutput(&outRow, &l_outputRG, true, outRGData, tmpOutputRowCount);
+//               fRowsReturned++;
+//             }
+//           }
+//
+//           l_outputRG.setRowCount(tmpOutputRowCount);
+//
+//           memUsageAfter = allocator.getMemUsage();
+//           memDiff += (memUsageAfter - memUsageBefore);
+//         }
+//
+//         if (rm->getMemory(memDiff, sessionMemLimit))
+//         {
+//           memUsage += memDiff;
+//         }
+//         else
+//         {
+//           fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG);
+//
+//           if (status() == 0)  // preserve existing error code
+//           {
+//             errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_UNION_TOO_BIG));
+//             status(logging::ERR_UNION_TOO_BIG);
+//           }
+//
+//           abort();
+//         }
+//       }
+//       else
+//       {
+//         const normalizeFunctionsT normalizeFunctions = inferNormalizeFunctions(inRow, &outRow, fTimeZone);
+//         const uint32_t inputRGRowCount = l_inputRG.getRowCount();
+//         uint32_t tmpOutputRowCount = l_outputRG.getRowCount();
+//
+//         for (uint32_t i = 0; i < inputRGRowCount; i++, inRow.nextRow())
+//         {
+//           normalize(inRow, &outRow, normalizeFunctions);
+//           addToOutput(&outRow, &l_outputRG, false, outRGData, tmpOutputRowCount);
+//         }
+//
+//         fRowsReturned += inputRGRowCount;
+//         l_outputRG.setRowCount(tmpOutputRowCount);
+//       }
+//
+//       more = dl->next(it, &inRGData);
+//     }
+//   }
+//   catch (...)
+//   {
+//     handleException(std::current_exception(), logging::unionStepErr, logging::ERR_UNION_TOO_BIG,
+//                     "TupleRecursiveUnion::readInput()");
+//     status(logging::unionStepErr);
+//     abort();
+//   }
+//
+//   /* make sure that the input was drained before exiting.  This can happen if the
+//   query was aborted */
+//   if (dl && it != numeric_limits<uint32_t>::max())
+//     while (more)
+//       more = dl->next(it, &inRGData);
+//
+//   {
+//     boost::mutex::scoped_lock lock1(uniquerMutex);
+//     boost::mutex::scoped_lock lock2(sMutex);
+//
+//     if (!distinct && l_outputRG.getRowCount() > 0)
+//       output->insert(outRGData);
+//
+//     if (distinct)
+//     {
+//       getOutput(&l_outputRG, &outRow, &outRGData);
+//
+//       if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0)
+//         output->insert(outRGData);
+//     }
+//
+//     if (++runnersDone == fInputJobStepAssociation.outSize() || isStablised)
+//     {
+//       output->endOfInput();
+//
+//       StepTeleStats sts(fQueryUuid, fStepUuid, StepTeleStats::ST_SUMMARY, 1, 1, fRowsReturned);
+//       postStepSummaryTele(sts);
+//
+//       if (traceOn())
+//       {
+//         dlTimes.setLastReadTime();
+//         dlTimes.setEndOfInputTime();
+//
+//         time_t t = time(0);
+//         char timeString[50];
+//         ctime_r(&t, timeString);
+//         timeString[strlen(timeString) - 1] = '\0';
+//         ostringstream logStr;
+//         logStr << "ses:" << fSessionId << " st: " << fStepId << " finished at " << timeString
+//                << "; total rows returned-" << fRowsReturned << endl
+//                << "\t1st read " << dlTimes.FirstReadTimeString() << "; EOI " <<
+//                dlTimes.EndOfInputTimeString()
+//                << "; runtime-" << JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(), dlTimes.FirstReadTime())
+//                << "s;\n\tUUID " << uuids::to_string(fStepUuid) << endl
+//                << "\tJob completion status " << status() << endl;
+//         logEnd(logStr.str().c_str());
+//         fExtendedInfo += logStr.str();
+//         formatMiniStats();
+//       }
+//       return false;
+//     }
+//     return true;
+//   }
+// }
+//
+uint32_t TupleRecursiveUnion::nextBand(messageqcpp::ByteStream& bs)
 {
   RGData mem;
   bool more;
@@ -1599,7 +2415,7 @@ uint32_t TupleUnion::nextBand(messageqcpp::ByteStream& bs)
   return ret;
 }
 
-void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data)
+void TupleRecursiveUnion::getOutput(RowGroup* rg, Row* row, RGData* data)
 {
   if (UNLIKELY(rowMemory.empty()))
   {
@@ -1617,7 +2433,8 @@ void TupleUnion::getOutput(RowGroup* rg, Row* row, RGData* data)
   rg->getRow(rg->getRowCount(), row);
 }
 
-void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, uint32_t& tmpOutputRowCount)
+void TupleRecursiveUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data,
+                                      uint32_t& tmpOutputRowCount)
 {
   r->nextRow();
   tmpOutputRowCount++;
@@ -1640,7 +2457,7 @@ void TupleUnion::addToOutput(Row* r, RowGroup* rg, bool keepit, RGData& data, ui
   }
 }
 
-void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions)
+void TupleRecursiveUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& normalizeFunctions)
 {
   uint32_t i;
 
@@ -1650,7 +2467,7 @@ void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& n
   {
     if (in.isNullValue(i))
     {
-      TupleUnion::writeNull(out, i);
+      TupleRecursiveUnion::writeNull(out, i);
       continue;
     }
 
@@ -1659,7 +2476,7 @@ void TupleUnion::normalize(const Row& in, Row* out, const normalizeFunctionsT& n
   }
 }
 
-void TupleUnion::run()
+void TupleRecursiveUnion::run()
 {
   uint32_t i;
 
@@ -1696,6 +2513,31 @@ void TupleUnion::run()
     }
   }
 
+  // for (i = 0; i < inputs.size(); i++)
+  // {
+  // bool more = readInput(i);
+  // if (!more)
+  // {
+  //   // Drain all remaining inputs so they don't block upstream producers
+  //
+  //   // runners.reserve(inputs.size() - i - 1);
+  //   for (uint32_t j = i + 1; j < inputs.size(); j++)
+  //   {
+  //     // runners.push_back(jobstepThreadPool.invoke(Runner(this, j)));
+  //
+  //     RowGroupDL* dl = inputs[j];
+  //     uint32_t it = dl->getIterator();
+  //     rowgroup::RGData tmp;
+  //     while (dl->next(it, &tmp))
+  //     {
+  //       // discard rows
+  //     }
+  //   }
+  //   break;  // we're stabilized, stop real work
+  // }
+  //
+  // }
+
   runners.reserve(inputs.size());
 
   for (i = 0; i < inputs.size(); i++)
@@ -1704,7 +2546,7 @@ void TupleUnion::run()
   }
 }
 
-void TupleUnion::join()
+void TupleRecursiveUnion::join()
 {
   boost::mutex::scoped_lock lk(jlLock);
 
@@ -1722,10 +2564,10 @@ void TupleUnion::join()
   memUsage = 0;
 }
 
-const string TupleUnion::toString() const
+const string TupleRecursiveUnion::toString() const
 {
   ostringstream oss;
-  oss << "TupleUnion       ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId;
+  oss << "TupleRecursiveUnion       ses:" << fSessionId << " txn:" << fTxnId << " ver:" << fVerId;
   oss << " st:" << fStepId;
   oss << " in:";
 
@@ -1742,7 +2584,7 @@ const string TupleUnion::toString() const
   return oss.str();
 }
 
-void TupleUnion::writeNull(Row* out, uint32_t col)
+void TupleRecursiveUnion::writeNull(Row* out, uint32_t col)
 {
   switch (out->getColTypes()[col])
   {
@@ -1842,7 +2684,7 @@ void TupleUnion::writeNull(Row* out, uint32_t col)
   }
 }
 
-void TupleUnion::formatMiniStats()
+void TupleRecursiveUnion::formatMiniStats()
 {
   ostringstream oss;
   oss << "TUS "
@@ -1856,5 +2698,4 @@ void TupleUnion::formatMiniStats()
       << fRowsReturned << " ";
   fMiniInfo += oss.str();
 }
-
 }  // namespace joblist
diff --git a/dbcon/joblist/tupleunion.h b/dbcon/joblist/tupleunion.h
index 248ae2fd5..e2ec005fc 100644
--- a/dbcon/joblist/tupleunion.h
+++ b/dbcon/joblist/tupleunion.h
@@ -27,6 +27,7 @@
 //
 //
 
+#include "joblist.h"
 #include "jobstep.h"
 #include <unordered.h>
 
@@ -202,5 +203,176 @@ class TupleUnion : public JobStep, public TupleDeliveryStep
   long fTimeZone;
   uint32_t fLastCol;
 };
+class TupleRecursiveUnion : public JobStep, public TupleDeliveryStep
+{
+ public:
+  TupleRecursiveUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo,
+                      uint32_t keyCount);
+  ~TupleRecursiveUnion() override;
+
+  void run() override;
+  void join() override;
+
+  const std::string toString() const override;
+  execplan::CalpontSystemCatalog::OID tableOid() const override;
+
+  void setInputRowGroups(const std::vector<rowgroup::RowGroup>&);
+  void setOutputRowGroup(const rowgroup::RowGroup&) override;
+  void setDistinctFlags(const std::vector<bool>&);
+
+  const rowgroup::RowGroup& getOutputRowGroup() const override
+  {
+    return outputRG;
+  }
+  const rowgroup::RowGroup& getDeliveredRowGroup() const override
+  {
+    return outputRG;
+  }
+  void deliverStringTableRowGroup(bool b) override
+  {
+    outputRG.setUseStringTable(b);
+  }
+  bool deliverStringTableRowGroup() const override
+  {
+    return outputRG.usesStringTable();
+  }
+
+  // @bug 598 for self-join
+  std::string alias1() const
+  {
+    return fAlias1;
+  }
+  void alias1(const std::string& alias)
+  {
+    fAlias = fAlias1 = alias;
+  }
+  std::string alias2() const
+  {
+    return fAlias2;
+  }
+  void alias2(const std::string& alias)
+  {
+    fAlias2 = alias;
+  }
+
+  std::string view1() const
+  {
+    return fView1;
+  }
+  void view1(const std::string& vw)
+  {
+    fView = fView1 = vw;
+  }
+  std::string view2() const
+  {
+    return fView2;
+  }
+  void view2(const std::string& vw)
+  {
+    fView2 = vw;
+  }
+
+  uint32_t nextBand(messageqcpp::ByteStream& bs) override;
+
+  // Sets the job steps of the recursive branch. Taken by const reference so the
+  // vector is copied once, into fRecursiveSteps, rather than twice.
+  // readInput() calls abort() on the first of these steps when its input has no data.
+  void recursiveSteps(const JobStepVector& jsv) { fRecursiveSteps = jsv; }
+
+ private:
+  struct RowPosition
+  {
+    uint64_t group : 48;
+    uint64_t row : 16;
+
+    inline explicit RowPosition(uint64_t i = 0, uint64_t j = 0) : group(i), row(j) {};
+    static const uint64_t normalizedFlag = 0x800000000000ULL;  // 48th bit is set
+  };
+
+  void getOutput(rowgroup::RowGroup* rg, rowgroup::Row* row, rowgroup::RGData* data);
+  void addToOutput(rowgroup::Row* r, rowgroup::RowGroup* rg, bool keepit, rowgroup::RGData& data,
+                   uint32_t& tmpOutputRowCount);
+  void normalize(const rowgroup::Row& in, rowgroup::Row* out, const normalizeFunctionsT& normalizeFunctions);
+  void writeNull(rowgroup::Row* out, uint32_t col);
+  void readInput(uint32_t);
+  void formatMiniStats();
+
+  execplan::CalpontSystemCatalog::OID fTableOID;
+  // @bug 598 for self-join
+  std::string fAlias1;
+  std::string fAlias2;
+
+  std::string fView1;
+  std::string fView2;
+
+  rowgroup::RowGroup outputRG;
+  std::vector<rowgroup::RowGroup> inputRGs;
+  std::vector<RowGroupDL*> inputs;
+  RowGroupDL* output;
+  uint32_t outputIt;
+
+  JobStepVector fRecursiveSteps;
+
+  struct Runner
+  {
+    TupleRecursiveUnion* tu;
+    uint32_t index;
+    Runner(TupleRecursiveUnion* t, uint32_t in) : tu(t), index(in)
+    {
+    }
+    void operator()()
+    {
+      utils::setThreadName("TRUSRunner");
+      tu->readInput(index);
+    }
+  };
+  std::vector<uint64_t> runners;  // thread pool handles
+
+  struct Hasher
+  {
+    TupleRecursiveUnion* ts;
+    utils::Hasher_r h;
+    explicit Hasher(TupleRecursiveUnion* t) : ts(t)
+    {
+    }
+    uint64_t operator()(const RowPosition&) const;
+  };
+  struct Eq
+  {
+    TupleRecursiveUnion* ts;
+    explicit Eq(TupleRecursiveUnion* t) : ts(t)
+    {
+    }
+    bool operator()(const RowPosition&, const RowPosition&) const;
+  };
+
+  typedef std::tr1::unordered_set<RowPosition, Hasher, Eq, utils::STLPoolAllocator<RowPosition>> Uniquer_t;
 
+  boost::scoped_ptr<Uniquer_t> uniquer;
+  std::vector<rowgroup::RGData> rowMemory;
+  boost::mutex sMutex, uniquerMutex;
+  uint64_t memUsage;
+  uint32_t rowLength;
+  rowgroup::Row row, row2;
+  std::vector<bool> distinctFlags;
+  ResourceManager* rm;
+  utils::STLPoolAllocator<RowPosition> allocator;
+  boost::scoped_array<rowgroup::RGData> normalizedData;
+
+  uint32_t runnersDone;
+  uint32_t distinctCount;
+  uint32_t distinctDone;
+
+  uint64_t fRowsReturned;
+
+  // temporary hack to make sure JobList only calls run, join once
+  boost::mutex jlLock;
+  bool runRan, joinRan;
+
+  boost::shared_ptr<int64_t> sessionMemLimit;
+  long fTimeZone;
+  uint32_t fLastCol;
+
+  bool isStablised = false;
+};
 }  // namespace joblist
diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp
index 8f9da0c76..788e2aca0 100644
--- a/dbcon/mysql/ha_from_sub.cpp
+++ b/dbcon/mysql/ha_from_sub.cpp
@@ -220,6 +220,7 @@ SCSEP FromSubQuery::transform()
   gwi.thd = fGwip.thd;
   gwi.subQuery = this;
   gwi.viewName = fGwip.viewName;
+  gwi.isRecursiveWithTable = fGwip.isRecursiveWithTable;
   csep->derivedTbAlias(fAlias);  // always lower case
   csep->derivedTbView(fGwip.viewName.alias, lower_case_table_names);
 
@@ -243,4 +244,40 @@ SCSEP FromSubQuery::transform()
   return csep;
 }
 
+// Builds the execution plan for this FROM-clause subquery, forwarding b as the
+// fourth argument of getSelectPlan() (processFrom() passes its isUnion flag).
+// On failure the parse error is recorded on fGwip and a null SCSEP is returned.
+SCSEP FromSubQuery::transform(bool b)
+{
+  assert(fFromSub);
+  // Plan skeleton, marked as a FROM subquery.
+  SCSEP subPlan(new CalpontSelectExecutionPlan());
+  subPlan->sessionID(fGwip.sessionid);
+  subPlan->location(CalpontSelectExecutionPlan::FROM);
+  subPlan->subType(CalpontSelectExecutionPlan::FROM_SUBS);
+
+  // Walk info private to the subquery, seeded from fGwip.
+  gp_walk_info subGwi(fGwip.timeZone, fGwip.subQueriesChain);
+  subGwi.thd = fGwip.thd;
+  subGwi.subQuery = this;
+  subGwi.viewName = fGwip.viewName;
+  subGwi.isRecursiveWithTable = fGwip.isRecursiveWithTable;
+  subPlan->derivedTbAlias(fAlias);  // always lower case
+  subPlan->derivedTbView(fGwip.viewName.alias, lower_case_table_names);
+
+  const bool planned = getSelectPlan(subGwi, *fFromSub, subPlan, b) == 0;
+  if (planned)
+  {
+    // Insert column statistics
+    fGwip.mergeTableStatistics(subGwi.tableStatistics);
+    fGwip.subselectList.push_back(subPlan);
+    return subPlan;
+  }
+
+  fGwip.fatalParseError = true;
+  fGwip.parseErrorText =
+      subGwi.parseErrorText.empty() ? "Error occurred in FromSubQuery::transform()" : subGwi.parseErrorText;
+  subPlan.reset();
+  return subPlan;
+}
 }  // namespace cal_impl_if
diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp
index 701aac3b1..d12a00122 100644
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@@ -31,6 +31,7 @@
 #include <vector>
 #include <map>
 #include <limits>
+#include "idberrorinfo.h"
 #include "messagelog.h"
 
 #include <string.h>
@@ -41,6 +42,7 @@
 #include <boost/thread.hpp>
 
 #include "errorids.h"
+#include "mysqld_error.h"
 using namespace logging;
 
 #define PREFER_MY_CONFIG_H
@@ -5315,22 +5317,95 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
     {
       // Until we handle recursive cte:
       // Checking here ensures we catch all with clauses in the query.
-      if (table_ptr->is_recursive_with_table())
-      {
-        gwi.fatalParseError = true;
-        gwi.parseErrorText = "Recursive CTE";
-        setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi);
-        return ER_CHECK_NOT_IMPLEMENTED;
-      }
+      /*
+
+      refer to sql_union.cc, exec_recursive for a sample implementation
+
+      might just work by setting isUnion to true, then calling get select again.
+      need to set relevant meta data.
 
+      needs to write all to the first table, probably can be achieved
+      */
       string viewName = getViewName(table_ptr);
       if (lower_case_table_names)
       {
         boost::algorithm::to_lower(viewName);
       }
+      if (table_ptr->is_recursive_with_table())
+      {
+        // Reference to a recursive WITH table: plan the first SELECT of
+        // table_ptr->derived as a FROM-clause subquery, flag the resulting plan as
+        // recursive and register it as a derived table of the current query.
+        // csep and anchor_plan are SCSEP smart pointers, so their setters are called directly.
+        // NOTE(review): assumes table_ptr->derived is non-null for a recursive
+        // WITH table reference -- confirm.
+        csep->containsRecursiveQuery(true);
+        SELECT_LEX* start = table_ptr->derived->first_select();
+        // SELECT_LEX* end = NULL;
+        csep->maxRecursiveDepth(gwi.thd->variables.max_recursive_iterations);
+        // CalpontSelectExecutionPlan::SelectList unionVec;
+        // bool unionSel = true;
+        // uint8_t distUnionNum = 0;
+        SCSEP anchor_plan;
+
+        // FromSubQuery::transform(bool) copies fGwip.isRecursiveWithTable into the subquery's walk info.
+        gwi.isRecursiveWithTable = true;
+#ifdef DEBUG_WALK_COND
+
+        if (gwi.recursiveWithTableName == table_ptr->table_name.str)
+        {
+          cerr << "RECURSIVE TABLE: " << gwi.recursiveWithTableName << endl;
+        }
+
+#endif
+
+        FromSubQuery* fromSub = new FromSubQuery(gwi, start);
+        string alias(table_ptr->alias.str);
+        if (lower_case_table_names)
+        {
+          boost::algorithm::to_lower(alias);
+        }
+        fromSub->alias(alias);
+
+        CalpontSystemCatalog::TableAliasName tn =
+            make_aliasview("", table_ptr->table_name.str, alias, viewName);
+        // @bug 3852. check return execplan
+        anchor_plan = fromSub->transform(isUnion);
+        if (!anchor_plan)
+        {
+          setError(gwi.thd, ER_INTERNAL_ERROR, fromSub->gwip().parseErrorText, gwi);
+          CalpontSystemCatalog::removeCalpontSystemCatalog(gwi.sessionid);
+          return ER_INTERNAL_ERROR;
+        }
+
+        // anchor_plan is non-null from here on; no second check is needed.
+        anchor_plan->isRecursiveWithTable(true);
+
+        gwi.derivedTbList.push_back(anchor_plan);
+        gwi.tbList.push_back(tn);
+        CalpontSystemCatalog::TableAliasName tan = make_aliastable("", table_ptr->table_name.str, alias);
+        gwi.tableMap[tan] = make_pair(0, table_ptr);
+        // MCOL-2178 isUnion member only assigned, never used
+        // MIGR::infinidb_vtable.isUnion = true; //by-pass the 2nd pass of rnd_init
+
+        // if (with_element->with_anchor)
+        //   end = with_element->first_recursive;
+
+        // if (table_ptr->view)
+        // {
+        //   gwi.parseErrorText = "Recursive CTE view";
+        // }
+        // else
+        // {
+        //   gwi.parseErrorText = "Recursive CTE";
+        // }
+
+        // setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi);
+        // return ER_CHECK_NOT_IMPLEMENTED;
+      }
 
       // @todo process from subquery
-      if (table_ptr->derived)
+      else if (table_ptr->derived)
       {
         SELECT_LEX* select_cursor = table_ptr->derived->first_select();
         FromSubQuery* fromSub = new FromSubQuery(gwi, select_cursor);
@@ -5341,7 +5416,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
         }
         fromSub->alias(alias);
 
-        CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName);
+        CalpontSystemCatalog::TableAliasName tn =
+            make_aliasview("", table_ptr->table_name.str, alias, viewName);
         // @bug 3852. check return execplan
         SCSEP plan = fromSub->transform();
 
@@ -5352,10 +5428,17 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
           return ER_INTERNAL_ERROR;
         }
 
+        if (plan->containsRecursiveQuery())
+        {
+          csep->containsRecursiveQuery(true);
+        }
+
         gwi.derivedTbList.push_back(plan);
         gwi.tbList.push_back(tn);
-        CalpontSystemCatalog::TableAliasName tan = make_aliastable("", alias, alias);
+        CalpontSystemCatalog::TableAliasName tan = make_aliastable("", table_ptr->table_name.str, alias);
         gwi.tableMap[tan] = make_pair(0, table_ptr);
+        // MCOL-2178 isUnion member only assigned, never used
+        // MIGR::infinidb_vtable.isUnion = true; //by-pass the 2nd pass of rnd_init
       }
       else if (table_ptr->view)
       {
@@ -5392,12 +5475,9 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
         CalpontSystemCatalog::TableAliasName tn =
             make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, columnStore,
                            lower_case_table_names);
-        execplan::Partitions parts = getPartitions(table_ptr);
-        tn.partitions = parts;
         gwi.tbList.push_back(tn);
         CalpontSystemCatalog::TableAliasName tan = make_aliastable(
             table_ptr->db.str, table_name, table_ptr->alias.str, columnStore, lower_case_table_names);
-        tan.partitions = parts;
         gwi.tableMap[tan] = make_pair(0, table_ptr);
 #ifdef DEBUG_WALK_COND
         cerr << tn << endl;
@@ -5461,6 +5541,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
 
   if (!isUnion && (!isSelectHandlerTop || isSelectLexUnit) && select_lex.master_unit()->is_unit_op())
   {
+    // MCOL-2178 isUnion member only assigned, never used
+    // MIGR::infinidb_vtable.isUnion = true;
     CalpontSelectExecutionPlan::SelectList unionVec;
     SELECT_LEX* select_cursor = select_lex.master_unit()->first_select();
     unionSel = true;
@@ -5488,6 +5570,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
       // distinct union num
       if (sl == select_lex.master_unit()->union_distinct)
         distUnionNum = unionVec.size();
+      // if (sl->get_table_list()->is_recursive_with_table())
+      //   break;
     }
 
     csep->unionVec(unionVec);
@@ -5751,6 +5835,14 @@ int processGroupBy(SELECT_LEX& select_lex, gp_walk_info& gwi, const bool withRol
   gwi.hasWindowFunc = hasWindowFunc;
   groupcol = static_cast<ORDER*>(select_lex.group_list.first);
 
+  if (gwi.isRecursiveWithTable && groupcol)
+  {
+    gwi.fatalParseError = true;
+    gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_GROUP_BY, "GROUP BY clause");
+    setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi);
+    return ER_CHECK_NOT_IMPLEMENTED;
+  }
+
   gwi.disableWrapping = true;
   for (; groupcol; groupcol = groupcol->next)
   {
@@ -7056,9 +7148,16 @@ int processOrderBy(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep,
 {
   SQL_I_List<ORDER> order_list = select_lex.order_list;
   ORDER* ordercol = static_cast<ORDER*>(order_list.first);
-
   // check if window functions are in order by. InfiniDB process order by list if
   // window functions are involved, either in order by or projection.
+  if (gwi.isRecursiveWithTable && ordercol)
+  {
+    gwi.fatalParseError = true;
+    gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_ORDER_BY, "WITH RECURSIVE");
+    setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, gwi.parseErrorText, gwi);
+    return ER_CHECK_NOT_IMPLEMENTED;
+  }
+
   for (; ordercol; ordercol = ordercol->next)
   {
     if ((*(ordercol->item))->type() == Item::WINDOW_FUNC_ITEM)
diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h
index 5a4291f65..efdf94359 100644
--- a/dbcon/mysql/ha_mcs_impl_if.h
+++ b/dbcon/mysql/ha_mcs_impl_if.h
@@ -415,6 +415,7 @@ struct gp_walk_info
   bool aggOnSelect;
   bool hasWindowFunc;
   bool hasSubSelect;
+  bool isRecursiveWithTable = false;
   SubQuery* lastSub;
   std::vector<View*> viewList;
   std::map<std::string, execplan::ParseTree*> derivedTbFilterMap;
@@ -722,9 +723,9 @@ execplan::CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item);
 execplan::SPTP getIntervalType(gp_walk_info* gwip, int interval_type);
 uint32_t isPseudoColumn(std::string funcName);
 void setDerivedTable(execplan::ParseTree* n);
-execplan::ParseTree* setDerivedFilter(gp_walk_info* gwip, execplan::ParseTree*& n,
-                                      std::map<std::string, execplan::ParseTree*>& obj,
-                                      execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList);
+// execplan::ParseTree* setDerivedFilter(gp_walk_info* gwip, execplan::ParseTree*& n,
+//                                       std::map<std::string, execplan::ParseTree*>& obj,
+//                                       execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList);
 void derivedTableOptimization(gp_walk_info* gwip, execplan::SCSEP& csep);
 bool buildEqualityPredicate(execplan::ReturnedColumn* lhs, execplan::ReturnedColumn* rhs, gp_walk_info* gwip,
                             boost::shared_ptr<execplan::Operator>& sop, const Item_func::Functype& funcType,
diff --git a/dbcon/mysql/ha_mcs_opt_rewrites.cpp b/dbcon/mysql/ha_mcs_opt_rewrites.cpp
index c6bea2401..0e718095a 100644
--- a/dbcon/mysql/ha_mcs_opt_rewrites.cpp
+++ b/dbcon/mysql/ha_mcs_opt_rewrites.cpp
@@ -106,7 +106,10 @@ void first_cond_optimization_flag_toggle(SELECT_LEX* select_lex, void (*func)(SE
       {
         for (SELECT_LEX* sl = unit->first_select(); sl; sl = sl->next_select())
         {
-          first_cond_optimization_flag_toggle(sl, func);
+          if (sl->get_table_list())
+          {
+            first_cond_optimization_flag_toggle(sl, func);
+          }
         }
       }
     }
diff --git a/dbcon/mysql/ha_mcs_pushdown.cpp b/dbcon/mysql/ha_mcs_pushdown.cpp
index 134188dce..080f76f27 100644
--- a/dbcon/mysql/ha_mcs_pushdown.cpp
+++ b/dbcon/mysql/ha_mcs_pushdown.cpp
@@ -364,7 +364,8 @@ void item_check(Item* item, bool* unsupported_feature)
 
 bool check_user_var(SELECT_LEX* select_lex)
 {
-  if (!select_lex) {
+  if (!select_lex)
+  {
     // There are definitely no user vars if select_lex is null
     return false;
   }
@@ -878,7 +879,7 @@ select_handler* create_columnstore_select_handler_(THD* thd, SELECT_LEX* sel_lex
       // Unset select_lex::first_cond_optimization
       if (select_lex->first_cond_optimization)
       {
-        first_cond_optimization_flag_toggle(select_lex, &first_cond_optimization_flag_unset);
+        // first_cond_optimization_flag_toggle(select_lex, &first_cond_optimization_flag_unset);
       }
     }
   }
diff --git a/dbcon/mysql/ha_subquery.h b/dbcon/mysql/ha_subquery.h
index 403881e21..4d961a95d 100644
--- a/dbcon/mysql/ha_subquery.h
+++ b/dbcon/mysql/ha_subquery.h
@@ -228,6 +228,8 @@ class FromSubQuery : public SubQuery
   }
   execplan::SCSEP transform();
 
+  execplan::SCSEP transform(bool b);
+
  private:
   SELECT_LEX* fFromSub;
   std::string fAlias;
diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp
index 887ebbef3..ee9aec183 100644
--- a/dbcon/mysql/ha_window_function.cpp
+++ b/dbcon/mysql/ha_window_function.cpp
@@ -21,6 +21,7 @@
  *
  *
  ***********************************************************************/
+#include <cstddef>
 #define PREFER_MY_CONFIG_H
 #include <my_config.h>
 #include <iostream>
@@ -307,6 +308,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
   // String str;
   // item->print(&str, QT_INFINIDB_NO_QUOTE);
   // cout << str.c_ptr() << endl;
+  if (gwi.isRecursiveWithTable)
+  {
+    gwi.fatalParseError = true;
+    gwi.parseErrorText = "Window Functions not supported in recursive CTE";
+    return NULL;
+  }
+
   if (get_fe_conn_info_ptr() == NULL)
   {
     set_fe_conn_info_ptr((void*)new cal_connection_info());
@@ -536,7 +544,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
 
         srcp->asc(orderCol->direction == ORDER::ORDER_ASC ? true : false);
         //					srcp->nullsFirst(orderCol->nulls); // nulls 2-default, 1-nulls
-        //first, 0-nulls last
+        // first, 0-nulls last
         srcp->nullsFirst(orderCol->direction == ORDER::ORDER_ASC
                              ? 1
                              : 0);  // WINDOWS TODO: implement NULLS FIRST/LAST in 10.2 front end
diff --git a/dbcon/rbo/rbo_predicate_pushdown.h b/dbcon/rbo/rbo_predicate_pushdown.h
index de8b5fa57..8a86f1d9e 100644
--- a/dbcon/rbo/rbo_predicate_pushdown.h
+++ b/dbcon/rbo/rbo_predicate_pushdown.h
@@ -20,11 +20,17 @@
 #define PREFER_MY_CONFIG_H
 #include <my_config.h>
 #include <dbcon/mysql/idb_mysql.h>
+#include <dbcon/mysql/ha_mcs_impl_if.h>
 
 #include "execplan/calpontselectexecutionplan.h"
 #include "rulebased_optimizer.h"
 
-namespace optimizer {
-  bool predicatePushdownFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
-  bool applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
-}
\ No newline at end of file
+namespace optimizer
+{
+bool predicatePushdownFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
+bool applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
+execplan::ParseTree* setDerivedFilter(cal_impl_if::gp_walk_info* gwip, execplan::ParseTree*& n,
+                                      std::map<std::string, execplan::ParseTree*>& obj,
+                                      execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList);
+
+}  // namespace optimizer
diff --git a/mysql-test/columnstore/future/MCOL-5142-additional-cases.result b/mysql-test/columnstore/future/MCOL-5142-additional-cases.result
new file mode 100644
index 000000000..0e2ccd371
--- /dev/null
+++ b/mysql-test/columnstore/future/MCOL-5142-additional-cases.result
@@ -0,0 +1,150 @@
+DROP DATABASE IF EXISTS MCOL5142_COMP;
+CREATE DATABASE MCOL5142_COMP;
+USE MCOL5142_COMP;
+CREATE TABLE t_cycle (id INT, next_id INT) ENGINE=ColumnStore;
+INSERT INTO t_cycle VALUES (1, 2), (2, 3), (3, 1);
+SET @@max_recursive_iterations = 10;
+WITH RECURSIVE cte AS (
+SELECT id, next_id FROM t_cycle WHERE id = 1
+UNION ALL
+SELECT t.id, t.next_id
+FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT * FROM cte LIMIT 100;
+id	next_id
+1	2
+WITH RECURSIVE cte AS (
+SELECT id, next_id FROM t_cycle WHERE id = 1
+UNION ALL
+SELECT t.id, t.next_id
+FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT COUNT(*) FROM cte;
+ERROR HY000: Internal error: No key found.
+WITH RECURSIVE cte AS (
+SELECT id, next_id FROM t_cycle WHERE id = 1
+UNION ALL
+SELECT t.id, t.next_id
+FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT id FROM cte LIMIT 20;
+ERROR HY000: Internal error: No key found.
+SET @@max_recursive_iterations = 1000;
+CREATE TABLE t_base (id INT, val INT) ENGINE=ColumnStore;
+INSERT INTO t_base VALUES (1, 10), (2, 20);
+WITH RECURSIVE cte AS (
+SELECT id, val FROM t_base
+UNION ALL
+SELECT t.id, SUM(t.val)
+FROM t_base t JOIN cte c ON t.id = c.id + 1
+GROUP BY t.id
+)
+SELECT * FROM cte;
+ERROR HY000: Restrictions imposed on recursive definitions are violated for table 'cte'
+WITH RECURSIVE cte AS (
+SELECT id, val FROM t_base
+UNION ALL
+SELECT t.id, t.val
+FROM t_base t JOIN cte c ON t.id = c.id + 1
+ORDER BY t.val
+)
+SELECT * FROM cte;
+ERROR 42000: Table 't' from one of the SELECTs cannot be used in ORDER BY
+WITH RECURSIVE cte AS (
+SELECT id, val FROM t_base
+UNION ALL
+SELECT t.id, ROW_NUMBER() OVER (ORDER BY t.val)
+FROM t_base t JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+ERROR HY000: Restrictions imposed on recursive definitions are violated for table 'cte'
+CREATE TABLE t_empty_1 (id INT) ENGINE=ColumnStore;
+CREATE TABLE t_empty_2 (id INT) ENGINE=ColumnStore;
+WITH RECURSIVE cte AS (
+SELECT id FROM t_empty_1
+UNION ALL
+SELECT t.id FROM t_empty_2 t JOIN cte c ON t.id = c.id
+)
+SELECT * FROM cte;
+id
+CREATE TABLE t_linear (id INT) ENGINE=ColumnStore;
+INSERT INTO t_linear VALUES (1), (2), (3);
+WITH RECURSIVE cte AS (
+SELECT id
+FROM t_linear
+WHERE id = 1
+UNION ALL
+SELECT t.id
+FROM t_linear t
+JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+id
+1
+2
+3
+WITH RECURSIVE cte AS (
+SELECT id
+FROM t_linear
+WHERE id = 1
+UNION ALL
+SELECT t.id
+FROM t_linear t
+JOIN cte c ON t.id = c.id + 1
+)
+SELECT COUNT(*) FROM cte;
+COUNT(*)
+3
+CREATE TABLE t_external (id INT, multiplier INT) ENGINE=ColumnStore;
+INSERT INTO t_external VALUES (1, 2), (2, 3), (3, 4);
+WITH RECURSIVE cte AS (
+SELECT t_linear.id, t_linear.id AS val
+FROM t_linear
+WHERE id = 1
+UNION ALL
+SELECT t_linear.id, t_linear.id * t_external.multiplier
+FROM t_linear
+JOIN cte c ON t_linear.id = c.id + 1
+JOIN t_external ON t_external.id = t_linear.id
+)
+SELECT * FROM cte;
+id	val
+1	1
+2	6
+3	12
+CREATE TABLE t_empty_anchor (id INT) ENGINE=ColumnStore;
+INSERT INTO t_empty_anchor VALUES (1), (2), (3);
+WITH RECURSIVE cte AS (
+SELECT id
+FROM t_empty_anchor
+WHERE id = 999
+UNION ALL
+SELECT t.id
+FROM t_empty_anchor t
+JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+id
+WITH RECURSIVE
+cte1 AS (
+SELECT 1 AS n
+UNION ALL
+SELECT n + 1 FROM cte1 WHERE n < 3
+),
+cte2 AS (
+SELECT 10 AS m
+UNION ALL
+SELECT m + 10 FROM cte2 WHERE m < 30
+)
+SELECT 'cte1' AS source, n AS value FROM cte1
+UNION ALL
+SELECT 'cte2' AS source, m AS value FROM cte2
+ORDER BY source, value;
+source	value
+cte1	1
+cte1	2
+cte1	3
+cte2	10
+cte2	20
+cte2	30
+DROP DATABASE MCOL5142_COMP;
diff --git a/mysql-test/columnstore/future/MCOL-5142-additional-cases.test b/mysql-test/columnstore/future/MCOL-5142-additional-cases.test
new file mode 100644
index 000000000..5db0b4813
--- /dev/null
+++ b/mysql-test/columnstore/future/MCOL-5142-additional-cases.test
@@ -0,0 +1,178 @@
+--disable_warnings
+DROP DATABASE IF EXISTS MCOL5142_COMP;
+--enable_warnings
+
+CREATE DATABASE MCOL5142_COMP;
+
+USE MCOL5142_COMP;
+
+# FAILING TEST CASES (bugs)
+
+# Cyclic recursion tests
+CREATE TABLE t_cycle (id INT, next_id INT) ENGINE=ColumnStore;
+INSERT INTO t_cycle VALUES (1, 2), (2, 3), (3, 1);
+
+SET @@max_recursive_iterations = 10;
+
+# Cyclic recursion with SELECT * - non-deterministic; max_recursive_iterations seems to be ignored.
+# May hang or return 1000, 1, or another number of rows, so the recorded result may not reproduce.
+WITH RECURSIVE cte AS (
+  SELECT id, next_id FROM t_cycle WHERE id = 1
+  UNION ALL
+  SELECT t.id, t.next_id
+  FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT * FROM cte LIMIT 100;
+
+# Cyclic recursion with aggregate functions - fails with ERROR 1815
+--error 1815
+WITH RECURSIVE cte AS (
+  SELECT id, next_id FROM t_cycle WHERE id = 1
+  UNION ALL
+  SELECT t.id, t.next_id
+  FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT COUNT(*) FROM cte;
+
+# Cyclic recursion with column projection - fails with ERROR 1815
+--error 1815
+WITH RECURSIVE cte AS (
+  SELECT id, next_id FROM t_cycle WHERE id = 1
+  UNION ALL
+  SELECT t.id, t.next_id
+  FROM t_cycle t JOIN cte c ON t.id = c.next_id
+)
+SELECT id FROM cte LIMIT 20;
+
+SET @@max_recursive_iterations = 1000;
+
+# ============================================================
+# WORKING TEST CASES
+# ============================================================
+
+# Prohibited clauses in the recursive part - each query must fail with the expected error
+
+CREATE TABLE t_base (id INT, val INT) ENGINE=ColumnStore;
+INSERT INTO t_base VALUES (1, 10), (2, 20);
+
+# GROUP BY in recursive part -> Should fail
+--error 4008
+WITH RECURSIVE cte AS (
+  SELECT id, val FROM t_base
+  UNION ALL
+  SELECT t.id, SUM(t.val)
+  FROM t_base t JOIN cte c ON t.id = c.id + 1
+  GROUP BY t.id
+)
+SELECT * FROM cte;
+
+
+# ORDER BY in recursive part -> Should fail
+--error 1250
+WITH RECURSIVE cte AS (
+  SELECT id, val FROM t_base
+  UNION ALL
+  SELECT t.id, t.val
+  FROM t_base t JOIN cte c ON t.id = c.id + 1
+  ORDER BY t.val
+)
+SELECT * FROM cte;
+
+
+# Window Function in recursive part -> Should fail
+--error 4008
+WITH RECURSIVE cte AS (
+  SELECT id, val FROM t_base
+  UNION ALL
+  SELECT t.id, ROW_NUMBER() OVER (ORDER BY t.val)
+  FROM t_base t JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+
+# Empty tables - both the anchor and the recursive part read from empty tables
+CREATE TABLE t_empty_1 (id INT) ENGINE=ColumnStore;
+CREATE TABLE t_empty_2 (id INT) ENGINE=ColumnStore;
+
+WITH RECURSIVE cte AS (
+  SELECT id FROM t_empty_1
+  UNION ALL
+  SELECT t.id FROM t_empty_2 t JOIN cte c ON t.id = c.id
+)
+SELECT * FROM cte;
+
+# Linear recursion - works
+CREATE TABLE t_linear (id INT) ENGINE=ColumnStore;
+INSERT INTO t_linear VALUES (1), (2), (3);
+
+WITH RECURSIVE cte AS (
+  SELECT id
+  FROM t_linear
+  WHERE id = 1
+  UNION ALL
+  SELECT t.id
+  FROM t_linear t
+  JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+
+WITH RECURSIVE cte AS (
+  SELECT id
+  FROM t_linear
+  WHERE id = 1
+  UNION ALL
+  SELECT t.id
+  FROM t_linear t
+  JOIN cte c ON t.id = c.id + 1
+)
+SELECT COUNT(*) FROM cte;
+
+# JOIN with external table inside recursive CTE - works
+CREATE TABLE t_external (id INT, multiplier INT) ENGINE=ColumnStore;
+INSERT INTO t_external VALUES (1, 2), (2, 3), (3, 4);
+
+WITH RECURSIVE cte AS (
+  SELECT t_linear.id, t_linear.id AS val
+  FROM t_linear
+  WHERE id = 1
+  UNION ALL
+  SELECT t_linear.id, t_linear.id * t_external.multiplier
+  FROM t_linear
+  JOIN cte c ON t_linear.id = c.id + 1
+  JOIN t_external ON t_external.id = t_linear.id
+)
+SELECT * FROM cte;
+
+# Empty anchor set
+CREATE TABLE t_empty_anchor (id INT) ENGINE=ColumnStore;
+INSERT INTO t_empty_anchor VALUES (1), (2), (3);
+
+WITH RECURSIVE cte AS (
+  SELECT id
+  FROM t_empty_anchor
+  WHERE id = 999
+  UNION ALL
+  SELECT t.id
+  FROM t_empty_anchor t
+  JOIN cte c ON t.id = c.id + 1
+)
+SELECT * FROM cte;
+
+# Multiple independent recursive CTEs
+
+WITH RECURSIVE
+  cte1 AS (
+    SELECT 1 AS n
+    UNION ALL
+    SELECT n + 1 FROM cte1 WHERE n < 3
+  ),
+  cte2 AS (
+    SELECT 10 AS m
+    UNION ALL
+    SELECT m + 10 FROM cte2 WHERE m < 30
+  )
+SELECT 'cte1' AS source, n AS value FROM cte1
+UNION ALL
+SELECT 'cte2' AS source, m AS value FROM cte2
+ORDER BY source, value;
+
+DROP DATABASE MCOL5142_COMP;
diff --git a/mysql-test/columnstore/future/MCOL-5142-basic.test b/mysql-test/columnstore/future/MCOL-5142-basic.test
new file mode 100644
index 000000000..3ceb4cb38
--- /dev/null
+++ b/mysql-test/columnstore/future/MCOL-5142-basic.test
@@ -0,0 +1,46 @@
+--disable_warnings
+DROP DATABASE IF EXISTS MCOL5142;
+--enable_warnings
+
+CREATE DATABASE MCOL5142;
+
+
+USE MCOL5142;
+CREATE TABLE employees (
+    id INT NOT NULL,
+    name VARCHAR(100),
+    manager_id INT
+) ENGINE = ColumnStore;
+
+INSERT INTO employees (id, name, manager_id) VALUES
+(1, 'Alice', NULL),
+(2, 'Bob', 1),
+(3, 'Charlie', 1),
+(4, 'David', 2),
+(5, 'Eve', 2),
+(6, 'Frank', 3);
+
+INSERT INTO employees (id, name, manager_id) VALUES
+(7, 'Grace', 4),    
+(8, 'Heidi', 4),     
+(9, 'Ivan', 5),      
+(10, 'Judy', 6),     
+(11, 'Karl', 7),     
+(12, 'Laura', 11);   
+
+
+
+WITH RECURSIVE employee_hierarchy AS (
+    SELECT id, name, manager_id, 0 AS level
+    FROM employees
+    WHERE id = 1
+
+    UNION ALL
+
+    SELECT e.id, e.name, e.manager_id, eh.level + 1
+    FROM employees as e
+    JOIN employee_hierarchy eh ON e.manager_id = eh.id
+)
+SELECT * FROM employee_hierarchy;
+
+DROP DATABASE MCOL5142;
diff --git a/mysql-test/columnstore/future/MCOL-5142-multi-table.test b/mysql-test/columnstore/future/MCOL-5142-multi-table.test
new file mode 100644
index 000000000..40a5848ea
--- /dev/null
+++ b/mysql-test/columnstore/future/MCOL-5142-multi-table.test
@@ -0,0 +1,73 @@
+--disable_warnings
+DROP DATABASE IF EXISTS MULTI_TABLE_TEST;
+--enable_warnings
+
+CREATE DATABASE MULTI_TABLE_TEST;
+
+USE MULTI_TABLE_TEST;
+
+CREATE TABLE products (
+    product_id INT NOT NULL,
+    product_name VARCHAR(100)
+) ENGINE = ColumnStore;
+
+CREATE TABLE components (
+    component_id INT NOT NULL,
+    component_name VARCHAR(100),
+    parent_product_id INT
+) ENGINE = ColumnStore;
+
+CREATE TABLE suppliers (
+    supplier_id INT NOT NULL,
+    supplier_name VARCHAR(100),
+    component_id INT
+) ENGINE = ColumnStore;
+
+INSERT INTO products (product_id, product_name) VALUES
+(10, 'Smartphone');
+
+INSERT INTO components (component_id, component_name, parent_product_id) VALUES
+(100, 'Screen', 10),
+(101, 'Battery', 10),
+(200, 'Glass Panel', 100),
+(201, 'LCD', 100),
+(202, 'Connector', 101);
+
+INSERT INTO suppliers (supplier_id, supplier_name, component_id) VALUES
+(1000, 'Supplier A', 200),
+(1001, 'Supplier B', 201),
+(1002, 'Supplier C', 202);
+
+WITH RECURSIVE product_tree AS (
+    SELECT
+        p.product_id AS root_id,
+        p.product_name AS root_name,
+        c.component_id,
+        c.component_name,
+        c.parent_product_id,
+        1 AS level
+    FROM products p
+    JOIN components c ON p.product_id = c.parent_product_id
+
+    UNION ALL
+
+    SELECT
+        pt.root_id,
+        pt.root_name,
+        c.component_id,
+        c.component_name,
+        c.parent_product_id,
+        pt.level + 1
+    FROM components c
+    JOIN product_tree pt ON c.parent_product_id = pt.component_id
+)
+SELECT
+    pt.root_name AS product,
+    pt.component_name AS component,
+    s.supplier_name AS supplier,
+    pt.level
+FROM product_tree pt
+JOIN suppliers s ON pt.component_id = s.component_id;
+
+DROP DATABASE MULTI_TABLE_TEST;
+
diff --git a/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test b/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test
new file mode 100644
index 000000000..9f93de10d
--- /dev/null
+++ b/mysql-test/columnstore/future/MCOL-5142-nested-subquery-anchor.test
@@ -0,0 +1,54 @@
+--disable_warnings
+DROP DATABASE IF EXISTS NESTED_SUBQUERY_TEST;
+--enable_warnings
+
+CREATE DATABASE NESTED_SUBQUERY_TEST;
+
+USE NESTED_SUBQUERY_TEST;
+
+CREATE TABLE employees (
+    id INT NOT NULL,
+    name VARCHAR(100),
+    manager_id INT,
+    department_id INT
+) ENGINE = ColumnStore;
+
+CREATE TABLE departments (
+    id INT NOT NULL,
+    name VARCHAR(100)
+) ENGINE = ColumnStore;
+
+INSERT INTO departments (id, name) VALUES
+(101, 'Sales'),
+(102, 'Engineering'),
+(103, 'Marketing');
+
+INSERT INTO employees (id, name, manager_id, department_id) VALUES
+(1, 'Alice', NULL, 101),
+(2, 'Bob', 1, 101),
+(3, 'Charlie', 1, 102),
+(4, 'David', 2, 101),
+(5, 'Eve', 3, 102),
+(6, 'Frank', 3, 103);
+
+WITH RECURSIVE sales_hierarchy AS (
+    SELECT id, name, manager_id, 0 AS level, department_id
+    FROM employees
+    WHERE department_id IN (SELECT id FROM departments WHERE name = 'Sales')
+    AND manager_id IS NULL
+
+    UNION ALL
+
+    SELECT
+        e.id,
+        e.name,
+        e.manager_id,
+        eh.level + 1,
+        e.department_id
+    FROM employees AS e
+    JOIN sales_hierarchy eh ON e.manager_id = eh.id
+)
+SELECT * FROM sales_hierarchy;
+
+DROP DATABASE NESTED_SUBQUERY_TEST;
+
diff --git a/utils/libmarias3/libmarias3 b/utils/libmarias3/libmarias3
index d9cb536a5..f74150b05 160000
--- a/utils/libmarias3/libmarias3
+++ b/utils/libmarias3/libmarias3
@@ -1 +1 @@
-Subproject commit d9cb536a532ef6e71df66d99e95562e1169ec93f
+Subproject commit f74150b05693440d35f93c43e2d2411cc66fee19
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
index 50baf681c..e6eb2195d 100644
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@@ -27,6 +27,7 @@
 
 #pragma once
 
+#include <cstdint>
 #include <vector>
 #include <string>
 #include <stdexcept>
@@ -377,6 +378,8 @@ class RGData
   // Need sig to support backward compat.  RGData can deserialize both forms.
   static const uint32_t RGDATA_SIG = 0xffffffff;  // won't happen for 'old' Rowgroup data
 
+  uint32_t skipSteps = 0;  // TEST: experimental field for dealing with recursive CTE (TODO(review): document semantics)
+
   friend class RowGroup;
   friend class RowGroupStorage;
 };