Skip to content

Commit c8ac87c

Browse files
author
Koji Noguchi
committed
PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1862504 13f79535-47bb-0310-9956-ffa450edef68
1 parent 61cb452 commit c8ac87c

File tree

5 files changed

+132
-126
lines changed

5 files changed

+132
-126
lines changed

CHANGES.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,11 @@ OPTIMIZATIONS
9292

9393
BUG FIXES
9494

95-
PIG-5386: Pig local mode with bundled Hadoop broken
95+
PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi)
9696

97-
PIG-5387: Test failures on JRE 11
97+
PIG-5386: Pig local mode with bundled Hadoop broken (nkollar)
98+
99+
PIG-5387: Test failures on JRE 11 (nkollar)
98100

99101
PIG-5383: OrcStorage fails when "bytearray" represents unknown type (knoguchi)
100102

src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/UnionOptimizer.java

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -289,18 +289,16 @@ public void visitTezOp(TezOperator tezOp) throws VisitorException {
289289
if (storeVertexGroupOps[i] != null) {
290290
continue;
291291
}
292-
}
293-
if (existingVertexGroup != null) {
294-
storeVertexGroupOps[i] = existingVertexGroup;
295-
existingVertexGroup.getVertexGroupMembers().remove(unionOp.getOperatorKey());
296-
existingVertexGroup.getVertexGroupMembers().addAll(unionOp.getUnionMembers());
297-
existingVertexGroup.getVertexGroupInfo().removeInput(unionOp.getOperatorKey());
298-
} else {
299292
storeVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
300293
storeVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo(unionStoreOutputs.get(i)));
301294
storeVertexGroupOps[i].getVertexGroupInfo().setSFile(unionStoreOutputs.get(i).getSFile());
302295
storeVertexGroupOps[i].setVertexGroupMembers(new ArrayList<OperatorKey>(unionOp.getUnionMembers()));
303296
tezPlan.add(storeVertexGroupOps[i]);
297+
} else {
298+
storeVertexGroupOps[i] = existingVertexGroup;
299+
existingVertexGroup.getVertexGroupMembers().remove(unionOp.getOperatorKey());
300+
existingVertexGroup.getVertexGroupMembers().addAll(unionOp.getUnionMembers());
301+
existingVertexGroup.getVertexGroupInfo().removeInput(unionOp.getOperatorKey());
304302
}
305303
}
306304

@@ -320,19 +318,36 @@ public void visitTezOp(TezOperator tezOp) throws VisitorException {
320318
TezOperator[] outputVertexGroupOps = new TezOperator[unionOutputKeys.size()];
321319
String[] newOutputKeys = new String[unionOutputKeys.size()];
322320
for (int i=0; i < outputVertexGroupOps.length; i++) {
323-
for (int j = 0; j < i; j++) {
324-
if (unionOutputKeys.get(i).equals(unionOutputKeys.get(j))) {
325-
outputVertexGroupOps[i] = outputVertexGroupOps[j];
326-
break;
321+
TezOperator existingVertexGroup = null;
322+
if (successors != null) {
323+
for (TezOperator succ : successors) {
324+
if (succ.isVertexGroup()
325+
&& unionOutputKeys.get(i).equals(succ.getVertexGroupInfo().getOutput()) ) {
326+
existingVertexGroup = succ;
327+
break;
328+
}
327329
}
328330
}
329-
if (outputVertexGroupOps[i] != null) {
330-
continue;
331+
if (existingVertexGroup == null) {
332+
for (int j = 0; j < i; j++) {
333+
if (unionOutputKeys.get(i).equals(unionOutputKeys.get(j))) {
334+
outputVertexGroupOps[i] = outputVertexGroupOps[j];
335+
break;
336+
}
337+
}
338+
if (outputVertexGroupOps[i] != null) {
339+
continue;
340+
}
341+
outputVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
342+
outputVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo());
343+
outputVertexGroupOps[i].getVertexGroupInfo().setOutput(unionOutputKeys.get(i));
344+
outputVertexGroupOps[i].setVertexGroupMembers(new ArrayList<OperatorKey>(unionOp.getUnionMembers()));
345+
} else {
346+
outputVertexGroupOps[i] = existingVertexGroup;
347+
existingVertexGroup.getVertexGroupMembers().remove(unionOp.getOperatorKey());
348+
existingVertexGroup.getVertexGroupMembers().addAll(unionOp.getUnionMembers());
349+
existingVertexGroup.getVertexGroupInfo().removeInput(unionOp.getOperatorKey());
331350
}
332-
outputVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
333-
outputVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo());
334-
outputVertexGroupOps[i].getVertexGroupInfo().setOutput(unionOutputKeys.get(i));
335-
outputVertexGroupOps[i].setVertexGroupMembers(new ArrayList<OperatorKey>(unionOp.getUnionMembers()));
336351
newOutputKeys[i] = outputVertexGroupOps[i].getOperatorKey().toString();
337352
tezPlan.add(outputVertexGroupOps[i]);
338353
}
@@ -619,18 +634,6 @@ private void connectVertexGroupsToSuccessors(TezOperator unionOp,
619634
// Connect to outputVertexGroupOps
620635
for (Entry<OperatorKey, TezEdgeDescriptor> entry : unionOp.outEdges.entrySet()) {
621636
TezOperator succOp = tezPlan.getOperator(entry.getKey());
622-
// Case of union followed by union.
623-
// unionOp.outEdges will not point to vertex group, but to its output.
624-
// So find the vertex group if there is one.
625-
TezOperator succOpVertexGroup = null;
626-
for (TezOperator succ : successors) {
627-
if (succ.isVertexGroup()
628-
&& succOp.getOperatorKey().toString()
629-
.equals(succ.getVertexGroupInfo().getOutput())) {
630-
succOpVertexGroup = succ;
631-
break;
632-
}
633-
}
634637
TezEdgeDescriptor edge = entry.getValue();
635638
// Edge cannot be one to one as it will get input from two or
636639
// more union predecessors. Change it to SCATTER_GATHER
@@ -641,26 +644,14 @@ private void connectVertexGroupsToSuccessors(TezOperator unionOp,
641644
edge.inputClassName = UnorderedKVInput.class.getName();
642645
}
643646
TezOperator vertexGroupOp = outputVertexGroupOps[unionOutputKeys.indexOf(entry.getKey().toString())];
644-
for (OperatorKey predKey : vertexGroupOp.getVertexGroupMembers()) {
647+
for (OperatorKey predKey : unionOp.getUnionMembers()) {
645648
TezOperator pred = tezPlan.getOperator(predKey);
646649
// Keep the output edge directly to successor
647650
// Don't need to keep output edge for vertexgroup
648651
pred.outEdges.put(entry.getKey(), edge);
649652
succOp.inEdges.put(predKey, edge);
650-
if (succOpVertexGroup != null) {
651-
succOpVertexGroup.getVertexGroupMembers().add(predKey);
652-
succOpVertexGroup.getVertexGroupInfo().addInput(predKey);
653-
// Connect directly to the successor vertex group
654-
tezPlan.disconnect(pred, vertexGroupOp);
655-
tezPlan.connect(pred, succOpVertexGroup);
656-
}
657653
}
658-
if (succOpVertexGroup != null) {
659-
succOpVertexGroup.getVertexGroupMembers().remove(unionOp.getOperatorKey());
660-
succOpVertexGroup.getVertexGroupInfo().removeInput(unionOp.getOperatorKey());
661-
//Discard the new vertex group created
662-
tezPlan.remove(vertexGroupOp);
663-
} else {
654+
if(!tezPlan.pathExists(vertexGroupOp, succOp)) {
664655
tezPlan.connect(vertexGroupOp, succOp);
665656
}
666657
}

test/e2e/pig/tests/multiquery.conf

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,19 @@ u3 = UNION ONSCHEMA e, f;
773773
SPLIT u3 INTO t if age > 75, u OTHERWISE;
774774
v = JOIN t BY name LEFT, c BY votername;
775775
store v into ':OUTPATH:';\,
776+
},
777+
{
778+
# PIG-5375. multi-level Unions with splits
779+
'num' => 15,
780+
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
781+
b= load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
782+
c= load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
783+
a_and_b = UNION ONSCHEMA a, b;
784+
SPLIT a_and_b INTO a_and_b2 IF age < 30, a_and_b3 OTHERWISE;
785+
a_and_b_and_c = UNION ONSCHEMA c, a_and_b;
786+
v = UNION ONSCHEMA a_and_b_and_c, a_and_b2, a_and_b3;
787+
v2 = GROUP v by *;
788+
store v2 into ':OUTPATH:';\,
776789
}
777790
] # end of tests
778791
},

test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-10.gld

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ e: Split - scope-61
3434
|---d: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17
3535
Tez vertex scope-37
3636
# Plan on vertex
37-
e: Split - scope-66
37+
e: Split - scope-65
3838
| |
39-
| e: Store(file:///tmp/pigoutput1:org.apache.pig.builtin.PigStorage) - scope-67 -> scope-29
39+
| e: Store(file:///tmp/pigoutput1:org.apache.pig.builtin.PigStorage) - scope-66 -> scope-29
4040
| |
41-
| f: Local Rearrange[tuple]{int}(false) - scope-68 -> scope-53
41+
| f: Local Rearrange[tuple]{int}(false) - scope-67 -> scope-53
4242
| | |
43-
| | Project[int][0] - scope-69
43+
| | Project[int][0] - scope-68
4444
|
4545
|---a: New For Each(false,false)[bag] - scope-7
4646
| |
@@ -55,13 +55,13 @@ e: Split - scope-66
5555
|---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
5656
Tez vertex scope-38
5757
# Plan on vertex
58-
e: Split - scope-70
58+
e: Split - scope-69
5959
| |
60-
| e: Store(file:///tmp/pigoutput1:org.apache.pig.builtin.PigStorage) - scope-71 -> scope-29
60+
| e: Store(file:///tmp/pigoutput1:org.apache.pig.builtin.PigStorage) - scope-70 -> scope-29
6161
| |
62-
| f: Local Rearrange[tuple]{int}(false) - scope-72 -> scope-53
62+
| f: Local Rearrange[tuple]{int}(false) - scope-71 -> scope-53
6363
| | |
64-
| | Project[int][0] - scope-73
64+
| | Project[int][0] - scope-72
6565
|
6666
|---c: New For Each(false,false)[bag] - scope-15
6767
| |

0 commit comments

Comments
 (0)