This error is so very strange. I received a Spark TreeNodeException executing a union of two data frames. When I assign the union results to a DataFrame that will be returned from by a function, this error occurs. However, I am able to assign the union results to a DataFrame that will not be returned. The Scala code: val aDF = bDF.union(cDF).
Below is the error:
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: DP_Acct_Identifier#140575 at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:79) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:78) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:255) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261) at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:245) at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:78) at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$.bind(GeneratePredicate.scala:45) at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$.bind(GeneratePredicate.scala:40) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1190) at org.apache.spark.sql.execution.SparkPlan.newPredicate(SparkPlan.scala:403) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec.org$apache$spark$sql$execution$joins$BroadcastNestedLoopJoinExec$$boundCondition$lzycompute(BroadcastNestedLoopJoinExec.scala:87) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec.org$apache$spark$sql$execution$joins$BroadcastNestedLoopJoinExec$$boundCondition(BroadcastNestedLoopJoinExec.scala:85) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec$$anonfun$4$$anonfun$apply$2$$anonfun$apply$3.apply(BroadcastNestedLoopJoinExec.scala:191) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec$$anonfun$4$$anonfun$apply$2$$anonfun$apply$3.apply(BroadcastNestedLoopJoinExec.scala:191) at scala.collection.IndexedSeqOptimized$class.prefixLengthImpl(IndexedSeqOptimized.scala:38) at scala.collection.IndexedSeqOptimized$class.exists(IndexedSeqOptimized.scala:46) at scala.collection.mutable.ArrayOps$ofRef.exists(ArrayOps.scala:186) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec$$anonfun$4$$anonfun$apply$2.apply(BroadcastNestedLoopJoinExec.scala:191) at org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec$$anonfun$4$$anonfun$apply$2.apply(BroadcastNestedLoopJoinExec.scala:190) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.RuntimeException: Couldn't find DP_Acct_Identifier#140575 in [AP_Participant_ID#34180,DP_Acct_Identifier#34181,DP_Acct_Identifier#127] at scala.sys.package$.error(package.scala:27) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 40 more
https://stackoverflow.com/questions/66485301/spark-treenodeexception-binding-attribute March 05, 2021 at 09:07AM
没有评论:
发表评论