Details

Type: Bug
Status: Resolved
Priority: Major
Resolution: Not A Bug
Affects Version/s: 0.2
Fix Version/s: None
Component/s: API, Core, Interpreter
Labels: None
Environment: Git: master, revision 1fa1d37d9b32361e2fa6389da294e7445ce065d4
Description
The following code defines a class and then uses it with the SparkContext:
class MyClass(val myInt: Int) extends Serializable {
  override def toString: String = myInt.toString
}

val myRes = sc.parallelize(List(1, 2, 3)).map((n) => new MyClass(n)).collect()
myRes(1).myInt
The following error appears:
org.apache.spark.SparkDriverExecutionException: Execution error
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1024)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1921)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:909)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:908)
    at $iwC$$iwC.<init>(<console>:9)
    at $iwC.<init>(<console>:18)
    at <init>(<console>:20)
    at .<init>(<console>:24)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at com.cloudera.hue.livy.repl.scala.SparkInterpreter$$anonfun$executeLine$1.apply(SparkInterpreter.scala:242)
    at com.cloudera.hue.livy.repl.scala.SparkInterpreter$$anonfun$executeLine$1.apply(SparkInterpreter.scala:242)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
    at scala.Console$.withOut(Console.scala:126)
    at com.cloudera.hue.livy.repl.scala.SparkInterpreter.executeLine(SparkInterpreter.scala:241)
    at com.cloudera.hue.livy.repl.scala.SparkInterpreter.executeLines(SparkInterpreter.scala:216)
    at com.cloudera.hue.livy.repl.scala.SparkInterpreter.execute(SparkInterpreter.scala:79)
    at com.cloudera.hue.livy.repl.Session.com$cloudera$hue$livy$repl$Session$$executeCode(Session.scala:96)
    at com.cloudera.hue.livy.repl.Session$$anonfun$3.apply(Session.scala:71)
    at com.cloudera.hue.livy.repl.Session$$anonfun$3.apply(Session.scala:71)
    at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
    at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ArrayStoreException: [L$iwC$$iwC$MyClass;
    at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:88)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1837)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1837)
    at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1020)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
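The root cause is the java.lang.ArrayStoreException in the "Caused by" frame. On the JVM, this exception is thrown when an element is stored into an array whose runtime component type does not match the element's class. My reading (an assumption on my part, consistent with the Not A Bug resolution) is that the REPL's $iwC wrapper class is loaded by a different classloader on the driver than the one used to deserialize the task results, so the two MyClass types are distinct classes at runtime even though they share a name. A minimal, Spark-free Scala sketch that raises the same exception:

object ArrayStoreDemo {
  def main(args: Array[String]): Unit = {
    // An Array[String] keeps its runtime component type (String) even when
    // viewed through an Array[Object] reference.
    val strings = new Array[String](1)
    val objects = strings.asInstanceOf[Array[Object]]

    // Storing an element whose class does not match the array's runtime
    // component type throws java.lang.ArrayStoreException, the same
    // exception class seen in the "Caused by" frame above.
    objects(0) = Integer.valueOf(42)
  }
}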
For completeness, I've also executed the same code via curl from the command line:
cd /home/milad/projects/hue/apps/spark/java
export SPARK_HOME=/home/milad/spark-1.5.2-bin-hadoop2.6
./bin/livy-server

curl -H "Content-Type: application/json" -X POST http://localhost:8998/sessions/0/statements -d '{"code": "class MyClass( val myInt: Int ) extends Serializable { override def toString: String = myInt.toString }; val myRes = sc.parallelize(List(1,2,3)).map((n) => new MyClass(n)).collect(); myRes(1).myInt"}'

curl -X GET http://localhost:8998/sessions/0/statements/0

The GET request returns:

{"id":0,"state":"available","output":{"status":"error","execution_count":0,"ename":"Error","evalue":"org.apache.spark.SparkDriverExecutionException: Execution error\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1024)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1850)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1921)\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:909)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:310)\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:908)\n\tat $iwC$$iwC.<init>(<console>:9)\n\tat $iwC.<init>(<console>:18)\n\tat <init>(<console>:20)\n\tat .<init>(<console>:24)\n\tat .<clinit>(<console>)\n\tat .<init>(<console>:7)\n\tat .<clinit>(<console>)\n\tat $print(<console>)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:497)\n\tat org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)\n\tat org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)\n\tat org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)\n\tat org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)\n\tat org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)\n\tat com.cloudera.hue.livy.repl.scala.SparkInterpreter$$anonfun$executeLine$1.apply(SparkInterpreter.scala:242)\n\tat com.cloudera.hue.livy.repl.scala.SparkInterpreter$$anonfun$executeLine$1.apply(SparkInterpreter.scala:242)\n\tat scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)\n\tat scala.Console$.withOut(Console.scala:126)\n\tat com.cloudera.hue.livy.repl.scala.SparkInterpreter.executeLine(SparkInterpreter.scala:241)\n\tat com.cloudera.hue.livy.repl.scala.SparkInterpreter.executeLines(SparkInterpreter.scala:216)\n\tat com.cloudera.hue.livy.repl.scala.SparkInterpreter.execute(SparkInterpreter.scala:79)\n\tat com.cloudera.hue.livy.repl.Session.com$cloudera$hue$livy$repl$Session$$executeCode(Session.scala:96)\n\tat com.cloudera.hue.livy.repl.Session$$anonfun$3.apply(Session.scala:71)\n\tat com.cloudera.hue.livy.repl.Session$$anonfun$3.apply(Session.scala:71)\n\tat scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)\n\tat scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\nCaused by: java.lang.ArrayStoreException: [L$iwC$$iwC$MyClass;\n\tat scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:88)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1837)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1837)\n\tat org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1020)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)","traceback":[]}}
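For anyone who hits this, a possible workaround (a sketch only, not verified against this Livy revision): extract the primitive field on the executors before collecting, so the driver never has to build an array of the REPL-defined class.

// Hypothetical workaround: map to the Int field before collect(), so
// collect() materializes an Array[Int] on the driver instead of an array
// of the REPL-defined MyClass. Assumes MyClass is defined as above and
// sc is the session's SparkContext.
val myInts = sc.parallelize(List(1, 2, 3))
  .map(n => new MyClass(n))
  .map(_.myInt)
  .collect()
myInts(1) // 2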