NullPointerException at water.H2ONode.openChan(H2ONode.java:417) after upgrade to H2O 3.22.0.3

Description

NPE happens here
https://github.com/h2oai/h2o-3/blob/02b183e7d6d7c83301486ca2fd0ce15b61294379/h2o-core/src/main/java/water/H2ONode.java#L417

when calling
hc.as_h2o_frame() (from Sparkling Water 2.3.19, connected to h2o external backend cluster).

Same code worked fine before upgrade (from SW 2.3.13).

See the full exception below:

Py4JJavaErrorTraceback (most recent call last) <ipython-input-52-9806e38ce736> in <module>() 79 print datetime.now().strftime('%Y%m%d %H:%M:%S') + "-Converting to H2O Frame..." 80 ---> 81 modeling = hc.as_h2o_frame(modeling_data,framename=hdfs_file_name) 82 print datetime.now().strftime('%Y%m%d %H:%M:%S') + "-Complete" 83 /opt/cloudera/parcels/Anaconda/lib/python2.7/site-packages/pysparkling/context.pyc in as_h2o_frame(self, dataframe, framename, full_cols) 253 """ 254 if isinstance(dataframe, DataFrame): --> 255 return fc._as_h2o_frame_from_dataframe(self, dataframe, framename, full_cols) 256 elif isinstance(dataframe, RDD) and dataframe.isEmpty(): 257 schema = StructType([]) /opt/cloudera/parcels/Anaconda/lib/python2.7/site-packages/pysparkling/conversions.pyc in _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols) 40 @staticmethod 41 def _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name, full_cols=100): ---> 42 j_h2o_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, frame_name) 43 j_h2o_frame_key = j_h2o_frame.key() 44 return H2OFrame.from_java_h2o_frame(j_h2o_frame,j_h2o_frame_key, full_cols) /opt/cloudera/parcels/SPARK2-2.3.0.cloudera4-1.cdh5.13.3.p0.611179/lib/spark2/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args) 1255 answer = self.gateway_client.send_command(command) 1256 return_value = get_return_value( -> 1257 answer, self.gateway_client, self.target_id, self.name) 1258 1259 for temp_arg in temp_args: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera4-1.cdh5.13.3.p0.611179/lib/spark2/python/lib/pyspark.zip/pyspark/sql/utils.py in 
deco(*a, **kw) 61 def deco(*a, **kw): 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString() /opt/cloudera/parcels/SPARK2-2.3.0.cloudera4-1.cdh5.13.3.p0.611179/lib/spark2/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 326 raise Py4JJavaError( 327 "An error occurred while calling {0}{1}{2}.\n". --> 328 format(target_id, ".", name), value) 329 else: 330 raise Py4JError( Py4JJavaError: An error occurred while calling o605.asH2OFrame. : org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 85.0 failed 4 times, most recent failure: Lost task 2.3 in stage 85.0 (TID 12836, pc1udatahad15.abacus-us.com, executor 24): java.lang.NullPointerException at water.H2ONode.openChan(H2ONode.java:417) at water.H2ONode.openChan(H2ONode.java:428) at water.ExternalFrameUtils.getConnection(ExternalFrameUtils.java:50) at org.apache.spark.h2o.backends.external.ExternalWriteConverterCtx.<init>(ExternalWriteConverterCtx.scala:31) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$$anonfun$create$1.apply(WriteConverterCtxUtils.scala:42) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$$anonfun$create$1.apply(WriteConverterCtxUtils.scala:42) at scala.Option.map(Option.scala:146) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$.create(WriteConverterCtxUtils.scala:42) at org.apache.spark.h2o.converters.SparkDataFrameConverter$.org$apache$spark$h2o$converters$SparkDataFrameConverter$$perSQLPartition(SparkDataFrameConverter.scala:102) at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$toH2OFrame$2$$anonfun$apply$1.apply(SparkDataFrameConverter.scala:82) at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$toH2OFrame$2$$anonfun$apply$1.apply(SparkDataFrameConverter.scala:82) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at 
org.apache.spark.scheduler.Task.run(Task.scala:109) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:381) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1872) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1821) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1810) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$.convert(WriteConverterCtxUtils.scala:87) at 
org.apache.spark.h2o.converters.SparkDataFrameConverter$.toH2OFrame(SparkDataFrameConverter.scala:81) at org.apache.spark.h2o.H2OContext$$anonfun$asH2OFrame$2.apply(H2OContext.scala:222) at org.apache.spark.h2o.H2OContext$$anonfun$asH2OFrame$2.apply(H2OContext.scala:222) at org.apache.spark.h2o.utils.H2OContextUtils$class.withConversionDebugPrints(H2OContextUtils.scala:91) at org.apache.spark.h2o.H2OContext.withConversionDebugPrints(H2OContext.scala:66) at org.apache.spark.h2o.H2OContext.asH2OFrame(H2OContext.scala:222) at org.apache.spark.h2o.H2OContext.asH2OFrame(H2OContext.scala:224) at org.apache.spark.h2o.JavaH2OContext.asH2OFrame(JavaH2OContext.java:103) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.zeppelin.py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) at org.apache.zeppelin.py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381) at org.apache.zeppelin.py4j.Gateway.invoke(Gateway.java:290) at org.apache.zeppelin.py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) at org.apache.zeppelin.py4j.commands.CallCommand.execute(CallCommand.java:79) at org.apache.zeppelin.py4j.GatewayConnection.run(GatewayConnection.java:209) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.NullPointerException at water.H2ONode.openChan(H2ONode.java:417) at water.H2ONode.openChan(H2ONode.java:428) at water.ExternalFrameUtils.getConnection(ExternalFrameUtils.java:50) at org.apache.spark.h2o.backends.external.ExternalWriteConverterCtx.<init>(ExternalWriteConverterCtx.scala:31) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$$anonfun$create$1.apply(WriteConverterCtxUtils.scala:42) at 
org.apache.spark.h2o.converters.WriteConverterCtxUtils$$anonfun$create$1.apply(WriteConverterCtxUtils.scala:42) at scala.Option.map(Option.scala:146) at org.apache.spark.h2o.converters.WriteConverterCtxUtils$.create(WriteConverterCtxUtils.scala:42) at org.apache.spark.h2o.converters.SparkDataFrameConverter$.org$apache$spark$h2o$converters$SparkDataFrameConverter$$perSQLPartition(SparkDataFrameConverter.scala:102) at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$toH2OFrame$2$$anonfun$apply$1.apply(SparkDataFrameConverter.scala:82) at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$toH2OFrame$2$$anonfun$apply$1.apply(SparkDataFrameConverter.scala:82) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:109) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:381) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ... 1 more

Environment

None

Status

Assignee

Jakub Hava

Reporter

Ruslan Dautkhanov

CustomerVisible

Yes

Fix versions

Affects versions

2.3.19

Priority

Critical