Handling for Spark DateType in SW

Description

I am getting an error when converting a Spark DataFrame with a date-typed column to an H2OFrame:
df.select("created_date").first()
Row(created_date=datetime.date(2006, 10, 3))

context.as_h2o_frame(df.select("created_date"))

---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-9-b6ba696f5e41> in <module>()
----> 1 context.as_h2o_frame(df.select("created_date"))

/opt/sparkling-water/2.0.3/py/build/dist/h2o_pysparkling_2.0-2.0.3-py2.7.egg/pysparkling/context.pyc in as_h2o_frame(self, dataframe, framename)
175 """
176 if isinstance(dataframe, DataFrame):
--> 177 return fc._as_h2o_frame_from_dataframe(self, dataframe, framename)
178 elif isinstance(dataframe, RDD):
179 # First check if the type T in RDD[T] is one of the python "primitive" types

/opt/sparkling-water/2.0.3/py/build/dist/h2o_pysparkling_2.0-2.0.3-py2.7.egg/pysparkling/conversions.pyc in _as_h2o_frame_from_dataframe(h2oContext, dataframe, frame_name)
42 if dataframe.count() == 0:
43 raise ValueError('Cannot transform empty H2OFrame')
---> 44 j_h2o_frame = h2oContext._jhc.asH2OFrame(dataframe._jdf, frame_name)
45 j_h2o_frame_key = j_h2o_frame.key()
46 return H2OFrame.from_java_h2o_frame(j_h2o_frame,j_h2o_frame_key)

/opt/spark/2.0.2/python/lib/py4j-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:

/opt/spark/2.0.2/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()

/opt/spark/2.0.2/python/lib/py4j-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
317 raise Py4JJavaError(
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
321 raise Py4JError(

Py4JJavaError: An error occurred while calling o70.asH2OFrame.
: java.util.NoSuchElementException: key not found: DateType
at scala.collection.MapLike$class.default(MapLike.scala:228)
at scala.collection.AbstractMap.default(Map.scala:59)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:59)
at org.apache.spark.h2o.utils.ReflectionUtils$.supportedTypeOf(ReflectionUtils.scala:110)
at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$4.apply(SparkDataFrameConverter.scala:70)
at org.apache.spark.h2o.converters.SparkDataFrameConverter$$anonfun$4.apply(SparkDataFrameConverter.scala:70)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.h2o.converters.SparkDataFrameConverter$.toH2OFrame(SparkDataFrameConverter.scala:70)
at org.apache.spark.h2o.H2OContext.asH2OFrame(H2OContext.scala:132)
at org.apache.spark.h2o.H2OContext.asH2OFrame(H2OContext.scala:133)
at org.apache.spark.h2o.JavaH2OContext.asH2OFrame(JavaH2OContext.java:100)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)

The error occurs because we don’t have handling for Spark DateType here: https://github.com/h2oai/sparkling-water/blob/master/core/src/main/scala/org/apache/spark/h2o/utils/SupportedTypes.scala#L77

Assignee

Michal Malohlava

Reporter

Avkash Chauhan

Labels

None

CustomerVisible

No

testcase 1

None

testcase 2

None

testcase 3

None

h2ostream link

None

Affected Spark version

None

AffectedContact

None

AffectedCustomers

AffectedPilots

None

AffectedOpenSource

None

Support Assessment

Platform Issue

Customer Request Type

Support Incident

Support ticket URL

None

End date

None

Baseline start date

None

Baseline end date

None

Task progress

None

Task mode

None

ReleaseNotesHidden

None

Fix versions

Priority

Major
Configure