maximum recursion depth exceeded - Error happens with 3K columns

Description

The problem happens when about 3K columns are selected; if fewer columns are selected, it does not happen.

Here is some example code and the error message:

# Reproduction script. `sc` is assumed to be an existing SparkContext
# (e.g. provided by the pyspark shell / notebook) -- it is not created here.
from pysparkling import H2OConf, H2OContext

# Five identical rows, each holding the 80 values 0..79, in one cached partition.
rows = [list(range(0, 80)) for _ in range(5)]
train = sc.parallelize(rows).repartition(1).cache()

# Column names must be strings: they are passed to toDF() and concatenated
# with "_" below, so convert each index with str(x).
names = [str(x) for x in range(0, 80)]
train2 = train.toDF(names)

conf = (
    H2OConf(sc)
    .use_auto_cluster_start()
    .set_yarn_queue("spark-analytics")
    .set_num_of_external_h2o_nodes(1)
    .set_mapper_xmx("1G")
)
h2c = H2OContext.getOrCreate(sc, conf)

train_h2o = h2c.as_h2o_frame(train2)

# Add one product column per unordered pair of original columns.
# With 80 names this creates 80 * 79 / 2 = 3160 new columns (the "3K columns").
for i in names:
    for j in names:
        if j > i:  # string comparison; keeps each pair exactly once
            train_h2o[i + "_" + j] = train_h2o[j] * train_h2o[i]

# describe() is the call that raises
# "RuntimeError: maximum recursion depth exceeded" in the traceback below.
train_h2o.describe()

---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-23-685923140b63> in <module>()
22 if j>i:
23 train_h2o[i + "_" +j] = train_h2o[j]*train_h2o[i]
---> 24 train_h2o.describe()

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/frame.pyc in describe(self, chunk_summary)
456 :param bool chunk_summary: Retrieve the chunk summary along with the distribution summary
457 """
--> 458 res = h2o.api("GET /3/Frames/%s" % self.frame_id, data={"row_count": 10})["frames"][0]
459 self._ex._cache._fill_data(res)
460 print("Rows:{}".format(self.nrow))

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/frame.pyc in frame_id(self)
266 def frame_id(self):
267 """Internal id of the frame (str)."""
--> 268 return self._frame()._ex._cache._id
269
270 @frame_id.setter

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/frame.pyc in _frame(self, fill_cache)
470
471 def _frame(self, fill_cache=False):
--> 472 self._ex._eager_frame()
473 if fill_cache:
474 self._ex._cache.fill()

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/expr.pyc in _eager_frame(self)
85 if not self._cache.is_empty(): return
86 if self._cache._id is not None: return # Data already computed under ID, but not cached locally
---> 87 self._eval_driver(True)
88
89 def _eager_scalar(self): # returns a scalar (or a list of scalars)

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/expr.pyc in _eval_driver(self, top)
98
99 def _eval_driver(self, top):
--> 100 exec_str = self._get_ast_str(top)
101 res = ExprNode.rapids(exec_str)
102 if 'scalar' in res:

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/expr.pyc in _get_ast_str(self, top)
126 return self._cache._id # Data already computed under ID, but not cached
127 # assert isinstance(self._children,tuple)
--> 128 exec_str = "({} {})".format(self._op, " ".join([ExprNode._arg_to_expr(ast) for ast in self._children]))
129 gc_ref_cnt = len(gc.get_referrers(self))
130 if top or gc_ref_cnt >= ExprNode.MAGIC_REF_COUNT:

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/expr.pyc in _arg_to_expr(arg)
138 return "[]" # empty list
139 if isinstance(arg, ExprNode):
--> 140 return arg._get_ast_str(False)
141 if isinstance(arg, ASTId):
142 return str(arg)

... last 2 frames repeated, from the frame below ...

/opt/sparkling-water/2.1.3/py/build/dist/h2o_pysparkling_2.1-2.1.3-py2.7.egg/h2o/expr.pyc in _get_ast_str(self, top)
126 return self._cache._id # Data already computed under ID, but not cached
127 # assert isinstance(self._children,tuple)
--> 128 exec_str = "({} {})".format(self._op, " ".join([ExprNode._arg_to_expr(ast) for ast in self._children]))
129 gc_ref_cnt = len(gc.get_referrers(self))
130 if top or gc_ref_cnt >= ExprNode.MAGIC_REF_COUNT:

RuntimeError: maximum recursion depth exceeded

Assignee

New H2O Bugs

Fix versions

None

Reporter

Avkash Chauhan

Support ticket URL

None

Labels

Affected Spark version

None

Customer Request Type

Support Incident

Task progress

None

ReleaseNotesHidden

None

CustomerVisible

No

Support Assessment

Platform Issue

AffectedCustomers

Sprint

None

Priority

Major
Configure