Permissions issue when saving a Spark DataFrame

Description

When running the code below (the notebook command captured in the traceback):
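
    sdf = spark.sql('select * from tableau_prd.v_propel limit 10')
    sdf.createOrReplaceTempView('v_propel_sample')
    spark.sql('create table tableau_dev.v_propel_sample as select * from v_propel_sample')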

The following error is received:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<command-3887762907047669> in <module>
      1 sdf = spark.sql('select * from tableau_prd.v_propel limit 10')
      2 sdf.createOrReplaceTempView('v_propel_sample')
----> 3 spark.sql('create table tableau_dev.v_propel_sample as select * from v_propel_sample')

/databricks/spark/python/pyspark/sql/session.py in sql(self, sqlQuery)
    834         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    835         """
--> 836         return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
    837 
    838     @since(2.0)

/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling o210.sql.
: org.apache.spark.SparkException: Job aborted.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:201)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:192)
    at org.apache.spark.sql.execution.datasources.DataSource.writeAndRead(DataSource.scala:555)
    at org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand.saveDataIntoTable(createDataSourceTables.scala:216)
    at org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand.run(createDataSourceTables.scala:175)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:119)
    at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:205)
    at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:205)
    at org.apache.spark.sql.Dataset$$anonfun$54.apply(Dataset.scala:3485)
    at org.apache.spark.sql.Dataset$$anonfun$54.apply(Dataset.scala:3480)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:111)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:240)
    at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:97)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:170)
    at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3480)
    at org.apache.spark.sql.Dataset.<init>(Dataset.scala:205)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:89)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:696)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
    at py4j.Gateway.invoke(Gateway.java:295)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:251)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 13.0 failed 4 times, most recent failure: Lost task 0.3 in stage 13.0 (TID 389, 10.139.64.86, executor 2): PUT https://xxxdatalake01.dfs.core.windows.net/adlsfilesystem/encdata/data/cda/ssa/tableau_dev/v_propel_sample/_started_8115685763853012355?resource=file&timeout=90
StatusCode=403
StatusDescription=This request is not authorized to perform this operation using this permission.
ErrorCode=AuthorizationPermissionMismatch
ErrorMessage=This request is not authorized to perform this operation using this permission. RequestId:cdba983f-a01f-0056-3948-ddc345000000 Time:2020-02-06T23:53:37.2524794Z
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:134)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.services.AbfsClient.createPath(AbfsClient.java:243)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.createFile(AzureBlobFileSystemStore.java:322)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.create(AzureBlobFileSystem.java:189)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789)
    at com.databricks.sql.transaction.directory.DirectoryAtomicCommitProtocol.newTaskTempFileAbsPath(DirectoryAtomicCommitProtocol.scala:116)
    at com.databricks.sql.transaction.directory.DirectoryAtomicCommitProtocol.newTaskTempFile(DirectoryAtomicCommitProtocol.scala:101)
    at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:115)
    at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:108)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:239)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:172)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:140)
    at org.apache.spark.scheduler.Task.run(Task.scala:113)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:528)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1526)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:534)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2360)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2348)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2347)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2347)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1101)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1101)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1101)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2579)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2527)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2515)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:896)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2280)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:170)
    ... 30 more
Caused by: PUT https://xxxdatalake01.dfs.core.windows.net/adlsfilesystem/encdata/data/cda/ssa/tableau_dev/v_propel_sample/_started_8115685763853012355?resource=file&timeout=90
StatusCode=403
StatusDescription=This request is not authorized to perform this operation using this permission.
ErrorCode=AuthorizationPermissionMismatch
ErrorMessage=This request is not authorized to perform this operation using this permission. RequestId:cdba983f-a01f-0056-3948-ddc345000000 Time:2020-02-06T23:53:37.2524794Z
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:134)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.services.AbfsClient.createPath(AbfsClient.java:243)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.createFile(AzureBlobFileSystemStore.java:322)
    at shaded.databricks.v20180920_b33d810.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.create(AzureBlobFileSystem.java:189)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789)
    at com.databricks.sql.transaction.directory.DirectoryAtomicCommitProtocol.newTaskTempFileAbsPath(DirectoryAtomicCommitProtocol.scala:116)
    at com.databricks.sql.transaction.directory.DirectoryAtomicCommitProtocol.newTaskTempFile(DirectoryAtomicCommitProtocol.scala:101)
    at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:115)
    at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:108)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:239)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:172)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:140)
    at org.apache.spark.scheduler.Task.run(Task.scala:113)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:528)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1526)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:534)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more
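
The 403 is raised by ADLS Gen2 itself, not by Spark: ErrorCode=AuthorizationPermissionMismatch indicates the identity the cluster authenticates with can read tableau_prd but lacks create/write permission on the tableau_dev directory, so the CTAS fails as soon as the commit protocol tries to create its _started marker file. A small write probe against the same path can confirm this without rerunning the full CTAS. The sketch below is illustrative only: it assumes a Databricks notebook (where dbutils is available), and the probe file name _perm_probe.txt is made up; the abfss path is rebuilt from the failing PUT in the traceback (filesystem "adlsfilesystem" on account "xxxdatalake01").

    # Hypothetical write probe (not from the ticket): attempt to create and then
    # delete a tiny file in the same directory the CTAS writes to.
    probe = ("abfss://adlsfilesystem@xxxdatalake01.dfs.core.windows.net"
             "/encdata/data/cda/ssa/tableau_dev/_perm_probe.txt")

    dbutils.fs.put(probe, "permission probe", True)  # raises the same 403 if writes are denied
    dbutils.fs.rm(probe)                             # clean up if the write succeeded

If the probe fails with the same AuthorizationPermissionMismatch, the fix is on the storage side rather than in the Spark code: typically granting the identity the Storage Blob Data Contributor role on the storage account, or write/execute ACLs on the encdata/data/cda/ssa/tableau_dev directory.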


More details in ticket:

https://support.h2o.ai/a/tickets/96410

Assignee

New H2O Bugs

Fix versions

None

Reporter

neema.mashayekhi

Support ticket URL

https://support.h2o.ai/a/tickets/96410

Labels

None

Affected Spark version

None

Customer Request Type

None

Task progress

None

ReleaseNotesHidden

None

CustomerVisible

No

Priority

Major