Common issues
org.apache.spark.shuffle.FetchFailedException: Stream is corrupted / java.io.IOException: Stream is corrupted / net.jpountz.lz4.LZ4BlockInputStream.refill(LZ4BlockInputStream.java:200)
See : [SPARK-34790] Fail in fetch shuffle blocks in batch when I/O encryption is enabled. - ASF JIRA (apache.org)
By pass the default encryption configuration by settings these properties
spark.network.crypto.enabled=true
spark.authenticate=true
spark.io.encryption.enabled=true
java.net.SocketTimeoutException: Read timed out / InvalidAbfsRestOperationException
For exceptions like these:
: Status code: -1 error code: null error message: InvalidAbfsRestOperationException
java.net.SocketTimeoutException: Read timed out
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.executeHttpOperation(AbfsRestOperation.java:194)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:134)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:513)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:496)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getIsNamespaceEnabled(AzureBlobFileSystemStore.java:230)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getFileStatus(AzureBlobFileSystemStore.java:566)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:443)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1690)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:119)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:131)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:874)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.base/java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Unknown Source)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.base/java.net.SocketInputStream.socketRead0(Native Method)
at java.base/java.net.SocketInputStream.socketRead(Unknown Source)
at java.base/java.net.SocketInputStream.read(Unknown Source)
at java.base/java.net.SocketInputStream.read(Unknown Source)
at java.base/java.io.BufferedInputStream.fill(Unknown Source)
at java.base/java.io.BufferedInputStream.read1(Unknown Source)
at java.base/java.io.BufferedInputStream.read(Unknown Source)
at java.base/sun.net.www.http.HttpClient.parseHTTPHeader(Unknown Source)
at java.base/sun.net.www.http.HttpClient.parseHTTP(Unknown Source)
at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream0(Unknown Source)
at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream(Unknown Source)
at java.base/java.net.HttpURLConnection.getResponseCode(Unknown Source)
at org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator.getTokenSingleCall(AzureADAuthenticator.java:264)
at org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator.getTokenCall(AzureADAuthenticator.java:216)
at org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator.getTokenFromMsi(AzureADAuthenticator.java:139)
at org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider.refreshToken(MsiTokenProvider.java:53)
at org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider.getToken(AccessTokenProvider.java:50)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAccessToken(AbfsClient.java:583)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.executeHttpOperation(AbfsRestOperation.java:162)
... 40 more
The token are expired, please upgrade the timeout seconds