- Nebula version: 3.1.0
- Deployment: single host
- Installation: RPM
- Production deployment: No
I run the degreestatic algorithm by submitting the jar with spark-submit. Data is read from nebula; for writing the results I tried the csv, text, and nebula sinks. Only the csv sink wrote the results successfully; the other two fail with the errors below.
1. With the text sink, the relevant application.conf settings are:
```
data: {
  source: nebula
  sink: text
  hasWeight: false
}
local: {
  read: {
    ......
  }
  write: {
    resultPath: /home/.../degreestatic
  }
}
```
Writing the result with the text sink fails with `Text data source does not support bigint data type`:

```
Exception in thread "main" org.apache.spark.sql.AnalysisException: Text data source does not support bigint data type.;
at org.apache.spark.sql.execution.datasources.DataSourceUtils$$anonfun$verifySchema$1.apply(DataSourceUtils.scala:69)
at org.apache.spark.sql.execution.datasources.DataSourceUtils$$anonfun$verifySchema$1.apply(DataSourceUtils.scala:67)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at org.apache.spark.sql.types.StructType.foreach(StructType.scala:99)
at org.apache.spark.sql.execution.datasources.DataSourceUtils$.verifySchema(DataSourceUtils.scala:67)
at org.apache.spark.sql.execution.datasources.DataSourceUtils$.verifyWriteSchema(DataSourceUtils.scala:34)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:100)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:159)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:136)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:160)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:157)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:132)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:696)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:305)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:291)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:249)
at org.apache.spark.sql.DataFrameWriter.text(DataFrameWriter.scala:634)
at com.vesoft.nebula.algorithm.writer.TextWriter.write(AlgoWriter.scala:60)
at com.vesoft.nebula.algorithm.Main$.saveAlgoResult(Main.scala:230)
at com.vesoft.nebula.algorithm.Main$.main(Main.scala:92)
at com.vesoft.nebula.algorithm.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:855)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:930)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:939)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```
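If I read the trace right, Spark's built-in text data source only supports a single string column, while the degreestatic result contains bigint columns, hence the AnalysisException. When calling the algorithm as a library rather than through the packaged jar, a workaround sketch would be to collapse the result into one delimited string column before writing (the column handling below is illustrative, not the jar's actual code path):

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, concat_ws}

// Workaround sketch: the text sink accepts exactly one string column, so
// cast every result column to string and join them into a single "value"
// column before calling .text(). The column set is whatever the algorithm
// returned (e.g. _id plus the degree columns).
def writeResultAsText(result: DataFrame, path: String): Unit = {
  val asOneColumn =
    concat_ws(",", result.columns.map(c => col(c).cast("string")): _*)
  result.select(asOneColumn.as("value")).write.text(path)
}
```

With the packaged jar, the csv sink already handles multi-column numeric results, which is presumably why that run succeeded.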
2. With the nebula sink, the relevant application.conf settings are:
```
data: {
  source: nebula
  sink: nebula
  hasWeight: false
}
nebula: {
  read: {
    ......
  }
  write: {
    graphAddress: "192.168.XXX.XXX:9669"
    metaAddress: "192.168.XXX.XXX:9559"
    user: root
    pswd: 123456
    space: XXXX000003
    tag: degreestatic
    type: insert
  }
}
```
Writing the result to nebula fails with `E_TAG_NOT_FOUND`:

```
com.vesoft.nebula.client.meta.exception.ExecuteFailedException: Execute failed: Get tag execute failed, errorCode: E_TAG_NOT_FOUND
at com.vesoft.nebula.client.meta.MetaClient.getTag(MetaClient.java:319)
at com.vesoft.nebula.connector.nebula.MetaProvider.getTagSchema(MetaProvider.scala:103)
at com.vesoft.nebula.connector.writer.NebulaVertexWriter.<init>(NebulaVertexWriter.scala:26)
at com.vesoft.nebula.connector.writer.NebulaVertexWriterFactory.createDataWriter(NebulaSourceWriter.scala:28)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:113)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:67)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:66)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
```
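From the trace, the connector fails while fetching the target tag's schema from the meta service, so it seems the tag must already exist in the space, with properties matching the result columns, even in insert mode. For the later update attempt I pre-created it along these lines (a sketch using the nebula-java client; the property names degree, inDegree and outDegree are my guess at the degreestatic output and should be checked against the actual result columns):

```scala
import com.vesoft.nebula.client.graph.NebulaPoolConfig
import com.vesoft.nebula.client.graph.data.HostAddress
import com.vesoft.nebula.client.graph.net.NebulaPool
import scala.collection.JavaConverters._

object CreateDegreeTag {
  def main(args: Array[String]): Unit = {
    val pool = new NebulaPool
    // graphd address and credentials mirror the write config above
    pool.init(List(new HostAddress("192.168.XXX.XXX", 9669)).asJava, new NebulaPoolConfig)
    val session = pool.getSession("root", "123456", false)
    try {
      session.execute("USE XXXX000003;")
      // Property names are an assumption; they must match the algorithm's
      // result columns for the write to succeed.
      session.execute(
        "CREATE TAG IF NOT EXISTS degreestatic(degree int, inDegree int, outDegree int);")
    } finally {
      session.release()
      pool.close()
    }
  }
}
```

The same statements can of course be run directly in the console.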
When the write type is changed from insert to update, with a tag named degreestatic created in the graph space beforehand, the job instead fails with `the maximum number of statements for Nebula is 512`:

```
Exception in thread "main" java.lang.AssertionError: assertion failed: the maximum number of statements for Nebula is 512
at scala.Predef$.assert(Predef.scala:170)
at com.vesoft.nebula.connector.WriteNebulaVertexConfig$WriteVertexConfigBuilder.check(NebulaConfig.scala:382)
at com.vesoft.nebula.connector.WriteNebulaVertexConfig$WriteVertexConfigBuilder.build(NebulaConfig.scala:350)
at com.vesoft.nebula.algorithm.writer.NebulaWriter.write(AlgoWriter.scala:45)
at com.vesoft.nebula.algorithm.Main$.saveAlgoResult(Main.scala:222)
at com.vesoft.nebula.algorithm.Main$.main(Main.scala:92)
at com.vesoft.nebula.algorithm.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:855)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:930)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:939)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```
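The assertion comes from WriteNebulaVertexConfig's builder check, which apparently caps the write batch at 512 statements in update mode. I don't see a batch option in this version's application.conf (please correct me if there is one); driving the connector directly, the write config would presumably need the batch capped explicitly, along these lines:

```scala
// Sketch of the connector-level write config; "_id" as the vid column is an
// assumption, and space/tag mirror the config above.
import com.vesoft.nebula.connector.{WriteMode, WriteNebulaVertexConfig}

val writeConfig = WriteNebulaVertexConfig
  .builder()
  .withSpace("XXXX000003")
  .withTag("degreestatic")
  .withVidField("_id")
  .withWriteMode(WriteMode.UPDATE)
  .withBatch(500) // must be <= 512 in update mode, or check() throws this assertion
  .build()
```

Is lowering the batch size (or exposing it in the jar's configuration) the intended fix here?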
Could you please look into these write failures, especially the one when writing back to nebula? Many thanks!