1. Error case:
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 232.0 failed 1 times, most recent failure: Lost task 0.0 in stage 232.0 (TID 37, localhost, executor driver): java.util.NoSuchElementException: key not found: pagerank
at scala.collection.MapLike$class.default(MapLike.scala:228)
at scala.collection.AbstractMap.default(Map.scala:59)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:59)
at com.vesoft.nebula.connector.writer.NebulaExecutor$.com$vesoft$nebula$connector$writer$NebulaExecutor$$extraValue(NebulaExecutor.scala:125)
at com.vesoft.nebula.connector.writer.NebulaExecutor$$anonfun$8.apply(NebulaExecutor.scala:72)
at com.vesoft.nebula.connector.writer.NebulaExecutor$$anonfun$8.apply(NebulaExecutor.scala:69)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:683)
at scala.collection.immutable.Range.foreach(Range.scala:160)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:682)
at com.vesoft.nebula.connector.writer.NebulaExecutor$.assignVertexPropValues(NebulaExecutor.scala:69)
at com.vesoft.nebula.connector.writer.NebulaVertexWriter.write(NebulaVertexWriter.scala:52)
at com.vesoft.nebula.connector.writer.NebulaVertexWriter.write(NebulaVertexWriter.scala:22)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:118)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:116)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:146)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:67)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:66)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2067)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2Exec.scala:64)
... 32 more
Caused by: java.util.NoSuchElementException: key not found: pagerank
at scala.collection.MapLike$class.default(MapLike.scala:228)
at scala.collection.AbstractMap.default(Map.scala:59)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:59)
at com.vesoft.nebula.connector.writer.NebulaExecutor$.com$vesoft$nebula$connector$writer$NebulaExecutor$$extraValue(NebulaExecutor.scala:125)
at com.vesoft.nebula.connector.writer.NebulaExecutor$$anonfun$8.apply(NebulaExecutor.scala:72)
at com.vesoft.nebula.connector.writer.NebulaExecutor$$anonfun$8.apply(NebulaExecutor.scala:69)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:683)
at scala.collection.immutable.Range.foreach(Range.scala:160)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:682)
at com.vesoft.nebula.connector.writer.NebulaExecutor$.assignVertexPropValues(NebulaExecutor.scala:69)
at com.vesoft.nebula.connector.writer.NebulaVertexWriter.write(NebulaVertexWriter.scala:52)
at com.vesoft.nebula.connector.writer.NebulaVertexWriter.write(NebulaVertexWriter.scala:22)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:118)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:116)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:146)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:67)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:66)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
26/01/06 20:19:48 INFO SparkContext: Invoking stop() from shutdown hook
Testing shows the error is tied to the algorithm.executeAlgo field in the conf: if the field is set to louvain, the error becomes "key not found: louvain".
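From the stack trace, the write path fails in NebulaExecutor.extraValue, which looks up each result column name in the target tag's schema. This suggests the tag configured under nebula.write (player in space louvain_test, per the conf below) has no property named after the selected algorithm's result column. A minimal schema-side sketch of that fix, assuming the result column is indeed named pagerank and a double property fits:

# Hypothetical fix: give the sink tag a property matching the algorithm's result column.
USE louvain_test;
ALTER TAG player ADD (pagerank double);
# If the tag does not exist yet, create it with the property instead:
# CREATE TAG IF NOT EXISTS player(pagerank double);

Note that schema changes in Nebula Graph typically take effect only after the next heartbeat cycles, so the new property may not be visible to clients immediately.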
2. Environment:
2.1 conf
{
  spark: {
    app: {
      name: LPA
      partitionNum: 100
    }
    master: local
  }
  data: {
    # Data source. Optional values: nebula, csv, json.
    source: csv
    # Data sink, i.e. the target the graph computation results are written to. Optional values: nebula, csv, json.
    sink: nebula
    # Whether the algorithm needs a weight.
    hasWeight: false
  }
  # Nebula Graph related configuration
  nebula: {
    # Data source. The nebula.read configuration only takes effect when Nebula Graph is the data source of the graph computation.
    read: {
      # IP addresses and ports of all Meta services, separated by commas. Format: "ip1:port1,ip2:port2".
      # When deployed with docker-compose, use the ports that docker-compose maps to the host,
      # which can be checked with `docker-compose ps`.
      metaAddress: "10.234.167.91:9559,10.234.167.92:9559,10.234.167.93:9559"
      # Nebula Graph space name
      space: basketballplayer
      # Nebula Graph Edge types. With multiple labels, the data of the edges will be merged.
      labels: ["serve"]
      # Property name of each Nebula Graph Edge type. The property is used as the algorithm's weight column; make sure it corresponds to the Edge type.
      weightCols: ["start_year"]
    }
    # Data sink. The nebula.write configuration only takes effect when the graph computation results are written back to Nebula Graph.
    write: {
      type: insert
      graphAddress: "10.234.167.91:9669"
      metaAddress: "10.234.167.91:9559,10.234.167.92:9559,10.234.167.93:9559"
      user: root
      pswd: ""
      space: louvain_test
      tag: player
    }
  }
  local: {
    read: {
      filePath: "/root/nebula/nebula-data/louvain_test.csv"
      srcId: "_c0"
      dstId: "_c1"
      weight: "_c2"
      header: false
      delimiter: ","
    }
    write: {
      resultPath: /tmp/
    }
  }
  algorithm: {
    executeAlgo: pagerank
    pagerank: {
      maxIter: 10
      resetProb: 0.15  # default 0.15
    }
    louvain: {
      maxIter: 20
      internalIter: 10
      tol: 0.5
    }
  }
}
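A quick way to check whether the sink tag already matches algorithm.executeAlgo is to describe it before submitting the job (a verification sketch, assuming the space and tag names from the conf above):

USE louvain_test;
DESCRIBE TAG player;
# The output should include a property whose name equals the executeAlgo value, e.g. pagerank.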
2.2 Spark, Java, and other environment details
./spark-2.4.8-bin-hadoop2.7/bin/spark-submit --master "local" --class com.vesoft.nebula.algorithm.Main /root/nebula/nebula-algorithm/nebula-algorithm/target/nebula-algorithm-3.2.0.jar -p /root/nebula/nebula-algorithm-config/louvain_read_csv.conf
java -version
openjdk version "1.8.0_472"
OpenJDK Runtime Environment (build 1.8.0_472-8u472-ga-1~22.04-b08)
OpenJDK 64-Bit Server VM (build 25.472-b08, mixed mode)
The configuration and submission follow the Nebula Algorithm - Nebula Graph Database manual, yet the algorithm key is reported as unavailable. Could this be caused by a missing extra algorithm libs package?