- NebulaGraph version: 3.6.0
- nebula-exchange version: 3.6.1
- Deployment: distributed
- Installation method: RPM
- Running in production: yes
- Hardware
  - Disk: SSD
  - CPU / memory: 16C128G
- Problem description
  nebula-exchange is used to import data from Neo4j. One edge type, address_book, ends up with missing data: the source contains 23 billion address_book relationships, the Exchange import completes successfully, and the recordSuccess count observed in the log matches the Neo4j record count. However, after running `SUBMIT JOB COMPACT` and `SUBMIT JOB STATS` in NebulaGraph, the stats show only about 2 billion edges (the statements used for this check are sketched right after this list).
- Related meta / storage / graph info log information
  nebula-graph.ERROR, nebula-storaged.ERROR, and nebula-meta.ERROR contain no error entries.
- Startup command
  /usr/local/datadisk/spark-2.4.8-bin-hadoop2.7/bin/spark-submit --master local[*] --total-executor-cores 30 --executor-cores 2 --executor-memory 2g --conf spark.driver.memory=90G --class com.vesoft.nebula.exchange.Exchange nebula-exchange_spark_2.4/target/nebula-exchange_spark_2.4-3.6.1.jar -c nebula-exchange_spark_2.4/src/main/resources/address_book_edge.conf -h > nebula_edge.log 2>&1 &
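For reference, the post-import count check described above was along these lines. This is a minimal sketch, assuming the statements are run in nebula-console against the `nebula_graph` space from the config below; `SHOW JOBS` is only used to wait for the stats job to finish:

```ngql
# Run in nebula-console against the graph service
USE nebula_graph;
SUBMIT JOB COMPACT;
SUBMIT JOB STATS;
# Wait until the stats job shows FINISHED before reading the stats
SHOW JOBS;
# Per-tag / per-edge-type counts for the space
SHOW STATS;
```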
{
  # address_book edge import configuration
  spark: {
    app: {
      name: NebulaGraph Exchange 3.6.1
    }
    driver: {
      cores: 1
      maxResultSize: 1G
    }
    cores: {
      max: 32
    }
  }

  # NebulaGraph connection configuration
  nebula: {
    address: {
      graph: ["172.22.9.1:9669","172.22.9.2:9669","172.22.9.3:9669"]
      meta: ["172.22.9.1:9569","172.22.9.2:9569","172.22.9.3:9569"]
    }
    user: root
    pswd:
    space: nebula_graph
    connection: {
      timeout: 30000
      retry: 3
    }
    execution: {
      retry: 3
    }
    error: {
      max: 32
      output: /tmp/errors
    }
    rate: {
      limit: 10000
      timeout: 30000
    }
  }

  # Edge processing
  edges: [
    {
      name: address_book
      type: {
        source: neo4j
        sink: client
      }
      server: "bolt://172.22.9.4:7687"
      user: neo4j_read
      password:
      exec: "match (a:uid)-[r:address_book]->(b:phone_number) return toString(a.name) as src, left(b.name,128) as dst"
      fields: []
      nebula.fields: []
      source: {
        field: src
      }
      target: {
        field: dst
      }
      partition: 60
      batch: 2000
      check_point_path: /data/code/nebula-exchange/checkpoint/edge/address_book
    }
  ]
}
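To narrow down which records are actually missing, individual Neo4j rows could be spot-checked against NebulaGraph. This is only an illustrative sketch; `"some_uid"` and `"some_phone_number"` are hypothetical placeholder vertex IDs, not values from the real dataset:

```ngql
USE nebula_graph;
# Does one specific imported edge exist? (src/dst values are hypothetical placeholders)
FETCH PROP ON address_book "some_uid" -> "some_phone_number" YIELD src(edge) AS src, dst(edge) AS dst;
# Out-degree of one sampled source vertex, to compare with the same count in Neo4j
GO FROM "some_uid" OVER address_book YIELD dst(edge) AS d | YIELD COUNT(*) AS out_degree;
```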