After building Nebula-Exchange, I wanted to write a csv-hdfs.yaml config to read CSV files from HDFS. When I copied the application.conf from the compiled classes directory, I found that this application.conf itself fails validation: pasting a copy of it into an online YAML validator keeps reporting the following error:
YAMLException: missed comma between flow collection entries at line 7, column 5:
    driver: {
    ^
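(Note: Exchange's application.conf is HOCON, i.e. Typesafe Config syntax, not YAML, so an online YAML validator can report errors even on a file that Exchange itself parses fine. A minimal sketch for checking the syntax with the same parser, assuming the com.typesafe:config library is on the classpath and the file name is adjusted to yours:)

import java.io.File
import com.typesafe.config.{Config, ConfigFactory}

object CheckConf {
  def main(args: Array[String]): Unit = {
    // parseFile reads HOCON and throws a ConfigException with
    // line/column details if the syntax really is broken.
    val conf: Config = ConfigFactory.parseFile(new File("application.conf")).resolve()
    // Spot-check a couple of keys to confirm the structure came through.
    println(conf.getString("spark.app.name"))
    println(conf.getStringList("nebula.address.graph"))
  }
}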
The config file itself is as follows:
{
  spark: {
    app: {
      name: Nebula Exchange master
    }
    driver: {
      cores: 1
      maxResultSize: 1G
    }
    executor: {
      memory:1G
    }
    cores: {
      max: 2
    }
  }
  nebula: {
    address:{
      graph:["119.3.216.97:9669"],
      meta:["119.3.216.97:9559"]
    }
    user: root
    pswd: nebula
    space: basketballplayer
    connection: {
      timeout: 3000
      retry: 3
    }
    execution {
      retry: 3
    }
    error: {
      max: 32
      output: /tmp/errors
    }
    rate: {
      limit: 1024
      timeout: 1000
    }
  }
  tags: [
    {
      name: player
      type: {
        source: csv
        sink: client
      }
      path: "hdfs://119.3.216.97:9000/data/vertex_player.csv"
      fields: [_c1, _c2]
      nebula.fields: [age, name]
      vertex: {
        field:_c0
        # policy:hash
      }
      separator: ","
      header: false
      batch: 256
      partition: 32
    }
    # Settings for the Tag team.
    {
      name: team
      type: {
        source: csv
        sink: client
      }
      path: "hdfs://192.168.*.*:9000/data/vertex_team.csv"
      fields: [_c1]
      nebula.fields: [name]
      vertex: {
        field:_c0
        # policy:hash
      }
      separator: ","
      header: false
      batch: 256
      partition: 2
    }
  ]
  # Edge settings.
  edges: [
    {
      name: follow
      type: {
        source: csv
        sink: client
      }
      path: "hdfs://119.3.238.38:9000/data/edge_follow.csv"
      fields: [_c2]
      nebula.fields: [degree]
      source: {
        field: _c0
      }
      target: {
        field: _c1
      }
      separator: ","
      header: false
      batch: 256
      partition: 3
    }
    # Settings for the Edge type serve.
    {
      name: serve
      type: {
        source: csv
        sink: client
      }
      path: "hdfs://192.168.*.*:9000/data/edge_serve.csv"
      fields: [_c2,_c3]
      nebula.fields: [start_year, end_year]
      source: {
        field: _c0
      }
      target: {
        field: _c1
      }
      separator: ","
      header: false
      batch: 256
      partition: 3
    }
  ]
}
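If the file parses, the import itself is launched with spark-submit; a sketch of the invocation follows (the jar name, master URL, and config path are placeholders, use whatever your build produced; the entry class shown is the one Exchange 2.x documents, adjust if your version differs):

spark-submit --master "local" \
    --class com.vesoft.nebula.exchange.Exchange \
    nebula-exchange.jar \
    -c /path/to/application.conf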