{
  # Spark settings
  # See: http://spark.apache.org/docs/latest/configuration.html
  spark: {
    app: {
      name: Spark Writer
    }

    driver: {
      cores: 16
      maxResultSize: 16G
    }

    cores {
      max: 16
    }
  }

  # Nebula Graph settings
  nebula: {
    # Query-engine (graphd) and meta-service address lists
    address: {
      graph: ["10.130.196.48:9669", "10.130.196.49:9669", "10.131.211.55:9669", "10.131.211.56:9669", "10.131.211.57:9669", "10.134.7.164:9669"]
      meta: ["10.130.196.48:9559", "10.130.196.49:9559", "10.131.211.55:9559"]
    }

    # Credentials for connecting to the Nebula Graph service
    user: root
    pswd: nebula

    # Nebula Graph space name
    space: knowledge_graph_v4

    # Thrift timeout and retry count
    # Defaults are 3000 and 3 respectively when unset
    connection {
      timeout: 10000000
      retry: 10
    }

    path: {
      local: "./knowledge_graph_v4"
      # remote: "hdfs://alg-hdfs/warehouse/net_kg/nebula/knowledge_graph_v4/edgeAll/download"
      remote: "/knowledge_graph_v4/download"
      hdfs.namenode: "hdfs://alg-hdfs"
    }

    # nGQL execution retry count
    # Default is 3 when unset
    execution {
      retry: 10
    }

    # Max tolerated errors and where error records are written
    # NOTE(review): `/temp/errors` — possibly `/tmp/errors` was intended; confirm the target path exists
    error: {
      max: 32
      output: /temp/errors
    }

    # Rate limiter settings
    rate: {
      limit: 1024
      timeout: 10000000
    }
  }

  # Tag (vertex) import jobs
  tags: [
    # Same structure as the edges below
    # For a Hive source, the command in ${exec} is run to produce the dataset
  ]

  # Edge import jobs
  edges: [
    # For a Hive source, the command in ${exec} is run to produce the dataset
    # The edge weight (ranking) is optional
    {
      name: Activity_host
      type: {
        source: hive
        # NOTE(review): sink is usually written lowercase (`sst`/`client`) in Exchange configs — confirm `SST` is accepted
        sink: SST
      }
      exec: "select edge_label,src_id, dst_id, rank from oppo_kg_dw.dwd_kg_release_spo_edge_prop_1_v3_55_df where data_version='v1.1.20211205.alpha' and ds = '20220521' order by edge_label "
      fields: [rank]
      nebula.fields: [rank]
      source: { field: src_id }
      target: { field: dst_id }
      ranking: rank
      # NOTE(review): `header` normally applies to CSV-family sources; presumably ignored for hive — confirm
      header: true
      batch: 512
      partition: 512
    }
  ]
}