{
  # Spark settings.
  # Reference: http://spark.apache.org/docs/latest/configuration.html
  spark: {
    app: {
      name: Spark Writer
    }

    driver: {
      cores: 16
      maxResultSize: 16G
    }

    cores: {
      max: 16
    }
  }

  # Nebula Graph settings.
  nebula: {
    # Addresses (host:port) of the query engine (graph) and meta services.
    address: {
      graph: [
        "10.130.196.48:9669",
        "10.130.196.49:9669",
        "10.131.211.55:9669",
        "10.131.211.56:9669",
        "10.131.211.57:9669",
        "10.134.7.164:9669"
      ]
      meta: [
        "10.130.196.48:9559",
        "10.130.196.49:9559",
        "10.131.211.55:9559"
      ]
    }

    # User name and password used to connect to the Nebula Graph service.
    # NOTE(review): credentials are stored here in plain text.
    user: root
    pswd: nebula

    # Name of the Nebula Graph space.
    space: knowledge_graph_v4

    # Thrift connection timeout and retry count.
    # If unset, the defaults are 3000 and 3 respectively.
    connection: {
      timeout: 10000000
      retry: 10
    }

    # Local and remote paths, plus the HDFS namenode.
    path: {
      local: "./knowledge_graph_v4"
      # Alternative remote path, currently disabled:
      # remote:"hdfs://alg-hdfs/warehouse/net_kg/nebula/knowledge_graph_v4/Thing/download"
      remote: "/knowledge_graph_v4/download"
      hdfs.namenode: "hdfs://alg-hdfs"
    }

    # Retry count for nGQL statement execution.
    # If unset, the default is 3.
    execution: {
      retry: 10
    }

    error: {
      max: 32
      # NOTE(review): "/temp/errors" may have been intended as "/tmp/errors" - confirm.
      output: /temp/errors
    }

    rate: {
      limit: 1024
      timeout: 10000000
    }
  }

  # Tag (vertex) definitions to process.
  tags: [
    # Similar to the above.
    # When loading from Hive, the statement in ${exec} is executed and its
    # result is used as the dataset.
    {
      name: Thing
      type: {
        source: hive
        sink: SST
      }
      exec: "select thing_id, thing_name, thing_title, thing_namech, thing_nameen, thing_abbreviation, thing_tag, thing_alias, thing_abstract, thing_image, thing_video, thing_audio, thing_gmtcreated, thing_gmtmodified, thing_popularity, thing_prior, thing_datasource, thing_urls, thing_class, thing_imagejson, thing_embedding, thing_sourceids, thing_videocover, thing_englishname, thing_japanname, thing_koreaname, thing_category, thing_thumbnails, thing_pictures, thing_additionalinfo, thing_subtitle, thing_edition from oppo_kg_dw.dwd_kg_release_spo_thing_1_v3_55_df where data_version='v1.1.20211205.alpha' and ds = '20220521'"

      # Source columns; listed in the same order as nebula.fields below.
      fields: [
        thing_name,
        thing_title,
        thing_namech,
        thing_nameen,
        thing_abbreviation,
        thing_tag,
        thing_alias,
        thing_abstract,
        thing_image,
        thing_video,
        thing_audio,
        thing_gmtcreated,
        thing_gmtmodified,
        thing_popularity,
        thing_prior,
        thing_datasource,
        thing_urls,
        thing_class,
        thing_imagejson,
        thing_embedding,
        thing_sourceids,
        thing_videocover,
        thing_englishname,
        thing_japanname,
        thing_koreaname,
        thing_category,
        thing_thumbnails,
        thing_pictures,
        thing_additionalinfo,
        thing_subtitle,
        thing_edition
      ]

      # Nebula property names; listed in the same order as fields above.
      nebula.fields: [
        Thing_name,
        Thing_title,
        Thing_nameCh,
        Thing_nameEn,
        Thing_abbreviation,
        Thing_tag,
        Thing_alias,
        Thing_abstract,
        Thing_image,
        Thing_video,
        Thing_audio,
        Thing_gmtCreated,
        Thing_gmtModified,
        Thing_popularity,
        Thing_prior,
        Thing_dataSource,
        Thing_urls,
        Thing_class,
        Thing_imageJson,
        Thing_embedding,
        Thing_sourceIds,
        Thing_videoCover,
        Thing_englishName,
        Thing_japanName,
        Thing_koreaName,
        Thing_category,
        Thing_thumbnails,
        Thing_pictures,
        Thing_additionalInfo,
        Thing_subTitle,
        Thing_edition
      ]

      # Source column referenced by the vertex setting.
      vertex: {
        field: thing_id
      }
      isImplicit: true
      batch: 128
      partition: 24
    }
  ]

  # Edge definitions to process (none configured).
  edges: [
    # When loading from Hive, the statement in ${exec} is executed and its
    # result is used as the dataset.
    # The edge weight is optional.
  ]
}