单台配置:内存 96G 磁盘 hdd 40T core:32
使用 nebula-exchange 2.5.0 将 Hive 数据以 client 模式写入 Nebula 2.5.0
20G数据
connection {
timeout: 30000
retry: 3
}
execution {
retry: 3
}
error: {
max: 32
output: /tmp/errors
}
rate: {
limit: 1024
timeout: 60000
}
依然报错:
com.vesoft.nebula.client.graph.exception.IOErrorException: java.net.SocketTimeoutException: Read timed out
graph.conf:
--storage_client_timeout_ms=60000
--timezone_name=CST-8
########## basics ##########
# Whether to run as a daemon process
--daemonize=true
# The file to host the process id
--pid_file=pids/nebula-graphd.pid
# Whether to enable optimizer
--enable_optimizer=true
# The default charset when a space is created
--default_charset=utf8
# The default collation when a space is created
--default_collate=utf8_bin
# Whether to use the configuration obtained from the configuration file
--local_config=true
########## logging ##########
# The directory to host logging files
--log_dir=logs
# Log level, 0, 1, 2, 3 for INFO, WARNING, ERROR, FATAL respectively
--minloglevel=1
# Verbose log level, 1, 2, 3, 4; the higher the level, the more verbose the logging
--v=0
# Maximum seconds to buffer the log messages
--logbufsecs=0
# Whether to redirect stdout and stderr to separate output files
--redirect_stdout=true
# Destination filename of stdout and stderr, which will also reside in log_dir.
--stdout_log_file=graphd-stdout.log
--stderr_log_file=graphd-stderr.log
# Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively.
--stderrthreshold=2
########## query ##########
# Whether to treat partial success as an error.
# This flag is only used for Read-only access, and Modify access always treats partial success as an error.
--accept_partial_success=false
########## networking ##########
# Comma separated Meta Server Addresses
--meta_server_addrs=h30067:9559,h30068:9559,h30069:9559
# Local IP used to identify the nebula-graphd process.
# Change it to an address other than loopback if the service is distributed or
# will be accessed remotely.
--local_ip=h30067
# Network device to listen on
--listen_netdev=any
# Port to listen on
--port=9669
# To turn on SO_REUSEPORT or not
--reuse_port=false
# Backlog of the listen socket, adjust this together with net.core.somaxconn
--listen_backlog=1024
# Seconds before the idle connections are closed, 0 for never closed
--client_idle_timeout_secs=0
# Seconds before the idle sessions are expired, 0 for no expiration
--session_idle_timeout_secs=0
# The number of threads to accept incoming connections
--num_accept_threads=1
# The number of networking IO threads, 0 for # of CPU cores
--num_netio_threads=0
# The number of threads to execute user queries, 0 for # of CPU cores
--num_worker_threads=0
# HTTP service ip
--ws_ip=0.0.0.0
# HTTP service port
--ws_http_port=19669
# HTTP2 service port
--ws_h2_port=19670
# storage client timeout
--storage_client_timeout_ms=60000
# Port to listen on Meta with HTTP protocol, it corresponds to ws_http_port in metad's configuration file
--ws_meta_http_port=19559
########## authentication ##########
# Enable authorization
--enable_authorize=false
# User login authentication type, password for nebula authentication, ldap for ldap authentication, cloud for cloud authentication
--auth_type=password
########## memory ##########
# System memory high watermark ratio
--system_memory_high_watermark_ratio=1
exchange配置:
{
# Spark相关配置
spark: {
app: {
name: Nebula Exchange 2.1.0
}
driver: {
cores: 1
maxResultSize: 1G
}
cores {
max: 128
}
}
# Nebula Graph相关配置
nebula: {
address:{
# 以下为Nebula Graph的Graph服务和所有Meta服务所在机器的IP地址及端口。
# 如果有多个地址,格式为 "ip1:port","ip2:port","ip3:port"。
# 不同地址之间以英文逗号 (,) 隔开。
graph:[""]
meta:[""]
}
# 填写的账号必须拥有Nebula Graph相应图空间的写数据权限。
user: root
pswd: nebula
# 填写Nebula Graph中需要写入数据的图空间名称。
space: risk_control
path:{
local:"/tmp/"
remote:"/sst/tel"
hdfs.namenode: "hdfs://hadoop3"
}
connection {
timeout: 30000
retry: 3
}
execution {
retry: 3
}
error: {
max: 32
output: /tmp/errors
}
rate: {
limit: 1024
timeout: 60000
}
}
# 处理点
tags: [
# 设置Tag player相关信息。
{
# Nebula Graph中对应的Tag名称。
name: Tel
type: {
# 指定数据源文件格式,设置为hive。
source: hive
# 指定如何将点数据导入Nebula Graph:Client或SST。
sink: client
}
exec: "select concat('t_',cust_mobile) as tel from ods.customer where length(cust_mobile)=11 group by cust_mobile"
fields: []
nebula.fields: []
# 指定表中某一列数据为Nebula Graph中点VID的来源。
# vertex.field的值必须与上述fields中的列名保持一致。
vertex:{
field:tel
}
# 单批次写入 Nebula Graph 的最大数据条数。
batch: 1024
# Spark 分区数量
partition: 64
}
]
}