nebula已经入库数据丢失

丢失的这个边类型的数据都已上传附件了,其他的数据也需要上传吗?

graphd配置

########## basics ##########
# Whether to run as a daemon process
--daemonize=false
# The file to host the process id
--pid_file=/data/br/base/NEBULA/pids/nebula-graphd.pid
# Whether to enable optimizer
--enable_optimizer=true
# Heartbeat interval of communication between meta client and graphd service
--heartbeat_interval_secs=10
# Whether to use the configuration obtained from the configuration file
--local_config=true

########## logging ##########
# The directory to host logging files
--log_dir=/data/br/base/NEBULA/logs
# Log level, 0, 1, 2, 3 for INFO, WARNING, ERROR, FATAL respectively
--minloglevel=0
# Verbose log level, 1, 2, 3, 4, the higher of the level, the more verbose of the logging
--v=4
# Maximum seconds to buffer the log messages
--logbufsecs=0
# Whether to redirect stdout and stderr to separate output files
--redirect_stdout=true
# Destination filename of stdout and stderr, which will also reside in log_dir.
--stdout_log_file=graphd-stdout.log
--stderr_log_file=graphd-stderr.log
# Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively.
--stderrthreshold=2
# Whether logging files' names contain a timestamp
--timestamp_in_logfile_name=true

########## query ##########
# Whether to treat partial success as an error.
# This flag is only used for Read-only access, and Modify access always treats partial success as an error.
--accept_partial_success=false
# Maximum sentence length, unit byte
--max_allowed_query_size=4194304
# 时区设置
--timezone_name=UTC+08:00
# 新增以下配置
--max_sessions_per_ip_per_user=30000

########## networking ##########
# Comma separated Meta Server Addresses
--meta_server_addrs=10.210.40.65:9559,10.210.40.66:9559,10.210.40.67:9559,10.210.40.68:9559,10.210.40.69:9559
# Local IP used to identify the nebula-graphd process.
# Change it to an address other than loopback if the service is distributed or
# will be accessed remotely.
--local_ip=10.210.40.69
# Network device to listen on
--listen_netdev=any
# Port to listen on
--port=9669
# To turn on SO_REUSEPORT or not
--reuse_port=false
# Backlog of the listen socket, adjust this together with net.core.somaxconn
--listen_backlog=1024
# The number of seconds Nebula service waits before closing the idle connections
--client_idle_timeout_secs=28800
# The number of seconds before idle sessions expire
# The range should be in [1, 604800]
--session_idle_timeout_secs=28800
# The number of threads to accept incoming connections
--num_accept_threads=1
# The number of networking IO threads, 0 for # of CPU cores
--num_netio_threads=0
# The number of threads to execute user queries, 0 for # of CPU cores
--num_worker_threads=0
# HTTP service ip
--ws_ip=10.210.40.69
# HTTP service port
--ws_http_port=19669
# HTTP2 service port
--ws_h2_port=19670
# storage client timeout
--storage_client_timeout_ms=60000
# Port to listen on Meta with HTTP protocol, it corresponds to ws_http_port in metad's configuration file
--ws_meta_http_port=19559

########## authentication ##########
# Enable authorization
--enable_authorize=false
# User login authentication type, password for nebula authentication, ldap for ldap authentication, cloud for cloud authentication
--auth_type=password

########## memory ##########
# System memory high watermark ratio, cancel the memory checking when the ratio greater than 1.0
--system_memory_high_watermark_ratio=0.8

########## metrics ##########
--enable_space_level_metrics=true

########## experimental feature ##########
# if use experimental features
--enable_experimental_feature=false

storage配置

########## basics ##########
# Whether to run as a daemon process
--daemonize=false
# The file to host the process id
--pid_file=/data/br/base/NEBULA/pids/nebula-storaged.pid
# Whether to use the configuration obtained from the configuration file
--local_config=true

########## logging ##########
# The directory to host logging files
--log_dir=/data/br/base/NEBULA/logs
# Log level, 0, 1, 2, 3 for INFO, WARNING, ERROR, FATAL respectively
--minloglevel=0
# Verbose log level, 1, 2, 3, 4, the higher of the level, the more verbose of the logging
--v=4
# Maximum seconds to buffer the log messages
--logbufsecs=0
# Whether to redirect stdout and stderr to separate output files
--redirect_stdout=true
# Destination filename of stdout and stderr, which will also reside in log_dir.
--stdout_log_file=storaged-stdout.log
--stderr_log_file=storaged-stderr.log
# Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively.
--stderrthreshold=2
# Whether logging files' names contain a timestamp.
--timestamp_in_logfile_name=true

########## networking ##########
# Comma separated Meta server addresses
--meta_server_addrs=10.210.40.65:9559,10.210.40.66:9559,10.210.40.67:9559,10.210.40.68:9559,10.210.40.69:9559
# Local IP used to identify the nebula-storaged process.
# Change it to an address other than loopback if the service is distributed or
# will be accessed remotely.
--local_ip=10.210.40.69
# Storage daemon listening port
--port=9779
# HTTP service ip
--ws_ip=10.210.40.69
# HTTP service port
--ws_http_port=19779
# HTTP2 service port
--ws_h2_port=19780
# heartbeat with meta service
--heartbeat_interval_secs=10

######### Raft #########
# Raft election timeout
--raft_heartbeat_interval_secs=30
# RPC timeout for raft client (ms)
--raft_rpc_timeout_ms=1000
## recycle Raft WAL
--wal_ttl=10800

########## Disk ##########
# key的前缀长度
--rocksdb_filtering_prefix_length=16
# Root data path. split by comma. e.g. --data_path=/disk1/path1/,/disk2/path2/
# One path per Rocksdb instance.
--data_path=/data/br/base/NEBULA/data/storage

# Minimum reserved bytes of each data path
--minimum_reserved_bytes=268435456

# The default reserved bytes for one batch operation
--rocksdb_batch_size=4096
# The default block cache size used in BlockBasedTable. (MB)
# recommend: 1/3 of all memory
--rocksdb_block_cache=4096

# Compression algorithm, options: no,snappy,lz4,lz4hc,zlib,bzip2,zstd
# For the sake of binary compatibility, the default value is snappy.
# Recommend to use:
#   * lz4 to gain more CPU performance, with the same compression ratio with snappy
#   * zstd to occupy less disk space
#   * lz4hc for the read-heavy write-light scenario
--rocksdb_compression=lz4

# Set different compressions for different levels
# For example, if --rocksdb_compression is snappy,
# "no:no:lz4:lz4::zstd" is identical to "no:no:lz4:lz4:snappy:zstd:snappy"
# In order to disable compression for level 0/1, set it to "no:no"
--rocksdb_compression_per_level=

############## rocksdb Options ##############
# rocksdb DBOptions in json, each name and value of option is a string, given as "option_name":"option_value" separated by comma
--rocksdb_db_options={"max_subcompactions":"4","max_background_jobs":"4"}
# rocksdb ColumnFamilyOptions in json, each name and value of option is string, given as "option_name":"option_value" separated by comma
--rocksdb_column_family_options={"disable_auto_compactions":"false","write_buffer_size":"67108864","max_write_buffer_number":"4","max_bytes_for_level_base":"268435456"}
# rocksdb BlockBasedTableOptions in json, each name and value of option is string, given as "option_name":"option_value" separated by comma
--rocksdb_block_based_table_options={"block_size":"8192"}

# Whether or not to enable rocksdb's statistics, disabled by default
--enable_rocksdb_statistics=false

# Statslevel used by rocksdb to collection statistics, optional values are
#   * kExceptHistogramOrTimers, disable timer stats, and skip histogram stats
#   * kExceptTimers, Skip timer stats
#   * kExceptDetailedTimers, Collect all stats except time inside mutex lock AND time spent on compression.
#   * kExceptTimeForMutex, Collect all stats except the counters requiring to get time inside the mutex lock.
#   * kAll, Collect all stats
--rocksdb_stats_level=kExceptHistogramOrTimers

# Whether or not to enable rocksdb's prefix bloom filter, enabled by default.
--enable_rocksdb_prefix_filtering=true
# Whether or not to enable rocksdb's whole key bloom filter, disabled by default.
--enable_rocksdb_whole_key_filtering=false

############## Key-Value separation ##############
# Whether or not to enable BlobDB (RocksDB key-value separation support)
--rocksdb_enable_kv_separation=true
# RocksDB key value separation threshold. Values at or above this threshold will be written to blob files during flush or compaction.
--rocksdb_kv_separation_threshold=100
# Compression algorithm for blobs, options: no,snappy,lz4,lz4hc,zlib,bzip2,zstd
--rocksdb_blob_compression=lz4
# Whether to garbage collect blobs during compaction
--rocksdb_enable_blob_garbage_collection=true

############### misc ####################
--snapshot_part_rate_limit=10485760
--snapshot_batch_size=1048576
--rebuild_index_part_rate_limit=4194304
--rebuild_index_batch_size=1048576

在############### misc ####################下修改
# 网络 I/O 线程的数量,用于发送 RPC 请求和接收响应。默认为16,建议设置cpu核数
--num_io_threads=16
# Storage 的 RPC 服务的工作线程数量。默认32,建议设置为num_io_threads*2
--num_worker_threads=32
# TaskManager 并发执行子任务的最大个数。 默认值为10
--max_concurrent_subtasks=10

meta配置

########## basics ##########
# Whether to run as a daemon process
--daemonize=false
# The file to host the process id
--pid_file=/data/br/base/NEBULA/pids/nebula-metad.pid


########## logging ##########
# The directory to host logging files
--log_dir=/data/br/base/NEBULA/logs
# Log level, 0, 1, 2, 3 for INFO, WARNING, ERROR, FATAL respectively
--minloglevel=0
# Verbose log level, 1, 2, 3, 4, the higher of the level, the more verbose of the logging
--v=4
# Maximum seconds to buffer the log messages
--logbufsecs=0
# Whether to redirect stdout and stderr to separate output files
--redirect_stdout=true
# Destination filename of stdout and stderr, which will also reside in log_dir.
--stdout_log_file=metad-stdout.log
--stderr_log_file=metad-stderr.log
# Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively.
--stderrthreshold=2
# Whether logging files' names contain a timestamp.
--timestamp_in_logfile_name=true

########## networking ##########
# Comma separated Meta Server addresses
--meta_server_addrs=10.210.40.65:9559,10.210.40.66:9559,10.210.40.67:9559,10.210.40.68:9559,10.210.40.69:9559
# Local IP used to identify the nebula-metad process.
# Change it to an address other than loopback if the service is distributed or
# will be accessed remotely.
--local_ip=10.210.40.69
# Meta daemon listening port
--port=9559
# HTTP service ip
--ws_ip=10.210.40.69
# HTTP service port
--ws_http_port=19559
# HTTP2 service port
--ws_h2_port=19560
# Port to listen on Storage with HTTP protocol, it corresponds to ws_http_port in storage's configuration file
--ws_storage_http_port=19779

########## storage ##########
# Root data path, here should be only single path for metad
--data_path=/data/br/base/NEBULA/data/meta

########## Misc #########
# The default number of parts when a space is created
--default_parts_num=100
# The default replica factor when a space is created
--default_replica_factor=1

--heartbeat_interval_secs=10
--agent_heartbeat_interval_secs=60

############## rocksdb Options ##############
--rocksdb_wal_sync=true

所有创建语句
CREATE SPACE call_link (partition_num = 100, replica_factor = 3, charset = utf8, collate = utf8_bin, vid_type = FIXED_STRING(128)) comment = "调用链路拓扑图空间"

CREATE SPACE root_cause_analysis (partition_num = 100, replica_factor = 3, charset = utf8, collate = utf8_bin, vid_type = FIXED_STRING(128)) comment = "根因分析结果拓扑图空间"

CREATE SPACE entity_relationship (partition_num = 100, replica_factor = 3, charset = utf8, collate = utf8_bin, vid_type = FIXED_STRING(128)) comment = "实体关系拓扑图空间"

CREATE TAG one_trace_span_tag ( ttl_time timestamp NOT NULL, span_id string NOT NULL, trace_id string NOT NULL, service_id int64 NULL, service_name string NULL, mq_span_flag int64 NOT NULL ) ttl_duration = 1296000, ttl_col = "ttl_time"

CREATE TAG one_entity_backend_tag ( ttl_time timestamp NOT NULL, name string NOT NULL, type int64 NOT NULL, user_id int64 NOT NULL, create_time timestamp NOT NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE TAG one_entity_common_tag ( ttl_time timestamp NOT NULL, name string NOT NULL, type int64 NOT NULL, ip string NULL, user_id int64 NOT NULL, create_time timestamp NOT NULL, ext_info string NULL, technology string NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE TAG ai_rca_alert_tag ( alert_id string NULL, start_time string NULL, end_time string NULL, rca_score double NULL, is_rca bool NULL, source string NULL, active_status bool NULL, is_changed bool NULL, create_time timestamp NULL, raw_alert_info string NULL, incident_vid string NULL, produced_by string NULL, rca_start_time string NULL, rca_end_time string NULL, metric_unique_code string NULL ) ttl_duration = 2592000, ttl_col = "create_time"

CREATE TAG ai_rca_entity_tag ( name string NULL, type string NULL, alert_status bool NULL, alert_num int64 NULL, create_time timestamp NULL, basic_info string NULL, incident_vid string NULL, alert_start_time timestamp NULL, alert_end_time timestamp NULL, technology_type string NULL, entity_id string NULL ) ttl_duration = 2592000, ttl_col = "create_time"

CREATE TAG ai_rca_incident_tag ( status bool NULL, is_new bool NULL, description string NULL, level int64 NULL, incident_append_alerts string NULL, root_cases string NULL, incident_type string NULL, slave_incidents string NULL, master_incidents string NULL, params string NULL, create_time timestamp NULL, modify_time timestamp NULL, custom string NULL, incident_alerts string NULL, account_id string NULL ) ttl_duration = 2592000, ttl_col = "create_time"

CREATE TAG ai_rca_server_tag ( name string NULL, type string NULL, alert_status bool NULL, alert_num int64 NULL, create_time timestamp NULL, is_appended bool NULL, metric_type string NULL, basic_info string NULL, incident_vid string NULL, alert_start_time timestamp NULL, alert_end_time timestamp NULL, technology_type string NULL, service_metric_type string NULL, service_id string NULL ) ttl_duration = 2592000, ttl_col = "create_time"

CREATE EDGE ai_rca_edge_tag ( type string NULL, weight int64 NULL, create_time timestamp NULL ) ttl_duration = 2592000, ttl_col = "create_time"

CREATE EDGE one_trace_common_edge ( ttl_time timestamp NOT NULL, br_user_id int64 NOT NULL, trace_id string NOT NULL, url string NOT NULL, url_host string NULL, total_time int64 NOT NULL, status int64 NOT NULL, http_method string NOT NULL, http_code int64 NOT NULL, data_type int64 NOT NULL, agent_id int64 NULL, span_id string NULL, async_flag int64 NULL, rpc_time int64 NULL, db_time int64 NULL, mq_time int64 NULL, becd_detail_hash string NULL, key_method_name string NULL, request_attribute_detail string NULL, first_package_time int64 NULL, dns_time int64 NULL, tcp_time int64 NULL, ssl_time int64 NULL, server_resp_time int64 NULL, receive_time int64 NULL, session_id string NULL, user_id string NULL, cpu_time int64 NOT NULL, be_url_hash string NOT NULL, backend_type_proto int64 NOT NULL, db_type string NULL, db_name string NULL, db_port string NULL, topic_name string NULL, monitor_time int64 NOT NULL ) ttl_duration = 1296000, ttl_col = "ttl_time"

CREATE EDGE one_entity_service_virtual ( ttl_time timestamp NOT NULL, user_id int64 NOT NULL, request_count int64 NULL, error_count int64 NULL, response_time int64 NULL, property_type int64 NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE EDGE one_entity_process_virtual ( ttl_time timestamp NOT NULL, user_id int64 NOT NULL, cpu_usage int64 NOT NULL, mem_usage int64 NOT NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE EDGE one_entity_host_virtual ( ttl_time timestamp NOT NULL, user_id int64 NOT NULL, average_load_last_15_min int64 NOT NULL, cpu_usage int64 NOT NULL, mem_usage int64 NOT NULL, disk_usage int64 NOT NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE EDGE one_entity_common_edge ( ttl_time timestamp NOT NULL, user_id int64 NOT NULL, request_count int64 NULL, error_count int64 NULL, response_time int64 NULL, property_type int64 NULL, layout int64 NOT NULL ) ttl_duration = 2592000, ttl_col = "ttl_time"

CREATE TAG INDEX service_id ON one_trace_span_tag(service_id)
CREATE TAG INDEX span_trace_id ON one_trace_span_tag(trace_id(20))
CREATE TAG INDEX entity_type ON one_entity_common_tag ( type )
CREATE TAG INDEX one_entity_backend_tag_index ON one_entity_backend_tag ( )
CREATE TAG INDEX one_entity_common_tag_index ON one_entity_common_tag ( )
CREATE TAG INDEX server_incident_vid ON ai_rca_server_tag ( incident_vid(20) )
CREATE TAG INDEX open_status ON ai_rca_incident_tag ( status )
CREATE TAG INDEX entity_incident_vid ON ai_rca_entity_tag ( incident_vid(20) )
CREATE TAG INDEX alert_prodeced_by ON ai_rca_alert_tag ( produced_by(10) )
CREATE TAG INDEX alert_incident_vid ON ai_rca_alert_tag ( incident_vid(20) )
CREATE EDGE INDEX trace_id_index_0 ON one_trace_common_edge ( trace_id(32) )

看storage和graph的日志v=4,能否上传下丢数据那段时间的日志?

好的,等下我把日志上传上来

此话题已在最后回复的 30 天后被自动关闭。不再允许新回复。