docker swarm v3.2.0 集群部署失败

crx · 2023 年8 月 29 日 10:33

由于服务器有限，使用了两台服务器加入了swarm 进行部署
服务器a： graphd，metad0，storaged0
服务器b: storaged1，storaged2, metad1，metad2

ymal如下：

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.147.172
      - --ws_ip=120.46.147.172
      - --port=9559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19559/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad0:/data/meta
      - logs-metad0:/logs
    networks:
      - nebula-net

  metad1:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=9559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19559/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad1:/data/meta
      - logs-metad1:/logs
    networks:
      - nebula-net

  metad2:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=5555
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19558/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19558
        published: 19559
        protocol: tcp
        mode: host
      - target: 19561
        published: 19560
        protocol: tcp
        mode: host
      - target: 5555
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad2:/data/meta
      - logs-metad2:/logs
    networks:
      - nebula-net

  storaged0:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.147.172
      - --ws_ip=120.46.147.172
      - --port=9779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19779/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged0:/data/storage
      - logs-storaged0:/logs
    networks:
      - nebula-net

  storaged1:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=9779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19779/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged1:/data/storage
      - logs-storaged1:/logs
    networks:
      - nebula-net

  storaged2:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=4778
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19778/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19778
        published: 19779
        protocol: tcp
        mode: host
      - target: 19781
        published: 19780
        protocol: tcp
        mode: host
      - target: 4778
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged2:/data/storage
      - logs-storaged2:/logs
    networks:
      - nebula-net

  graphd:
    image: vesoft/nebula-graphd:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --port=9669
      - --ws_ip=120.46.147.172
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: ""
    command: 
      - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`nebula-console -addr 120.46.147.172 -port 9669 -u root -p nebula -e 'ADD HOSTS "120.46.147.172":9779,"120.46.178.164":4778,"120.46.178.164":9779`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;

    depends_on:
      - graphd
    networks:
      - nebula-net
      
networks:
  nebula-net:
    external: true
    attachable: true
    name: host

volumes:
  data-metad0:
  logs-metad0:
  data-metad1:
  logs-metad1:
  data-metad2:
  logs-metad2:
  data-storaged0:
  logs-storaged0:
  data-storaged1:
  logs-storaged1:
  data-storaged2:
  logs-storaged2:
  logs-graphd:

运行后
docker stack deploy -c xxxxx .ymal nebula
启动失败图片：

根据docker swarm部署中的文档更改后进行启动也是失败，和上图一样。请问该如何处理呢？

crx · 2023 年8 月 30 日 05:37

[root@ecs-3f95 logs-metad0]# ls
metad-stderr.log                                        nebula-metad.ecs-3f95.root.log.WARNING.20230830-051551.1
metad-stdout.log                                        nebula-metad.ERROR
nebula-metad.ecs-3f95.root.log.ERROR.20230830-051551.1  nebula-metad.INFO
nebula-metad.ecs-3f95.root.log.INFO.20230830-051551.1   nebula-metad.WARNING
[root@ecs-3f95 logs-metad0]# cat metad-stderr.log 
E20230830 05:15:51.885763     1 FileUtils.cpp:377] Failed to read the directory "/data/meta/nebula" (2): No such file or directory
[root@ecs-3f95 logs-metad0]# cat metad-stdout.log 
[root@ecs-3f95 logs-metad0]# cat nebula-metad.ERROR 
Log file created at: 2023/08/30 05:15:51
Running on machine: ecs-3f95
Running duration (h:mm:ss): 0:00:00
Log line format: [IWEF]yyyymmdd hh:mm:ss.uuuuuu threadid file:line] msg
E20230830 05:15:51.885763     1 FileUtils.cpp:377] Failed to read the directory "/data/meta/nebula" (2): No such file or directory

使用docker run -it vesoft/nebula-storaged:v3.2.0 直接卡住不动

henson · 2023 年8 月 31 日 03:54

看日志文件路径不对，在yml文件里提前生成好文件夹试试呢

crx · 2023 年9 月 2 日 06:23

`E20230902 06:14:10.282040    63 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:10.282095    63 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:10.282218    63 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:10.282269    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:10.877132     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:11.777395    51 DiskManager.cpp:161] Refresh filesystem info of "/data/meta"
I20230902 06:14:11.819051    50 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230902 06:14:11.819113    50 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230902 06:14:11.819190    64 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055acfb200 for host: "metad0":9560
I20230902 06:14:11.819204    64 ThriftClientManager-inl.h:53] There is no existing client to "metad0":9560, trying to create one
E20230902 06:14:11.823705    64 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad0': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:11.823758    64 ThriftClientManager-inl.h:74] Connecting to "metad0":9560 for 5 times
I20230902 06:14:11.823848    64 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055acfb700 for host: "metad1":9560
I20230902 06:14:11.823858    64 ThriftClientManager-inl.h:53] There is no existing client to "metad1":9560, trying to create one
E20230902 06:14:11.827549    64 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:11.827613    64 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:11.827741    64 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:11.827785    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:11.878156     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:12.879294     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:13.583830    47 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230902 06:14:13.583879    47 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230902 06:14:13.583945    65 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055ad0c200 for host: "metad0":9560
I20230902 06:14:13.583956    65 ThriftClientManager-inl.h:53] There is no existing client to "metad0":9560, trying to create one
E20230902 06:14:13.589493    65 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad0': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:13.589532    65 ThriftClientManager-inl.h:74] Connecting to "metad0":9560 for 5 times
I20230902 06:14:13.589617    65 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055ad0c700 for host: "metad1":9560
I20230902 06:14:13.589628    65 ThriftClientManager-inl.h:53] There is no existing client to "metad1":9560, trying to create one
E20230902 06:14:13.592943    65 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:13.592976    65 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:13.593034    65 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:13.593072    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:13.880805     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:14.881529     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
`

[root@crx1 opt]# docker service ls
ID             NAME               MODE         REPLICAS   IMAGE                           PORTS
lf93plwvo69y   nebula_console     replicated   1/1        vesoft/nebula-console:nightly   
j0cg7dt7jg49   nebula_graphd      replicated   0/1        vesoft/nebula-graphd:v3.3.0     
0tbcb3uxqskb   nebula_graphd1     replicated   0/1        vesoft/nebula-graphd:v3.3.0     
0ir7910p9epz   nebula_graphd2     replicated   0/1        vesoft/nebula-graphd:v3.3.0     
2e1jwrmqu6nb   nebula_metad0      replicated   0/1        vesoft/nebula-metad:v3.3.0      
8e975sfoujqr   nebula_metad1      replicated   0/1        vesoft/nebula-metad:v3.3.0      
w2pya727750i   nebula_metad2      replicated   0/1        vesoft/nebula-metad:v3.3.0      
ry4mc9x563x6   nebula_storaged0   replicated   0/1        vesoft/nebula-storaged:v3.3.0   
s4egruu95ffz   nebula_storaged1   replicated   0/1        vesoft/nebula-storaged:v3.3.0   
7cm717an5e3b   nebula_storaged2   replicated   0/1        vesoft/nebula-storaged:v3.3.0

单节点启动可以，启动集群就不行了

crx · 2023 年9 月 2 日 07:22

有没有人用swarm启动v3集群

steam · 2023 年9 月 4 日 02:44

单节点可以，集群不行的话，你一开始的图空间创建语句是咋样的

crx · 2023 年9 月 4 日 04:21

 Heartbeat failed, status:RPC failure in MetaClient: apache::thrift::transport::TTransportException: Dropping unsent request. Connection closed after: apache::thrift::transport::TTransportException: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connect
E20230904 04:20:02.082420    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:03.088455    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:04.094631    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:05.100684    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:05.100834    37 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:05.100869    37 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.100921     1 MetaClient.cpp:112] Heartbeat failed, status:RPC failure in MetaClient: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.105465    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:06.111505    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:08.129128    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:08.129277    38 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:08.129302    38 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:08.129357     1 MetaClient.cpp:150] RPC failure in MetaClient: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:08.129413     1 GraphService.cpp:48] Failed to wait for meta service ready synchronously.
E20230904 04:20:08.129436     1 GraphServer.cpp:39] Failed to wait for meta service ready synchronously.
E20230904 04:20:08.130803     1 GraphDaemon.cpp:156] The graph server start failed
E20230904 04:20:15.261232    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:16.268514    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:17.275702    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:18.282327    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:18.282471    34 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:18.282501    34 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open

集群的这个通信连不上- -不知道哪里问题

steam · 2023 年9 月 4 日 05:39

你看见我上面的回复了么，你图空间创建语句是咋样的？也是副本数为 1 么

crx · 2023 年9 月 4 日 06:16

deploy:
  replicas: 1
  restart_policy:
    condition: on-failure
  placement:
    constraints:
      - node.hostname == ecs-1d07

  replicas:都是1

crx · 2023 年9 月 4 日 06:17

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad0
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad0
      - --ws_ip=metad0
      - --port=9559
      - --ws_http_port=19559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-metad0:/data/meta
      - /data/nebula/logs-metad0:/logs
    networks:
      - nebula-net
  metad1:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad1
      - --ws_ip=metad1
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-metad1:/data/meta
      - /data/nebula1/logs-metad1:/logs
    networks:
      - nebula-net
  metad2:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad2
      - --ws_ip=metad2
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - 19559
      - 19560
      - 9559
    volumes:
      - /data/nebula2/data-metad2:/data/meta
      - /data/nebula2/logs-metad2:/logs
    networks:
      - nebula-net
  storaged0:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged0
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged0
      - --ws_ip=storaged0
      - --port=9779
      - --data_path=/data/storage
      - --ws_http_port=19779
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-storaged0:/data/meta
      - /data/nebula/logs-storaged0:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged1
      - --ws_ip=storaged1
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-storaged1:/data/meta
      - /data/nebula1/logs-storaged1:/logs
    networks:
      - nebula-net
  storaged2:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged2
      - --ws_ip=storaged2
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 19779
      - 19780
      - 9779
    volumes:
      - /data/nebula2/data-storaged2:/data/meta
      - /data/nebula2/logs-storaged2:/logs
    networks:
      - nebula-net
  graphd:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/logs-graphd:/logs
    networks:
      - nebula-net
  graphd1:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd1
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/logs-graphd1:/logs
    networks:
      - nebula-net
  graphd2:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd2
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 9669
      - 19669
      - 19670
    volumes:
      - /data/nebula2/logs-graphd2:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: "/bin/sh"
    command: 
 #     - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`/usr/local/bin/nebula-console -addr graphd -port 9669 -u root -p nebula -e 'ADD HOSTS "storaged0":9779,"storaged1":9779,"storaged2":9779'`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;
    depends_on:
      - graphd
    networks:
      - nebula-net

networks:
  nebula-net:
 #   external: true
#    attachable: true
#    name: host

crx · 2023 年9 月 4 日 06:42

yaml中是想部署集群到2台服务器，3个meta，3个storaged，3个graphd。台服务器a有1个meta，1个storaged，1个graphd。剩下的两个都在服务器b上。
按照您的意思， replicas参数要为3或者比3更大，是这样吗？

steam · 2023 年9 月 4 日 06:46

- -，sorry。我刚把 2 个参数搞混了，副本数是要小于等于 storage 数的（我刚有翻了下研发之前的回复

crx · 2023 年9 月 4 日 06:53

crx:

20230904 04:20:05.100834    37 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:05.100869    37 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.100921     1 MetaClient.cpp:112] Heartbeat failed, status:RPC failure in MetaClient: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.105465    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:06.111505    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:08.129128    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2

我都指定的1 ，上边这个错误不是说找不到metad1 和metad2 么，这个是为啥？

wey · 2023 年9 月 4 日 07:14

你应该是曾经配置过 meta 的地址为域名 metad1，它存到了 meta 里边，而这个是不可以解析的，你把所有的状态清理干净，然后再用你的基于 ip 的配置冲头拉起

wey · 2023 年9 月 4 日 07:16

这个地址在 swarm 跨主机的时候没法解析，改成 ip，改好之后从头拉起，把之前生成的数据了的 folder 都删掉，弄干净

crx · 2023 年9 月 4 日 07:26

swarm 不是允许用容器名进行集群之间的端口访问吗？我先试试改成ip

wey · 2023 年9 月 4 日 07:32

不是允许的问题，首先得 dns 能解析呀，除非 swarm 做了这个工作，类似于 k8s，否则要么自己弄 dns/hosts，要么用可以跨主机访问的网络 ip。

crx · 2023 年9 月 4 日 07:51

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad0
      - --ws_ip=metad0
      - --port=9559
      - --ws_http_port=19559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-metad0:/data/meta
      - /data/nebula/logs-metad0:/logs
    networks:
      - nebula-net
  metad1:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad1
      - --ws_ip=metad1
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-metad1:/data/meta
      - /data/nebula1/logs-metad1:/logs
    networks:
      - nebula-net
  metad2:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad2
      - --ws_ip=metad2
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - 19559
      - 19560
      - "14000:9559"
    volumes:
      - /data/nebula2/data-metad2:/data/meta
      - /data/nebula2/logs-metad2:/logs
    networks:
      - nebula-net
  storaged0:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged0
      - --ws_ip=storaged0
      - --port=9779
      - --data_path=/data/storage
      - --ws_http_port=19779
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-storaged0:/data/meta
      - /data/nebula/logs-storaged0:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged1
      - --ws_ip=storaged1
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-storaged1:/data/meta
      - /data/nebula1/logs-storaged1:/logs
    networks:
      - nebula-net
  storaged2:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged2
      - --ws_ip=storaged2
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 19779
      - 19780
      - "3699:9779"
    volumes:
      - /data/nebula2/data-storaged2:/data/meta
      - /data/nebula2/logs-storaged2:/logs
    networks:
      - nebula-net
  graphd0:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --local_ip=graphd0
      - --ws_ip=graphd0
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd0:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/logs-graphd:/logs
    networks:
      - nebula-net
  graphd1:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --local_ip=graphd1
      - --ws_ip=graphd1
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd1:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/logs-graphd1:/logs
    networks:
      - nebula-net
  graphd2:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --ws_ip=graphd2
      - --local_ip=graphd2
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd2:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - "13000:9669"
      - 19669
      - 19670
    volumes:
      - /data/nebula2/logs-graphd2:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: "/bin/sh"
    command: 
 #     - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`/usr/local/bin/nebula-console -addr 120.46.147.172 -port 9669 -u root -p nebula -e 'ADD HOSTS "120.46.147.172":9779,"120.46.178.164":9779,"120.46.178.164":3699'`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;
    depends_on:
      - graphd0
    networks:
      - nebula-net

networks:
  nebula-net:
 #   external: true
#    attachable: true
#    name: host

meta_server_addrs只改了这个成ip地址的，

20230904 07:48:09.191527    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0000 for host: "120.46.147.172":9560
I20230904 07:48:09.191577    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.147.172":9560, trying to create one
I20230904 07:48:09.191593    65 ThriftClientManager-inl.h:74] Connecting to "120.46.147.172":9560 for 22 times
I20230904 07:48:09.191740    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0500 for host: "120.46.178.164":9560
I20230904 07:48:09.191764    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":9560, trying to create one
I20230904 07:48:09.191776    65 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":9560 for 23 times
I20230904 07:48:09.191854    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0a00 for host: "120.46.178.164":14001
I20230904 07:48:09.191875    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":14001, trying to create one
I20230904 07:48:09.191886    65 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":14001 for 24 times
I20230904 07:48:09.194072    65 AsyncSocket.cpp:2996] AsyncSocket::handleConnect(this=0xffffa7380400, fd=folly::NetworkSocket(140) host=120.46.178.164:9560) exception: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connection refused)
I20230904 07:48:09.762035    51 DiskManager.cpp:161] Refresh filesystem info of "/data/meta"
I20230904 07:48:09.802703     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:10.195278    65 CollectNSucceeded-inl.h:59] Set Value [completed=3, total=3, Result list size=0]
I20230904 07:48:10.195374    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230904 07:48:10.802817     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:11.600276    48 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230904 07:48:11.600334    48 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230904 07:48:11.600410    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0000 for host: "120.46.147.172":9560
I20230904 07:48:11.600461    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.147.172":9560, trying to create one
I20230904 07:48:11.600478    66 ThriftClientManager-inl.h:74] Connecting to "120.46.147.172":9560 for 22 times
I20230904 07:48:11.600641    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0500 for host: "120.46.178.164":9560
I20230904 07:48:11.600665    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":9560, trying to create one
I20230904 07:48:11.600677    66 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":9560 for 23 times
I20230904 07:48:11.600759    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0a00 for host: "120.46.178.164":14001
I20230904 07:48:11.600781    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":14001, trying to create one
I20230904 07:48:11.600793    66 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":14001 for 24 times
I20230904 07:48:11.602140    66 AsyncSocket.cpp:2996] AsyncSocket::handleConnect(this=0xffffa75c0400, fd=folly::NetworkSocket(140) host=120.46.178.164:9560) exception: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connection refused)
I20230904 07:48:11.802939     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:12.603317    66 CollectNSucceeded-inl.h:59] Set Value [completed=3, total=3, Result list size=0]
I20230904 07:48:12.603397    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1

crx · 2023 年9 月 4 日 12:30

docker swarm 不能部署集群吗？这找的例子也不行都没人部署过吗？

steam · 2023 年9 月 7 日 02:55

你试试 wey 说的

如果还有问题的话，再来更新下。