docker swarm v3.2.0 集群部署失败

由于服务器有限,使用了两台服务器加入了swarm 进行部署
服务器a: graphd,metad0,storaged0
服务器b: storaged1,storaged2, metad1,metad2

ymal如下:

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.147.172
      - --ws_ip=120.46.147.172
      - --port=9559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19559/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad0:/data/meta
      - logs-metad0:/logs
    networks:
      - nebula-net

  metad1:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=9559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19559/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad1:/data/meta
      - logs-metad1:/logs
    networks:
      - nebula-net

  metad2:
    image: vesoft/nebula-metad:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=5555
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19558/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19558
        published: 19559
        protocol: tcp
        mode: host
      - target: 19561
        published: 19560
        protocol: tcp
        mode: host
      - target: 5555
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - data-metad2:/data/meta
      - logs-metad2:/logs
    networks:
      - nebula-net

  storaged0:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.147.172
      - --ws_ip=120.46.147.172
      - --port=9779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19779/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged0:/data/storage
      - logs-storaged0:/logs
    networks:
      - nebula-net

  storaged1:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=9779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19779/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged1:/data/storage
      - logs-storaged1:/logs
    networks:
      - nebula-net

  storaged2:
    image: vesoft/nebula-storaged:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --local_ip=120.46.178.164
      - --ws_ip=120.46.178.164
      - --port=4778
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.178.164:19778/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 19778
        published: 19779
        protocol: tcp
        mode: host
      - target: 19781
        published: 19780
        protocol: tcp
        mode: host
      - target: 4778
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - data-storaged2:/data/storage
      - logs-storaged2:/logs
    networks:
      - nebula-net

  graphd:
    image: vesoft/nebula-graphd:v3.2.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:5555
      - --port=9669
      - --ws_ip=120.46.147.172
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://120.46.147.172:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: ""
    command: 
      - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`nebula-console -addr 120.46.147.172 -port 9669 -u root -p nebula -e 'ADD HOSTS "120.46.147.172":9779,"120.46.178.164":4778,"120.46.178.164":9779`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;

    depends_on:
      - graphd
    networks:
      - nebula-net
      
networks:
  nebula-net:
    external: true
    attachable: true
    name: host

volumes:
  data-metad0:
  logs-metad0:
  data-metad1:
  logs-metad1:
  data-metad2:
  logs-metad2:
  data-storaged0:
  logs-storaged0:
  data-storaged1:
  logs-storaged1:
  data-storaged2:
  logs-storaged2:
  logs-graphd:

运行后
docker stack deploy -c xxxxx .ymal nebula
启动失败图片:

根据docker swarm部署 中的文档更改后进行启动也是失败,和上图一样。请问该如何处理呢?

[root@ecs-3f95 logs-metad0]# ls
metad-stderr.log                                        nebula-metad.ecs-3f95.root.log.WARNING.20230830-051551.1
metad-stdout.log                                        nebula-metad.ERROR
nebula-metad.ecs-3f95.root.log.ERROR.20230830-051551.1  nebula-metad.INFO
nebula-metad.ecs-3f95.root.log.INFO.20230830-051551.1   nebula-metad.WARNING
[root@ecs-3f95 logs-metad0]# cat metad-stderr.log 
E20230830 05:15:51.885763     1 FileUtils.cpp:377] Failed to read the directory "/data/meta/nebula" (2): No such file or directory
[root@ecs-3f95 logs-metad0]# cat metad-stdout.log 
[root@ecs-3f95 logs-metad0]# cat nebula-metad.ERROR 
Log file created at: 2023/08/30 05:15:51
Running on machine: ecs-3f95
Running duration (h:mm:ss): 0:00:00
Log line format: [IWEF]yyyymmdd hh:mm:ss.uuuuuu threadid file:line] msg
E20230830 05:15:51.885763     1 FileUtils.cpp:377] Failed to read the directory "/data/meta/nebula" (2): No such file or directory

使用docker run -it vesoft/nebula-storaged:v3.2.0 直接卡住不动

看日志文件路径不对,在yml文件里提前生成好文件夹试试呢

1 个赞
`E20230902 06:14:10.282040    63 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:10.282095    63 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:10.282218    63 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:10.282269    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:10.877132     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:11.777395    51 DiskManager.cpp:161] Refresh filesystem info of "/data/meta"
I20230902 06:14:11.819051    50 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230902 06:14:11.819113    50 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230902 06:14:11.819190    64 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055acfb200 for host: "metad0":9560
I20230902 06:14:11.819204    64 ThriftClientManager-inl.h:53] There is no existing client to "metad0":9560, trying to create one
E20230902 06:14:11.823705    64 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad0': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:11.823758    64 ThriftClientManager-inl.h:74] Connecting to "metad0":9560 for 5 times
I20230902 06:14:11.823848    64 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055acfb700 for host: "metad1":9560
I20230902 06:14:11.823858    64 ThriftClientManager-inl.h:53] There is no existing client to "metad1":9560, trying to create one
E20230902 06:14:11.827549    64 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:11.827613    64 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:11.827741    64 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:11.827785    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:11.878156     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:12.879294     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:13.583830    47 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230902 06:14:13.583879    47 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230902 06:14:13.583945    65 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055ad0c200 for host: "metad0":9560
I20230902 06:14:13.583956    65 ThriftClientManager-inl.h:53] There is no existing client to "metad0":9560, trying to create one
E20230902 06:14:13.589493    65 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad0': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:13.589532    65 ThriftClientManager-inl.h:74] Connecting to "metad0":9560 for 5 times
I20230902 06:14:13.589617    65 ThriftClientManager-inl.h:38] Invalid Channel: 0x7f055ad0c700 for host: "metad1":9560
I20230902 06:14:13.589628    65 ThriftClientManager-inl.h:53] There is no existing client to "metad1":9560, trying to create one
E20230902 06:14:13.592943    65 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
I20230902 06:14:13.592976    65 ThriftClientManager-inl.h:74] Connecting to "metad1":9560 for 6 times
I20230902 06:14:13.593034    65 CollectNSucceeded-inl.h:59] Set Value [completed=2, total=2, Result list size=0]
I20230902 06:14:13.593072    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230902 06:14:13.880805     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230902 06:14:14.881529     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
`
[root@crx1 opt]# docker service ls
ID             NAME               MODE         REPLICAS   IMAGE                           PORTS
lf93plwvo69y   nebula_console     replicated   1/1        vesoft/nebula-console:nightly   
j0cg7dt7jg49   nebula_graphd      replicated   0/1        vesoft/nebula-graphd:v3.3.0     
0tbcb3uxqskb   nebula_graphd1     replicated   0/1        vesoft/nebula-graphd:v3.3.0     
0ir7910p9epz   nebula_graphd2     replicated   0/1        vesoft/nebula-graphd:v3.3.0     
2e1jwrmqu6nb   nebula_metad0      replicated   0/1        vesoft/nebula-metad:v3.3.0      
8e975sfoujqr   nebula_metad1      replicated   0/1        vesoft/nebula-metad:v3.3.0      
w2pya727750i   nebula_metad2      replicated   0/1        vesoft/nebula-metad:v3.3.0      
ry4mc9x563x6   nebula_storaged0   replicated   0/1        vesoft/nebula-storaged:v3.3.0   
s4egruu95ffz   nebula_storaged1   replicated   0/1        vesoft/nebula-storaged:v3.3.0   
7cm717an5e3b   nebula_storaged2   replicated   0/1        vesoft/nebula-storaged:v3.3.0 

单节点启动可以 ,启动集群就不行了

有没有人用swarm启动v3集群

:thinking: 单节点可以,集群不行的话,你一开始的图空间创建语句是咋样的

 Heartbeat failed, status:RPC failure in MetaClient: apache::thrift::transport::TTransportException: Dropping unsent request. Connection closed after: apache::thrift::transport::TTransportException: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connect
E20230904 04:20:02.082420    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:03.088455    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:04.094631    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:05.100684    37 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:05.100834    37 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:05.100869    37 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.100921     1 MetaClient.cpp:112] Heartbeat failed, status:RPC failure in MetaClient: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:05.105465    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:06.111505    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:08.129128    38 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:08.129277    38 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:08.129302    38 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:08.129357     1 MetaClient.cpp:150] RPC failure in MetaClient: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open
E20230904 04:20:08.129413     1 GraphService.cpp:48] Failed to wait for meta service ready synchronously.
E20230904 04:20:08.129436     1 GraphServer.cpp:39] Failed to wait for meta service ready synchronously.
E20230904 04:20:08.130803     1 GraphDaemon.cpp:156] The graph server start failed
E20230904 04:20:15.261232    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:16.268514    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad2': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:17.275702    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:18.282327    34 ThriftClientManager-inl.h:70] Failed to resolve address for 'metad1': Name or service not known (error=-2): Unknown error -2
E20230904 04:20:18.282471    34 MetaClient.cpp:758] Send request to "metad1":9559, exceed retry limit
E20230904 04:20:18.282501    34 MetaClient.cpp:759] RpcResponse exception: apache::thrift::transport::TTransportException: Connection not open: apache::thrift::transport::TTransportException: AsyncSocketException: setReadCallback() called with socket in invalid state, type = Socket not open

集群的这个通信连不上- -不知道哪里问题

:thinking: 你看见我上面的回复了么,你图空间创建语句是咋样的?也是副本数为 1 么

deploy:
  replicas: 1
  restart_policy:
    condition: on-failure
  placement:
    constraints:
      - node.hostname == ecs-1d07

  replicas:都是1
version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad0
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad0
      - --ws_ip=metad0
      - --port=9559
      - --ws_http_port=19559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-metad0:/data/meta
      - /data/nebula/logs-metad0:/logs
    networks:
      - nebula-net
  metad1:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad1
      - --ws_ip=metad1
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-metad1:/data/meta
      - /data/nebula1/logs-metad1:/logs
    networks:
      - nebula-net
  metad2:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad2
      - --ws_ip=metad2
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - 19559
      - 19560
      - 9559
    volumes:
      - /data/nebula2/data-metad2:/data/meta
      - /data/nebula2/logs-metad2:/logs
    networks:
      - nebula-net
  storaged0:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged0
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged0
      - --ws_ip=storaged0
      - --port=9779
      - --data_path=/data/storage
      - --ws_http_port=19779
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-storaged0:/data/meta
      - /data/nebula/logs-storaged0:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged1
      - --ws_ip=storaged1
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-storaged1:/data/meta
      - /data/nebula1/logs-storaged1:/logs
    networks:
      - nebula-net
  storaged2:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=storaged2
      - --ws_ip=storaged2
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 19779
      - 19780
      - 9779
    volumes:
      - /data/nebula2/data-storaged2:/data/meta
      - /data/nebula2/logs-storaged2:/logs
    networks:
      - nebula-net
  graphd:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/logs-graphd:/logs
    networks:
      - nebula-net
  graphd1:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd1
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd1
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/logs-graphd1:/logs
    networks:
      - nebula-net
  graphd2:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd2
    command:
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --port=9669
      - --ws_ip=graphd2
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 9669
      - 19669
      - 19670
    volumes:
      - /data/nebula2/logs-graphd2:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: "/bin/sh"
    command: 
 #     - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`/usr/local/bin/nebula-console -addr graphd -port 9669 -u root -p nebula -e 'ADD HOSTS "storaged0":9779,"storaged1":9779,"storaged2":9779'`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;
    depends_on:
      - graphd
    networks:
      - nebula-net

networks:
  nebula-net:
 #   external: true
#    attachable: true
#    name: host

yaml中是想部署集群到2台服务器,3个meta,3个storaged,3个graphd。 台服务器a有1个meta,1个storaged,1个graphd。剩下的两个都在服务器b上。
按照您的意思, replicas参数要为3或者比3更大,是这样吗?

- -,sorry。我刚把 2 个参数搞混了,副本数是要小于等于 storage 数的(我刚有翻了下研发之前的回复

我都指定的1 ,上边这个错误不是说找不到metad1 和metad2 么,这个是为啥?

你应该是曾经配置过 meta 的地址为域名 metad1,它存到了 meta 里边,而这个是不可以解析的,你把所有的状态清理干净,然后再用你的基于 ip 的配置冲头拉起

这个地址在 swarm 跨主机的时候没法解析,改成 ip,改好之后从头拉起,把之前生成的数据了的 folder 都删掉,弄干净

swarm 不是允许用容器名进行集群之间的端口访问吗? 我先试试改成ip

不是允许的问题,首先得 dns 能解析呀,除非 swarm 做了这个工作,类似于 k8s,否则要么自己弄 dns/hosts,要么用可以跨主机访问的网络 ip。

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad0
      - --ws_ip=metad0
      - --port=9559
      - --ws_http_port=19559
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-metad0:/data/meta
      - /data/nebula/logs-metad0:/logs
    networks:
      - nebula-net
  metad1:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad1
      - --ws_ip=metad1
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - target: 19559
        published: 19559
        protocol: tcp
        mode: host
      - target: 19560
        published: 19560
        protocol: tcp
        mode: host
      - target: 9559
        published: 9559
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-metad1:/data/meta
      - /data/nebula1/logs-metad1:/logs
    networks:
      - nebula-net
  metad2:
    image: vesoft/nebula-metad:v3.3.0
    env_file:
      - ./nebula.env
    hostname: metad2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=metad2
      - --ws_ip=metad2
      - --port=9559
      - --data_path=/data/meta
      - --ws_http_port=19559
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    ports:
      - 19559
      - 19560
      - "14000:9559"
    volumes:
      - /data/nebula2/data-metad2:/data/meta
      - /data/nebula2/logs-metad2:/logs
    networks:
      - nebula-net
  storaged0:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged0
      - --ws_ip=storaged0
      - --port=9779
      - --data_path=/data/storage
      - --ws_http_port=19779
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/data-storaged0:/data/meta
      - /data/nebula/logs-storaged0:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged1
      - --ws_ip=storaged1
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - target: 19779
        published: 19779
        protocol: tcp
        mode: host
      - target: 19780
        published: 19780
        protocol: tcp
        mode: host
      - target: 9779
        published: 9779
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/data-storaged1:/data/meta
      - /data/nebula1/logs-storaged1:/logs
    networks:
      - nebula-net
  storaged2:
    image: vesoft/nebula-storaged:v3.3.0
    env_file:
      - ./nebula.env
    hostname: storaged2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --local_ip=storaged2
      - --ws_ip=storaged2
      - --port=9779
      - --ws_http_port=19779
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=3
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    ports:
      - 19779
      - 19780
      - "3699:9779"
    volumes:
      - /data/nebula2/data-storaged2:/data/meta
      - /data/nebula2/logs-storaged2:/logs
    networks:
      - nebula-net
  graphd0:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd0
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --local_ip=graphd0
      - --ws_ip=graphd0
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-3f95
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd0:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/logs-graphd:/logs
    networks:
      - nebula-net
  graphd1:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd1
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --local_ip=graphd1
      - --ws_ip=graphd1
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd1:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 9669
        published: 9669
        protocol: tcp
        mode: host
      - target: 19669
        published: 19669
        protocol: tcp
        mode: host
      - target: 19670
        published: 19670
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula1/logs-graphd1:/logs
    networks:
      - nebula-net
  graphd2:
    image: vesoft/nebula-graphd:v3.3.0
    env_file:
      - ./nebula.env
    hostname: graphd2
    command:
      - --meta_server_addrs=120.46.147.172:9559,120.46.178.164:9559,120.46.178.164:14000
      - --port=9669
      - --ws_ip=graphd2
      - --local_ip=graphd2
      - --log_dir=/logs
      - --ws_http_port=19669
      - --v=3
      - --minloglevel=0
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ecs-1d07
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://graphd2:19669/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - "13000:9669"
      - 19669
      - 19670
    volumes:
      - /data/nebula2/logs-graphd2:/logs
    networks:
      - nebula-net
  console:
    image: vesoft/nebula-console:nightly
    entrypoint: "/bin/sh"
    command: 
 #     - sh
      - -c
      - |
        for i in `seq 1 60`;do
          var=`/usr/local/bin/nebula-console -addr 120.46.147.172 -port 9669 -u root -p nebula -e 'ADD HOSTS "120.46.147.172":9779,"120.46.178.164":9779,"120.46.178.164":3699'`;
          if [[ $$? == 0 ]];then
            break;
          fi;
          sleep 1;
          echo "retry to add hosts.";
        done && tail -f /dev/null;
    depends_on:
      - graphd0
    networks:
      - nebula-net

networks:
  nebula-net:
 #   external: true
#    attachable: true
#    name: host

meta_server_addrs只改了这个成ip地址的,

20230904 07:48:09.191527    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0000 for host: "120.46.147.172":9560
I20230904 07:48:09.191577    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.147.172":9560, trying to create one
I20230904 07:48:09.191593    65 ThriftClientManager-inl.h:74] Connecting to "120.46.147.172":9560 for 22 times
I20230904 07:48:09.191740    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0500 for host: "120.46.178.164":9560
I20230904 07:48:09.191764    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":9560, trying to create one
I20230904 07:48:09.191776    65 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":9560 for 23 times
I20230904 07:48:09.191854    65 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff91ea0a00 for host: "120.46.178.164":14001
I20230904 07:48:09.191875    65 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":14001, trying to create one
I20230904 07:48:09.191886    65 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":14001 for 24 times
I20230904 07:48:09.194072    65 AsyncSocket.cpp:2996] AsyncSocket::handleConnect(this=0xffffa7380400, fd=folly::NetworkSocket(140) host=120.46.178.164:9560) exception: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connection refused)
I20230904 07:48:09.762035    51 DiskManager.cpp:161] Refresh filesystem info of "/data/meta"
I20230904 07:48:09.802703     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:10.195278    65 CollectNSucceeded-inl.h:59] Set Value [completed=3, total=3, Result list size=0]
I20230904 07:48:10.195374    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1
I20230904 07:48:10.802817     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:11.600276    48 RaftPart.cpp:1291] [Port: 9560, Space: 0, Part: 0] Start leader election...
I20230904 07:48:11.600334    48 RaftPart.cpp:1319] [Port: 9560, Space: 0, Part: 0] Sending out an election request (space = 0, part = 0, term = 1, lastLogId = 0, lastLogTerm = 0, candidateIP = metad2, candidatePort = 9560), isPreVote = 1
I20230904 07:48:11.600410    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0000 for host: "120.46.147.172":9560
I20230904 07:48:11.600461    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.147.172":9560, trying to create one
I20230904 07:48:11.600478    66 ThriftClientManager-inl.h:74] Connecting to "120.46.147.172":9560 for 22 times
I20230904 07:48:11.600641    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0500 for host: "120.46.178.164":9560
I20230904 07:48:11.600665    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":9560, trying to create one
I20230904 07:48:11.600677    66 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":9560 for 23 times
I20230904 07:48:11.600759    66 ThriftClientManager-inl.h:38] Invalid Channel: 0xffff918a0a00 for host: "120.46.178.164":14001
I20230904 07:48:11.600781    66 ThriftClientManager-inl.h:53] There is no existing client to "120.46.178.164":14001, trying to create one
I20230904 07:48:11.600793    66 ThriftClientManager-inl.h:74] Connecting to "120.46.178.164":14001 for 24 times
I20230904 07:48:11.602140    66 AsyncSocket.cpp:2996] AsyncSocket::handleConnect(this=0xffffa75c0400, fd=folly::NetworkSocket(140) host=120.46.178.164:9560) exception: AsyncSocketException: connect failed, type = Socket not open, errno = 111 (Connection refused)
I20230904 07:48:11.802939     1 MetaDaemonInit.cpp:118] Leader has not been elected, sleep 1s
I20230904 07:48:12.603317    66 CollectNSucceeded-inl.h:59] Set Value [completed=3, total=3, Result list size=0]
I20230904 07:48:12.603397    43 RaftPart.cpp:1285] [Port: 9560, Space: 0, Part: 0] Did not get enough votes from election of term 1, isPreVote = 1

docker swarm 不能部署集群吗 ?这找的例子也不行 都没人部署过吗?

你试试 wey 说的

如果还有问题的话,再来更新下。