Help: Docker Swarm cluster deployment fails

Help: after modifying the docker-swarm file, deployment fails. Hoping for official support.
The docker-swarm file:


version: '3.4'
services:
  metad0:
    image: 192.168.1.161:80/common/nebula-metad:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.166
      - --ws_ip=192.168.1.166
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host

    volumes:
      - ./data/meta0:/data/meta
      - ./logs/meta0:/logs
    networks:
      - nebula-net

  metad1:
    image: 192.168.1.161:80/common/nebula-metad:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.167
      - --ws_ip=192.168.1.167
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:11001/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11001
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45501
        protocol: tcp
        mode: host
    volumes:
      - ./data/meta1:/data/meta
      - ./logs/meta1:/logs
    networks:
      - nebula-net

  metad2:
    image: 192.168.1.161:80/common/nebula-metad:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.168
      - --ws_ip=192.168.1.168
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:11003/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11003
        protocol: tcp
        mode: host
      - target: 11002
        published: 11004
        protocol: tcp
        mode: host
      - target: 45500
        published: 45502
        protocol: tcp
        mode: host
    volumes:
      - ./data/meta2:/data/meta
      - ./logs/meta2:/logs
    networks:
      - nebula-net

  storaged0:
    image: 192.168.1.161:80/common/nebula-storaged:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.166
      - --ws_ip=192.168.1.166
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12002
        protocol: tcp
        mode: host
    volumes:
      - ./data/storage0:/data/storage
      - ./logs/storage0:/logs
    networks:
      - nebula-net

  storaged1:
    image: 192.168.1.161:80/common/nebula-storaged:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.167
      - --ws_ip=192.168.1.167
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:12003/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12003
        protocol: tcp
        mode: host
      - target: 12002
        published: 12004
        protocol: tcp
        mode: host
    volumes:
      - ./data/storage1:/data/storage
      - ./logs/storage1:/logs
    networks:
      - nebula-net

  storaged2:
    image: 192.168.1.161:80/common/nebula-storaged:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --local_ip=192.168.1.168
      - --ws_ip=192.168.1.168
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:12005/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12005
        protocol: tcp
        mode: host
      - target: 12002
        published: 12006
        protocol: tcp
        mode: host
    volumes:
      - ./data/storage2:/data/storage
      - ./logs/storage2:/logs
    networks:
      - nebula-net

  graphd:
    image: 192.168.1.161:80/common/nebula-graphd:v1.0
    environment:
      USER: root
      TZ:   "${TZ}"
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45501,192.168.1.168:45502
      - --port=3699
      - --ws_ip=192.168.1.166
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      mode: global
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:13000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3699
        protocol: tcp
        mode: host
      - target: 13000
        published: 13000
        protocol: tcp
        mode: host
      - target: 13002
        published: 13002
        protocol: tcp
        mode: host
    volumes:
      - ./logs/graph:/logs
    networks:
      - nebula-net

networks:
  nebula-net:

[Problem screenshot]

The formatting is a bit off; feel free to contact me.
Email: henson_wu@foxmail.com
WeChat: wu88888888000

@dingding please help

Have any log files been generated under the log directory configured for the services?

@dingding Yes, there are log files. If you leave an email address I can zip them up and send them to you; attachments can't be uploaded here.
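
(As an aside, the logs can also be pulled from a swarm manager without logging into each node; a minimal sketch, assuming the stack was deployed under the name nebula, so the metad0 service is named nebula_metad0:)

    # list the service's tasks and their exit reasons, untruncated
    docker service ps nebula_metad0 --no-trunc
    # tail the last 100 log lines, aggregated across nodes
    docker service logs --tail 100 nebula_metad0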

The network mode of nebula-net needs to be set to host; see network_mode.
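
(For context, a minimal sketch of one way to do this in a stack file: attach the services to the pre-existing host network, declared as external. The name field assumes compose file version 3.5+; with host networking the ports: sections become unnecessary, because each daemon binds directly on its node.)

    networks:
      hostnet:
        external: true   # 'host' already exists; do not create it
        name: host

    services:
      metad0:
        # ... image/command/deploy as before ...
        networks:
          - hostnet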

@yee @dingding Hi both, I have now set the network mode to host.

storaged errors out as shown in the screenshot.
metad did not print any logs.

We will verify your configuration file locally and then get back to you.

Thanks.

Hello, I set up three test machines and reproduced the issue on them; I can send you the passwords.

Thanks for your help; we have already prepared machines. Later on we will provide a swarm configuration that users can use directly.

Thanks.

@henson

Sorry for replying only now.

The Docker Swarm deployment example has now been debugged successfully; see the docker-swarm branch of nebula-docker-compose for details.

The specific steps are as follows (they are collected into one runnable sketch after this list):

  1. Prepare the Docker Swarm cluster: initialize every node with docker swarm init/join. If you run into port problems later, check whether the firewall on each node is disabled.
  2. Give every node in the swarm cluster a hostname based on its IP (or any other hostname that tells the nodes apart).
  3. Clone the branch of the repository above: git clone --branch docker-swarm --single-branch --depth 1 https://github.com/vesoft-inc/nebula-docker-compose
  4. In docker-stack.yaml, change the IPs to your own node IPs, and change the node.hostname constraints to the hostnames of the corresponding nodes.
  5. Deploy with docker stack: docker stack deploy -c docker-stack.yaml nebula
  6. List all of nebula's services: docker stack services nebula
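
Collected into a single runnable sketch (the advertise address and the join token are placeholders for your own environment):

    # 1. on the first node: initialize the swarm
    docker swarm init --advertise-addr 192.168.1.166
    # 2. on every other node: join with the token printed by init
    docker swarm join --token <worker-token> 192.168.1.166:2377
    # 3. fetch only the docker-swarm branch of the repository
    git clone --branch docker-swarm --single-branch --depth 1 \
        https://github.com/vesoft-inc/nebula-docker-compose
    cd nebula-docker-compose
    # 4. edit docker-stack.yaml: replace the IPs and the node.hostname
    #    constraints with the values for your own nodes
    # 5. deploy the stack under the name 'nebula'
    docker stack deploy -c docker-stack.yaml nebula
    # 6. confirm every service reports its expected replica count
    docker stack services nebula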

The idea behind this is to deploy each container onto its corresponding node and use that node's network, so that Nebula can be configured with the host IPs directly. The key point is the network type.

If you run into any other problems during deployment, feel free to reply to us at any time.

Many thanks.

:blush: If you feel Yee has solved your problem, you can mark the corresponding reply as the solution.


Hello, I will give feedback once verification is complete.

Verification complete; thanks again.


Following that file, I added two more graphd services, but they failed to start.

  graphd2:
    image: 192.168.1.161:80/common/nebula-graphd:v1.0.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --port=3640
      - --ws_ip=192.168.1.167
      - --log_dir=/logs
      - --v=2
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:13001/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3640
        protocol: tcp
        mode: host
      - target: 13000
        published: 13001
        protocol: tcp
        mode: host
      - target: 13002
        published: 13003
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd2:/logs
    networks:
      - nebula-net

  graphd3:
    image: 192.168.1.161:80/common/nebula-graphd:v1.0.0
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --port=3641
      - --ws_ip=192.168.1.168
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:13002/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3641
        protocol: tcp
        mode: host
      - target: 13000
        published: 13002
        protocol: tcp
        mode: host
      - target: 13002
        published: 13004
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd3:/logs
    networks:
      - nebula-net

We suggest checking the deployment environment, for example whether any of the ports are already in use.
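
For example, a quick check on each node (a sketch; the port list is taken from the graphd2/graphd3 definitions above, and the stack name nebula is assumed):

    # is anything already listening on the host-mode ports the new
    # graphd services try to publish?
    sudo ss -ltnp | grep -E ':(3640|3641|13001|13002|13003|13004)\b'
    # show the failed tasks and their full error messages
    docker service ps nebula_graphd2 --no-trunc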

Thanks, verified and resolved. I hope the verified changes can be merged into the GitHub repository (production environment: three graphd services).


Sorry, I was on leave yesterday and could not reply in time.

The previous stack file was not merged because some of its configuration is environment-specific; we will look into making it configurable later. Thanks for the suggestion.
