Quickly deploying a Nebula Graph cluster with Docker Swarm

:clap: :clap:

amazing

Thanks a lot for sharing. According to the description, though, Nebula's graphd is a stateless service and the SDK client has no built-in load balancing. Since everything is deployed in Docker Swarm, why pin the graphd instances to specific nodes instead of deploying them as an ordinary stateless service? And why not use Swarm's ingress as the load balancer?

A:
1. In 1.x, the Docker deployment does not support addressing services as service_name:port; Nebula Graph itself would need DNS-related changes for that. In 2.0 this form of deployment is already supported.
2. When you use the Python scan interface or the Spark reader, you have to provide the meta IPs, and at runtime the meta service has to return the corresponding storage IPs. With a plain service_name:port setup, the meta service would return IPs from the Docker subnet (e.g. 172.x.x.x), which breaks those use cases.
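
For reference, a minimal sketch of what the 2.x-style service-name addressing looks like in a compose/stack file. This follows the upstream nebula-docker-compose 2.x examples; the image tag, port 9559 and exact flag set are assumptions and may differ for your version:

  metad0:
    image: vesoft/nebula-metad:v2-nightly
    command:
      # services reference each other by compose/Swarm DNS name instead of host IPs
      - --meta_server_addrs=metad0:9559,metad1:9559,metad2:9559
      - --local_ip=metad0
      - --ws_ip=metad0
      - --port=9559
    networks:
      - nebula-net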

That's a good question. Ingress can indeed be used; I just did it this way at the time for convenience.
@hetong


Thanks for clearing that up. The docs say: "Each compute node runs a stateless query engine, and the nodes do not communicate with each other. A compute node only reads meta information from the Meta Service and interacts with the Storage Service." From that I assumed that an external caller only ever talks to the compute (graph) service and never needs to care about the Swarm subnet IPs of the meta and storage nodes, so deploying the meta and storage nodes in host mode would be enough, and since everything is on the same network, graphd could simply be made stateless.


I made a lightly modified version of the Docker Swarm stack YAML: graphd is now an ordinary stateless service load-balanced through ingress routing, so graphd can be scaled freely and clients can reach it through any Swarm node. Feel free to use it as a reference if it helps:

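# Docker Swarm stack for a 3-node Nebula Graph 1.x cluster (hosts centos211/212/213,
# host IPs 172.16.36.211-213). metad and storaged are pinned to hosts and advertise
# real host IPs; graphd is stateless behind the Swarm ingress.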
version: '3.8'
services:
  metad0:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.211
      - --ws_ip=172.16.36.211
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos211
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.211:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/meta/data:/data/meta
      - /data/nebula/meta/logs:/logs
    networks:
      - nebula-net

  metad1:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.212
      - --ws_ip=172.16.36.212
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos212
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.212:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/meta/data:/data/meta
      - /data/nebula/meta/logs:/logs
    networks:
      - nebula-net

  metad2:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.213
      - --ws_ip=172.16.36.213
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos213
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.213:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/meta/data:/data/meta
      - /data/nebula/meta/logs:/logs
    networks:
      - nebula-net

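  # storaged: one instance per host, pinned via placement constraints; --local_ip/--ws_ip
  # carry the real host IP so that metad hands out addresses clients can actually reach.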
  storaged0:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.211
      - --ws_ip=172.16.36.211
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos211
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.211:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12002
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/storaged/data:/data/storage
      - /data/nebula/storaged/logs:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.212
      - --ws_ip=172.16.36.212
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos212
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.212:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12004
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/storaged/data:/data/storage
      - /data/nebula/storaged/logs:/logs
    networks:
      - nebula-net

  storaged2:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --local_ip=172.16.36.213
      - --ws_ip=172.16.36.213
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == centos213
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://172.16.36.213:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12006
        protocol: tcp
        mode: host
    volumes:
      - /data/nebula/storaged/data:/data/storage
      - /data/nebula/storaged/logs:/logs
    networks:
      - nebula-net
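  # graphd: stateless, 3 replicas spread at most one per node, published on 3699 through
  # the Swarm ingress routing mesh so client connections are load-balanced.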
  graphd:
    image: vesoft/nebula-graphd:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=172.16.36.211:45500,172.16.36.212:45500,172.16.36.213:45500
      - --port=3699
      - --ws_ip=0.0.0.0
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 3
      restart_policy:
        condition: on-failure
      placement:
        max_replicas_per_node: 1
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:13000/status" ]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3699
        protocol: tcp
        mode: ingress
      - target: 13000
        published: 13000
        protocol: tcp
        mode: ingress
      - target: 13002
        published: 13002
        protocol: tcp
        mode: ingress
    volumes:
      - logs-graphd:/logs
    networks:
      - custom-overlay-network
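# Both networks are external and are not created by the stack: nebula-net maps onto
# Docker's built-in host network, custom-overlay-network is an attachable overlay
# that must be created before deploying.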
networks:
  nebula-net:
    external: true
    attachable: true
    name: host
  custom-overlay-network:
    external: true
volumes:
  logs-graphd:
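
To try it out (assuming the file is saved as nebula-stack.yml and the stack is named nebula; both names are just examples): create the overlay first with `docker network create -d overlay --attachable custom-overlay-network`, then deploy with `docker stack deploy -c nebula-stack.yml nebula`. graphd can later be scaled with `docker service scale nebula_graphd=<n>` without any client-side changes, since connections go through ingress port 3699 on any Swarm node.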

:clap:


Bro, have you tested whether this works when accessed from an external network?

I tested it inside a VPC and it looks fine so far: access from outside the Swarm cluster (but still inside the VPC) works. I haven't tested exposing it outside the VPC through some edge service, but since all of our use cases stay inside a single VPC, that's enough for us.


Does the client also connect to one of these three nodes?

Yes, any graphd node will do.

Is it suitable to deploy a cluster on two nodes?

:expressionless: At the very least it has to be an odd number; two won't work.

Preferably an odd number: Raft is used underneath, and leader election doesn't work well with an even number of replicas.

One more question: if the cluster is installed on three nodes and the graph database needs, say, 3 TB of data, should each node be provisioned with 3 TB, or 1 TB per node for 3 TB in total? Second question: if the text-format source data in HDFS is 3 TB, how much storage should be requested for the cluster nodes?

Data size × replica factor.

The replica factor is usually 3, right?

Another question: with three nodes and 6 TB of graph data to store, is each node's share the total divided by 3, i.e. 2 TB? And with a replica factor of 3, does each node then need 6 TB, for 18 TB across the three nodes?

3 nodes, replica factor 3, assuming 3 partitions p1, p2, p3:
Node 1 stores p1 p2 p3
Node 2 stores p1 p2 p3
Node 3 stores p1 p2 p3

3 nodes, replica factor 2, assuming 3 partitions p1, p2, p3:
Node 1 stores p1 p2
Node 2 stores p2 p3
Node 3 stores p1 p3

With 3 replicas, each node still needs 6 TB.
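
Put as a rough formula (my own summary of the above, assuming partitions are evenly sized and ignoring compaction/WAL overhead):

\[
\text{per-node storage} \approx \frac{\text{data size} \times \text{replica factor}}{\text{number of nodes}}
= \frac{6\,\text{TB} \times 3}{3} = 6\,\text{TB},
\qquad
\text{cluster total} = 6\,\text{TB} \times 3 = 18\,\text{TB}.
\]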

3 nodes, replica factor 3, assuming 3 partitions p1, p2, p3:
Node 1 stores p1 p2 p3
Node 2 stores p1 p2 p3
Node 3 stores p1 p2 p3
If that's the layout but "with 3 replicas each node is still 2 TB", then node 1 would only hold 1/3 of the graph data, i.e. 6 TB / 3 = 2 TB, and there would be no room left for the other 4 TB.