exchange2.0 hive导数报错NoSuchMethodError

Jiayi98 · 2021 年4 月 7 日 08:40

nebula 版本：2.0-ga
部署方式（分布式 / 单机 / Docker / DBaaS）：分布式，docker swarm
问题的具体描述
相关的 meta / storage / graph info 日志信息

主要是Exception in thread “main” java.lang.NoSuchMethodError: org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.

21/04/07 16:13:07 INFO client.HiveClientImpl: Warehouse location for Hive client (version 2.1.1) is /user/hive/warehouse
Exception in thread “main” java.lang.NoSuchMethodError: org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(Lorg/apache/hadoop/hive/conf/HiveConf;Lorg/apache/hadoop/hive/metastore/HiveMetaHookLoader;Ljava/util/concurrent/ConcurrentHashMap;Ljava/lang/String;Z)Lorg/apache/hadoop/hive/metastore/IMetaStoreClient;
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3819)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3871)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3851)
at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:4105)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:254)
at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:237)
at org.apache.hadoop.hive.ql.metadata.Hive.(Hive.java:394)
at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:338)
at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:318)
at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:294)
at org.apache.spark.sql.hive.client.HiveClientImpl.org$apache$spark$sql$hive$client$HiveClientImpl$$client(HiveClientImpl.scala:254)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:276)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:221)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:220)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:266)
at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:356)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:216)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:141)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:136)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.isTemporaryTable(SessionCatalog.scala:731)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.isRunningDirectlyOnFiles(Analyzer.scala:749)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:683)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:715)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:654)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
at com.vesoft.nebula.exchange.reader.HiveReader.read(ServerBaseReader.scala:68)
at com.vesoft.nebula.exchange.Exchange$.com$vesoft$nebula$exchange$Exchange$$createDataSource(Exchange.scala:240)
at com.vesoft.nebula.exchange.Exchange$$anonfun$main$2.apply(Exchange.scala:130)
at com.vesoft.nebula.exchange.Exchange$$anonfun$main$2.apply(Exchange.scala:122)
at scala.collection.immutable.List.foreach(List.scala:392)
at com.vesoft.nebula.exchange.Exchange$.main(Exchange.scala:122)
at com.vesoft.nebula.exchange.Exchange.main(Exchange.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:851)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:926)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:935)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
21/04/07 16:13:08 INFO spark.SparkContext: Invoking stop() from shutdown hook
21/04/07 16:13:08 INFO server.AbstractConnector: Stopped Spark@6ac4944a{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
21/04/07 16:13:08 INFO ui.SparkUI: Stopped Spark web UI at http://slave04:4040
21/04/07 16:13:08 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
21/04/07 16:13:08 INFO memory.MemoryStore: MemoryStore cleared
21/04/07 16:13:08 INFO storage.BlockManager: BlockManager stopped
21/04/07 16:13:08 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
21/04/07 16:13:08 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
21/04/07 16:13:08 INFO spark.SparkContext: Successfully stopped SparkContext
21/04/07 16:13:08 INFO util.ShutdownHookManager: Shutdown hook called
21/04/07 16:13:08 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-286bba8a-074f-4062-9b66-55d877fd8c7e
21/04/07 16:13:08 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-012aeb90-90f0-4446-8c50-8358f4707290

配置文件如下：

{
  # Spark related configuration
  spark: {
    app: {
      name: Hive Exchange 2.0
    }
    driver: {
      cores: 1
      maxResultSize: 1G
    }
    executor: {
        memory:1G
    }
    cores {
      max: 16
    }
  }

  # If Spark and HIVE are deployed in the different clusters, 
  # configure these parameters for HIVE. Otherwise, ignore them. 
  # hive: {
  #   waredir: "hdfs://localhost:9000/usr/hive/warehouse/"
  #   connectionURL: "jdbc:mysql://localhost:3306/hive_spark?characterEncoding=UTF-8"
  #   connectionDriverName: "com.mysql.jdbc.Driver"
  #   connectionUserName: "jiayizhou"
  #   connectionPassword: "123456"
  # }

  # Nebula Graph related configuration
  nebula: {
    address:{
      # Specifies the IP addresses and ports of the Graph Service and the Meta Service of Nebula Graph
      # If multiple servers are used, separate the addresses with commas.
      # Format: "ip1:port","ip2:port","ip3:port"
      graph:["192.168.1.12:3699"]
      meta:["192.168.1.10:45500","192.168.1.12:45500", "192.168.1.60:45500"]
    }

    # Specifies an account that has the WriteData privilege in Nebula Graph and its password
    user: user
    pswd: password

    # Specifies a graph space name
    space: bukong
    connection {
      timeout: 3000
      retry: 3
    }
    execution {
      retry: 3
    }
    error: {
      max: 32
      output: /tmp/errors
    }
    rate: {
      limit: 1024
      timeout: 1000
    }
  }

  # Process vertices
  tags: [
    # Sets for the user tag
    {
      # Specifies a tag name defined in Nebula Graph
      name: target
      type: {
        # Specifies the data source. hive is used.
        source: hive
        # Specifies how to import vertex data into Nebula Graph: client or sst.
        # For more information about importing sst files, see Import SST files (doc to do).
        sink: client
      }

      # Specifies the SQL statement to read data from the users table in the mooc database
      exec: "select phone_number from target"

      # Specifies the column names from the users table to fields. 
      # Their values are used as the source of the userId (nebula.fields) property defined in Nebula Graph.
      # If more than one column name is specified, separate them with commas.
      # fields for the HIVE and nebula.fields for Nebula Graph must have the one-to-one correspondence relationship.
      fields: [phone_number] # fields是hive表中的字段名
      nebula.fields: [phone_number] # nebula.fields是nebula中某个类型的点(tag)的属性

      # Specifies a column as the source of VIDs.
      # The value of vertex must be one column name in the exec sentence. 
      # If the values are not of the int type, use vertex.policy to 
      # set the mapping policy. "hash" is preferred. 
      # Refer to the configuration of the course tag.
      vertex: phone_number # hive表中选取一个字段作为点的唯一unique key，由于v2只支持string做vertex id，不再支持int

      # Specifies the maximum number of vertex data to be written into 
      # Nebula Graph in a single batch.
      batch: 256

      # Specifies the partition number of Spark.
      partition: 32
    }

  ]

  # Process edges
  edges: [
    # Sets for the action edge type
    {
      # Specifies an edge type name defined in Nebula Graph
      name: call

      type: {
        # Specifies the data source. hive is used.
        source: hive

        # Specifies how to import vertex data into Nebula Graph: client or sst
        # For more information about importing sst files, 
        # see Import SST files (doc to do).
        sink: client
      }

      # Specifies the SQL statement to read data from the actions table in
      # the mooc database.
      exec: "select last_time, phone1, phone2 from bukong"

      # Specifies the column names from the actions table to fields. 
      # Their values are used as the source of the properties of 
      # the action edge type defined in Nebula Graph.
      # If more than one column name is specified, separate them with commas.
      # fields for the HIVE and nebula.fields for Nebula Graph must 
      # have the one-to-one correspondence relationship.
      fields: [last_time]
      nebula.fields: [latest_time]

      # source specifies a column as the source of the IDs of
      # the source vertex of an edge.
      # target specifies a column as the source of the IDs of
      # the target vertex of an edge.
      # The value of source.field and target.field must be
      # column names set in the exec sentence. 
      source: phone1
      target: phone2
      # For now, only string type VIDs are supported in Nebula Graph v2.x. 
      # Do not use vertex.policy for mapping.
      #target: {
      #  field: dstid
      #  policy: "hash"
      #}

      # Specifies the maximum number of vertex data to be 
      # written into Nebula Graph in a single batch.
      batch: 256

      # Specifies the partition number of Spark.
      partition: 32
    }
  ]
}

nicole · 2021 年4 月 7 日 11:45

hive版本不匹配，有可能以下几种情况

Missing org.apache.hadoop.hive.metastore.RetryingMetaStoreClient on the classpath
Have the wrong version of org.apache.hadoop.hive.metastore.RetryingMetaStoreClient on the classpath
Have multiple versions of org.apache.hadoop.hive.metastore.RetryingMetaStoreClient on the classpath

Jiayi98 · 2021 年4 月 8 日 07:56

hive版本和什么不匹配呢？
我的大数据环境是cdh统一搭建的，应该不存在hive和spark版本不对应呀

nicole · 2021 年4 月 8 日 08:06

环境内部版本兼容，是你环境中的hive 版本与exchange中使用的hive-metastore版本不一致。
可以参考下这个帖子，这个用户也是cdh nebula exchange 2.0使用异常 - #6 由 nicole