- Nebula version: 2.0-ga
- Deployment (distributed / standalone / Docker / DBaaS): distributed, Docker Swarm
- Problem description
- Relevant meta / storage / graph info logs
The main error is: Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.
21/04/07 16:13:07 INFO client.HiveClientImpl: Warehouse location for Hive client (version 2.1.1) is /user/hive/warehouse
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(Lorg/apache/hadoop/hive/conf/HiveConf;Lorg/apache/hadoop/hive/metastore/HiveMetaHookLoader;Ljava/util/concurrent/ConcurrentHashMap;Ljava/lang/String;Z)Lorg/apache/hadoop/hive/metastore/IMetaStoreClient;
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3819)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3871)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3851)
at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:4105)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:254)
at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:237)
at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:394)
at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:338)
at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:318)
at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:294)
at org.apache.spark.sql.hive.client.HiveClientImpl.org$apache$spark$sql$hive$client$HiveClientImpl$$client(HiveClientImpl.scala:254)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:276)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:221)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:220)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:266)
at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:356)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:216)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:141)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:136)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.isTemporaryTable(SessionCatalog.scala:731)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.isRunningDirectlyOnFiles(Analyzer.scala:749)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:683)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:715)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:654)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
at com.vesoft.nebula.exchange.reader.HiveReader.read(ServerBaseReader.scala:68)
at com.vesoft.nebula.exchange.Exchange$.com$vesoft$nebula$exchange$Exchange$$createDataSource(Exchange.scala:240)
at com.vesoft.nebula.exchange.Exchange$$anonfun$main$2.apply(Exchange.scala:130)
at com.vesoft.nebula.exchange.Exchange$$anonfun$main$2.apply(Exchange.scala:122)
at scala.collection.immutable.List.foreach(List.scala:392)
at com.vesoft.nebula.exchange.Exchange$.main(Exchange.scala:122)
at com.vesoft.nebula.exchange.Exchange.main(Exchange.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:851)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:926)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:935)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
21/04/07 16:13:08 INFO spark.SparkContext: Invoking stop() from shutdown hook
21/04/07 16:13:08 INFO server.AbstractConnector: Stopped Spark@6ac4944a{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
21/04/07 16:13:08 INFO ui.SparkUI: Stopped Spark web UI at http://slave04:4040
21/04/07 16:13:08 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
21/04/07 16:13:08 INFO memory.MemoryStore: MemoryStore cleared
21/04/07 16:13:08 INFO storage.BlockManager: BlockManager stopped
21/04/07 16:13:08 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
21/04/07 16:13:08 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
21/04/07 16:13:08 INFO spark.SparkContext: Successfully stopped SparkContext
21/04/07 16:13:08 INFO util.ShutdownHookManager: Shutdown hook called
21/04/07 16:13:08 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-286bba8a-074f-4062-9b66-55d877fd8c7e
21/04/07 16:13:08 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-012aeb90-90f0-4446-8c50-8358f4707290
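For reference, the failing path in the trace is Spark's Hive client initialization (HiveClientImpl -> Hive.getMSC -> RetryingMetaStoreClient.getProxy), triggered by the first spark.sql(...) call issued from Exchange's HiveReader. A NoSuchMethodError at this point normally means the RetryingMetaStoreClient class loaded at runtime comes from a different hive-metastore version than the one the calling code was compiled against, i.e. conflicting Hive jars on the driver classpath. The sketch below is a minimal, Exchange-independent reproduction of the same path; the object name is made up, the query is taken from the tag exec in the config further down, and it assumes the same spark-submit environment as the failing job.

import org.apache.spark.sql.SparkSession

// Minimal sketch (not the Exchange code): exercises the same Hive client
// initialization that fails in the trace above.
object HiveReadCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("hive-read-check")   // placeholder name
      .enableHiveSupport()          // Hive catalog, as used when Exchange reads from Hive
      .getOrCreate()

    // The first SQL statement makes Spark create the Hive metastore client
    // (HiveExternalCatalog.databaseExists in the trace above); with a broken
    // classpath the same NoSuchMethodError should surface here.
    spark.sql("select phone_number from target limit 1").show()

    spark.stop()
  }
}

If this standalone job fails in the same way, the conflict is in the Spark / Hive classpath rather than in Exchange itself.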
The configuration file is as follows:
{
# Spark related configuration
spark: {
app: {
name: Hive Exchange 2.0
}
driver: {
cores: 1
maxResultSize: 1G
}
executor: {
memory:1G
}
cores {
max: 16
}
}
# If Spark and Hive are deployed in different clusters,
# configure these parameters for Hive. Otherwise, ignore them.
# hive: {
# waredir: "hdfs://localhost:9000/usr/hive/warehouse/"
# connectionURL: "jdbc:mysql://localhost:3306/hive_spark?characterEncoding=UTF-8"
# connectionDriverName: "com.mysql.jdbc.Driver"
# connectionUserName: "jiayizhou"
# connectionPassword: "123456"
# }
# Nebula Graph related configuration
nebula: {
address:{
# Specifies the IP addresses and ports of the Graph Service and the Meta Service of Nebula Graph
# If multiple servers are used, separate the addresses with commas.
# Format: "ip1:port","ip2:port","ip3:port"
graph:["192.168.1.12:3699"]
meta:["192.168.1.10:45500","192.168.1.12:45500", "192.168.1.60:45500"]
}
# Specifies an account that has the WriteData privilege in Nebula Graph and its password
user: user
pswd: password
# Specifies a graph space name
space: bukong
connection {
timeout: 3000
retry: 3
}
execution {
retry: 3
}
error: {
max: 32
output: /tmp/errors
}
rate: {
limit: 1024
timeout: 1000
}
}
# Process vertices
tags: [
# Settings for the target tag
{
# Specifies a tag name defined in Nebula Graph
name: target
type: {
# Specifies the data source. hive is used.
source: hive
# Specifies how to import vertex data into Nebula Graph: client or sst.
# For more information about importing sst files, see Import SST files (doc to do).
sink: client
}
# Specifies the SQL statement to read data from the target table.
exec: "select phone_number from target"
# Specifies the column names from the target table in fields.
# Their values are used as the source of the corresponding tag properties (nebula.fields) defined in Nebula Graph.
# If more than one column name is specified, separate them with commas.
# fields for Hive and nebula.fields for Nebula Graph must have a one-to-one correspondence.
fields: [phone_number] # fields are the column names in the Hive table
nebula.fields: [phone_number] # nebula.fields are the properties of the corresponding tag in Nebula
# Specifies a column as the source of VIDs.
# The value of vertex must be one column name in the exec sentence.
# Nebula Graph v2.x only supports string VIDs, so vertex.policy is not used here.
vertex: phone_number # pick one column from the Hive table as the unique vertex key; v2 only supports string vertex IDs, int is no longer supported
# Specifies the maximum number of vertex data to be written into
# Nebula Graph in a single batch.
batch: 256
# Specifies the partition number of Spark.
partition: 32
}
]
# Process edges
edges: [
# Settings for the call edge type
{
# Specifies an edge type name defined in Nebula Graph
name: call
type: {
# Specifies the data source. hive is used.
source: hive
# Specifies how to import edge data into Nebula Graph: client or sst.
# For more information about importing sst files,
# see Import SST files (doc to do).
sink: client
}
# Specifies the SQL statement to read data from the bukong table.
exec: "select last_time, phone1, phone2 from bukong"
# Specifies the column names from the bukong table in fields.
# Their values are used as the source of the properties of
# the call edge type defined in Nebula Graph.
# If more than one column name is specified, separate them with commas.
# fields for Hive and nebula.fields for Nebula Graph must
# have a one-to-one correspondence.
fields: [last_time]
nebula.fields: [latest_time]
# source specifies a column as the source of the IDs of
# the source vertex of an edge.
# target specifies a column as the source of the IDs of
# the target vertex of an edge.
# The values of source and target must be
# column names set in the exec sentence.
source: phone1
target: phone2
# For now, only string type VIDs are supported in Nebula Graph v2.x.
# Do not use vertex.policy for mapping.
#target: {
# field: dstid
# policy: "hash"
#}
# Specifies the maximum number of edge data rows to be
# written into Nebula Graph in a single batch.
batch: 256
# Specifies the partition number of Spark.
partition: 32
}
]
}
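One more data point from the log above: the Hive client is reported as version 2.1.1 ("Warehouse location for Hive client (version 2.1.1)"). A setting commonly involved in this kind of NoSuchMethodError is which Hive metastore client version and jars Spark loads, controlled by spark.sql.hive.metastore.version and spark.sql.hive.metastore.jars. The sketch below only shows where those settings would go; the jars path is a placeholder, and whether this helps here depends on where the conflicting hive-metastore classes actually come from (for example, the Exchange assembly jar versus $SPARK_HOME/jars). For the Exchange job itself the same two settings can be passed to spark-submit via --conf.

import org.apache.spark.sql.SparkSession

// Sketch only, not a confirmed fix: pin the Hive metastore client that Spark
// loads so it matches the cluster's Hive (2.1.1 according to the log above).
object HiveMetastorePinCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("hive-metastore-pin-check")                                  // placeholder name
      .config("spark.sql.hive.metastore.version", "2.1.1")                  // cluster Hive version from the log
      .config("spark.sql.hive.metastore.jars", "/path/to/hive-2.1.1/lib/*") // placeholder classpath
      .enableHiveSupport()
      .getOrCreate()

    // Any catalog access forces metastore client creation, same as in the trace.
    spark.sql("show databases").show()
    spark.stop()
  }
}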