-nebula exchange 1.1.0
-nebula graph 1.1.0
-spark 2.4.7
-jdk 1.8.0_191
- 使用 exchange 1.1.0 导入官网 demo CSV 文件,sink 类型为 sst
- 问题的具体描述
报错日志及源码分析
java.lang.NumberFormatException: For input string: "Women in Islam"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Long.parseLong(Long.java:589)
at java.lang.Long.parseLong(Long.java:631)
at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:277)
at scala.collection.immutable.StringOps.toLong(StringOps.scala:29)
at com.vesoft.nebula.tools.importer.processor.Processor$class.getLong(Processor.scala:368)
at com.vesoft.nebula.tools.importer.processor.VerticesProcessor.getLong(VerticesProcessor.scala:56)
at com.vesoft.nebula.tools.importer.processor.VerticesProcessor$$anonfun$process$1$$anonfun$apply$1.apply(VerticesProcessor.scala:179)
at com.vesoft.nebula.tools.importer.processor.VerticesProcessor$$anonfun$process$1$$anonfun$apply$1.apply(VerticesProcessor.scala:178)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.sort_addToSorter_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at com.vesoft.nebula.tools.importer.processor.VerticesProcessor$$anonfun$process$2.apply(VerticesProcessor.scala:202)
at com.vesoft.nebula.tools.importer.processor.VerticesProcessor$$anonfun$process$2.apply(VerticesProcessor.scala:195)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:935)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:935)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2079)
iter.map { row =>
val vertexID = getLong(row, tagConfig.vertexField) //此处直接默认强转 long,policy: hash 不生效
val part = (vertexID.toLong % partSize + 1).toInt
// TODO version
val encodedKey = NebulaCodec.createVertexKey(part, vertexID.toLong, tagID, 0L)
val values = for {
property <- fieldKeys if property.trim.length != 0
} yield
extraValue(row, property, fieldTypeMap, tagConfig.isImplicit, true)
.asInstanceOf[AnyRef]
val encodedValue = NebulaCodec.encode(values.toArray)
(encodedKey, encodedValue)
}
val vertexID = getLong(row, tagConfig.vertexField) // 此处直接默认强转为 long,配置的 policy: hash 不生效
也就是说可以得出结论:在此版本中,vertexID 以 sst 方式导入时不支持 String 类型,只支持 Long 类型。