2016-08-24 22 views
1

私はSpark MLlibを使用して、小売業界向けのリンク解析プロジェクトに取り組んでいます。スキーマは次のとおりです。(タイトル:take() 関数のエラー - Spark MLlib を用いたリンク解析の研究)

ID - Long、Chain - Int、Dept - Int、Category - Int、Company - Long、Brand - Long、Date - Date、ProductSize - Int、ProductMeasure - Chararray、PurchaseQuantity - Int、PurchaseAmount - Double

そして、私が使用しているコードは次のとおりです。

scala> import org.apache.spark._ 
scala> import org.apache.spark.rdd.RDD 
scala> import org.apache.spark.util.IntParam 
scala> import org.apache.spark.graphx._ 
scala> import org.apache.spark.graphx.util.GraphGenerators 

scala> case class Transactions(ID:Long,Chain:Int,Dept:Int,Category:Int,Company:Long,Brand:Long,Date:String,ProductSize:Int,ProductMeasure:String,PurchaseQuantity:Int,PurchaseAmount:Double) 
defined class Transactions 

scala> def parseTransactions(str:String): Transactions = { 
    | val line = str.split(",") 
    | Transactions(line(0).toLong,line(1).toInt,line(2).toInt,line(3).toInt,line(4).toInt,line(5).toInt,line(6),line(7).toInt,line(8),line(9).toInt,line(10).toInt) 
    | } 

scala> val textRDD = sc.textFile("/user/cloudera/transactions.csv") 

scala> val transactionsRDD = textRDD.map(parseTransactions).cache() 

scala> val products = transactionsRDD.map(Transactions => (Transactions.ID,Transactions.Chain,Transactions.Dept,Transactions.Category,Transactions.Company,Transactions.Brand,Transactions.Date)).distinct 

scala> products.take(1) 

しかし、最後の行を実行すると、次のエラーが表示されます。

[Stage 0:>               (0 + 1)/7]16/08/24 04:56:13 ERROR executor.Executor: Exception in task 0.0 in stage 0.0 (TID 0) 
java.lang.NumberFormatException: For input string: "id" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Long.parseLong(Long.java:441) 
    at java.lang.Long.parseLong(Long.java:483) 
    at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
\t at scala.collection.immutable.StringOps.toLong(StringOps.scala:31) 
\t at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
\t at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
\t at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
\t at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
\t at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
\t at org.apache.spark.scheduler.Task.run(Task.scala:89) 
\t at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
\t at java.lang.Thread.run(Thread.java:745) 
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id" 
\t at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
\t at java.lang.Long.parseLong(Long.java:441) 
\t at java.lang.Long.parseLong(Long.java:483) 
\t at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
    at scala.collection.immutable.StringOps.toLong(StringOps.scala:31) 
    at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
\t at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
\t at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

16/08/24 04:56:13 ERROR scheduler.TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job 
16/08/24 04:56:13 ERROR executor.Executor: Exception in task 1.0 in stage 0.0 (TID 1) 
java.lang.NumberFormatException: For input string: "6.67" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Integer.parseInt(Integer.java:492) 
    at java.lang.Integer.parseInt(Integer.java:527) 
    at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229) 
\t at scala.collection.immutable.StringOps.toInt(StringOps.scala:31) 
\t at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
\t at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
\t at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
\t at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
\t at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
\t at org.apache.spark.scheduler.Task.run(Task.scala:89) 
\t at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
\t at java.lang.Thread.run(Thread.java:745) 
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost): java.lang.NumberFormatException: For input string: "6.67" 
\t at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
\t at java.lang.Integer.parseInt(Integer.java:492) 
\t at java.lang.Integer.parseInt(Integer.java:527) 
\t at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229) 
    at scala.collection.immutable.StringOps.toInt(StringOps.scala:31) 
    at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
\t at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
\t at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Long.parseLong(Long.java:441) 
    at java.lang.Long.parseLong(Long.java:483) 
    at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
\t at scala.collection.immutable.StringOps.toLong(StringOps.scala:31) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
\t at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
\t at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
\t at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
\t at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
\t at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
\t at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
\t at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
\t at org.apache.spark.scheduler.Task.run(Task.scala:89) 
\t at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

Driver stacktrace: 
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) 
\t at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419) 
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418) 
\t at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) 
\t at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) 
\t at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418) 
\t at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) 
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) 
\t at scala.Option.foreach(Option.scala:236) 
\t at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799) 
\t at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640) 
\t at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599) 
\t at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588) 
\t at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) 
\t at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) 
\t at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843) 
\t at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856) 
\t at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869) 
\t at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1328) 
\t at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) 
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) 
\t at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) 
\t at org.apache.spark.rdd.RDD.take(RDD.scala:1302) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:52) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:54) 
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:56) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:58) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:60) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:62) 
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:64) 
\t at $iwC$$iwC$$iwC$$iwC.<init>(<console>:66) 
\t at $iwC$$iwC$$iwC.<init>(<console>:68) 
\t at $iwC$$iwC.<init>(<console>:70) 
    at $iwC.<init>(<console>:72) 
\t at <init>(<console>:74) 
\t at .<init>(<console>:78) 
\t at .<clinit>(<console>) 
\t at .<init>(<console>:7) 
\t at .<clinit>(<console>) 
\t at $print(<console>) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:606) 
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045) 
\t at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) 
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821) 
\t at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852) 
\t at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800) 
\t at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) 
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) 
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) 
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) 
\t at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) 
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) 
\t at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) 
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) 
\t at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) 
\t at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) 
\t at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) 
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064) 
    at org.apache.spark.repl.Main$.main(Main.scala:31) 
\t at org.apache.spark.repl.Main.main(Main.scala) 
\t at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
\t at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
\t at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
\t at java.lang.reflect.Method.invoke(Method.java:606) 
\t at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) 
\t at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) 
\t at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 
\t at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 
\t at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 
Caused by: java.lang.NumberFormatException: For input string: "id" 
\t at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
\t at java.lang.Long.parseLong(Long.java:441) 
\t at java.lang.Long.parseLong(Long.java:483) 
\t at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
\t at scala.collection.immutable.StringOps.toLong(StringOps.scala:31) 
\t at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38) 
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42) 
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285) 
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) 
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) 
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

なぜこのエラーが発生するのでしょうか?作成した配列が返ってくるはずだったのですが。

よろしくお願いします。

答えて

1

あなたのtoInt呼び出しが、数字以外の文字を含む文字列を変換しようとしているようです。

+0

Date フィールドは String として定義しており、「line(6)」を渡している箇所では日付へのパースはしていませんよね? –

+0

エラーの原因は toInt ではないかと考えていました。いったん文字列のまま保持してみてください。私は Scala を知らないので的外れかもしれませんが、Spark は知っています。文字列を数値にパースしようとしているので、NumberFormatException が発生しています。Scala には例外を処理する構文(try/except に相当するもの)はありますか?また、他の変換を行う前に、transactionsRDD から数行を take してみてください。変換をいくつか削除して、問題が解消するかどうかを確認してください(いっそすべて削除してみてもよいでしょう)。 –

+0

toInt をすべて削除すると、次のエラーが発生します: def parseTransactions(str:String): Transactions = { | val line = str.split(",") | Transactions(line(0).toLong,line(1),line(2),line(3),line(4),line(5),line(6),line(7),line(8),line(9),line(10)) | } <console>:47: error: type mismatch; found : String required: Int Transactions(line(0).toLong,line(1),line(2),line(3),line(4),line(5),line(6),line(7),line(8),line(9),line(10)) 理由はわかりますか? –

関連する問題