
Spark/Scala - project runs fine from IntelliJ but fails with an error under SBT

I have a Spark project that I run locally from IntelliJ, and it works fine when run from there. The project is very simple; at this point it is just a toy example. Here is the code:

package mls.main

import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import java.nio.file.{Paths, Files}
import scala.io.Source

object Main {

  def main(args: Array[String]) {
    import org.apache.log4j.{Level, Logger}

    print("HELLO WORLD!")
    // quiet down Spark's and Akka's logging
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("akka").setLevel(Level.WARN)

    // fire up spark
    val sc = createContext
    val sqlContext = new SQLContext(sc)
    loadAHSData(List("x"), sqlContext)
  }

  def loadAHSData(years: List[String], sqlContext: SQLContext): Unit = {
    // load the column names that exist in all 3 datasets
    val columns = sqlContext.sparkContext
      .textFile("data/common_columns.txt")
      .collect()
      .toSeq

    columns.foreach(println)
  }

  def createContext(appName: String, masterUrl: String): SparkContext = {
    val conf = new SparkConf().setAppName(appName).setMaster(masterUrl)
    new SparkContext(conf)
  }

  def createContext(appName: String): SparkContext = createContext(appName, "local")

  def createContext: SparkContext = createContext("Data Application", "local")
}

When I run it via IntelliJ, I get the expected output: a couple of column names from the text file I specified. However, when I cd into the project directory and run sbt run, I do see the "HELLO WORLD!" output, but it then fails with the stack trace below:

java.lang.ClassNotFoundException: scala.None$
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1826) 
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1713) 
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2000) 
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) 
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245) 
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169) 
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027) 
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) 
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422) 
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75) 
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:309) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
    at java.lang.Thread.run(Thread.java:748) 

17/12/13 09:52:14 WARN FileSystem: exception in the cleaner thread but it will continue to run 
java.lang.InterruptedException 
    at java.lang.Object.wait(Native Method) 
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143) 
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164) 
    at org.apache.hadoop.fs.FileSystem$Statistics$StatisticsDataReferenceCleaner.run(FileSystem.java:2989) 
    at java.lang.Thread.run(Thread.java:748) 
17/12/13 09:52:14 ERROR Utils: uncaught error in thread SparkListenerBus, stopping SparkContext 
java.lang.InterruptedException 
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998) 
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304) 
    at java.util.concurrent.Semaphore.acquire(Semaphore.java:312) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:80) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79) 
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78) 
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77) 
17/12/13 09:52:14 ERROR Utils: throw uncaught fatal error in thread SparkListenerBus 
java.lang.InterruptedException 
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998) 
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304) 
    at java.util.concurrent.Semaphore.acquire(Semaphore.java:312) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:80) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79) 
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78) 
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279) 
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77) 
17/12/13 09:52:14 ERROR ContextCleaner: Error in cleaning thread 
java.lang.InterruptedException 
    at java.lang.Object.wait(Native Method) 
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143) 
    at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:181) 
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279) 
    at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178) 
    at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73) 

My build.sbt looks like the following:

name := "MLS_scala" 

version := "0.1" 

scalaVersion := "2.11.1" 

resolvers ++= Seq(
    Resolver.sonatypeRepo("releases"), 
    Resolver.sonatypeRepo("snapshots") 
) 

val sparkVersion = "2.2.0" 
libraryDependencies ++= Seq(
    "org.apache.spark" %% "spark-core" % sparkVersion, 
    "org.apache.spark" %% "spark-sql" % sparkVersion, 
    "org.apache.spark" %% "spark-mllib" % sparkVersion, 
    "org.apache.spark" %% "spark-streaming" % sparkVersion, 
    "org.apache.spark" %% "spark-hive" % sparkVersion 
) 

I can't understand why it runs perfectly from IntelliJ but produces that error from sbt. Please let me know if there are any steps I can take to resolve this issue. Thank you!


What version of SBT are you using? Do you have a 'project/build.properties' file? If so, what is in it?
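For context, 'project/build.properties' normally does nothing more than pin the sbt launcher version for the build. A minimal example follows; the version number is only illustrative, not taken from the question:

# project/build.properties - pins the sbt version used for this build
sbt.version = 0.13.16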

Answers


Try adding this to your build.sbt; it is likely something to do with Scala library versioning:

fork := true 
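The likely mechanism, for what it's worth: by default sbt runs your main class inside its own JVM, whose classloader hierarchy carries sbt's own copy of the Scala library, and Spark's Java serializer can then fail to resolve classes such as scala.None$ from its executor threads. Forking starts a clean JVM with only the project classpath. A minimal sketch of how the setting could sit in the build.sbt above; the run-scoped variant and the heap option are assumptions, not part of the original answer:

// Run the application in a separate JVM instead of inside sbt's JVM,
// so Spark resolves the Scala library from the project classpath.
fork := true

// Assumed alternative: scope forking to the `run` task only, and give
// the forked JVM extra heap for local Spark (illustrative value).
fork in run := true
javaOptions in run ++= Seq("-Xmx2G")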