I am trying to load data from Spark into MongoDB with the code below, running it in IntelliJ IDEA, and I am getting a MongoDB error while loading the data:

import com.databricks.spark.avro._ 
import org.apache.spark.{SparkConf, SparkContext} 
import org.apache.spark.sql.SQLContext 

import com.mongodb.casbah.{WriteConcern => MongodbWriteConcern} 
import com.stratio.provider.mongodb._ 
import MongodbConfig._ 

object test { 
    def main(args: Array[String]) { 

     val conf = new SparkConf().setAppName("test").setMaster("local[8]") 

     val sc = new SparkContext(conf) 
     val sqlContext = new SQLContext(sc) 

     val test = sqlContext.read.avro("file:///Users\\abhis\\Desktop\\Avro\\test") // load the test Avro file 
     logger.info("Reading test file") // logger is assumed to be defined elsewhere in the project 

     // Register the Avro data as a temporary table 
     test.registerTempTable("test") // test temp table 

     val targetData = sqlContext.sql("SELECT * FROM test") // the code runs fine up to this point 

     // Configuration for saving the data into a MongoDB collection 
     val targetOutputBuilder = MongodbConfigBuilder(Map(
      Host -> List("localhost:27017"), 
      Database -> "subs", 
      Collection -> "target", 
      SamplingRatio -> 1.0, 
      WriteConcern -> MongodbWriteConcern.Normal, 
      SplitKey -> "_id", 
      SplitSize -> 8)) 

     val writeConfig = targetOutputBuilder.build() 

     // Write the data into the MongoDB collection 
     targetData.saveToMongodb(writeConfig) 

    } 
    } 

and I have added these jars as dependencies:

  • casbah-commons_2.10-2.8.0
  • casbah-core_2.10-2.8.0
  • mongo-java-driver-2.13.0
  • spark-mongodb-core-0.8.7
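
For reference, the same jars expressed as managed sbt dependencies would look roughly like the sketch below; the group IDs are assumptions inferred from the artifact names, not copied from an actual build:

// assumed coordinates, inferred from the jar file names above 
libraryDependencies ++= Seq( 
    "org.mongodb" % "casbah-commons_2.10" % "2.8.0", 
    "org.mongodb" % "casbah-core_2.10" % "2.8.0", 
    "org.mongodb" % "mongo-java-driver" % "2.13.0", 
    "com.stratio" % "spark-mongodb-core" % "0.8.7" 
) 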

I am getting the error below:

java.lang.NoClassDefFoundError: Could not initialize class com.mongodb.MongoClient 
    at com.mongodb.casbah.MongoClient$.apply(MongoClient.scala:176) 
    at com.stratio.provider.mongodb.MongodbClientFactory$.createClient(MongodbClientFactory.scala:43) 
    at com.stratio.provider.mongodb.writer.MongodbWriter.<init>(MongodbWriter.scala:40) 
    at com.stratio.provider.mongodb.writer.MongodbBatchWriter.<init>(MongodbBatchWriter.scala:32) 
    at com.stratio.provider.mongodb.MongodbDataFrame$$anonfun$saveToMongodb$1.apply(mongodbFunctions.scala:61) 
    at com.stratio.provider.mongodb.MongodbDataFrame$$anonfun$saveToMongodb$1.apply(mongodbFunctions.scala:59) 
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) 
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) 
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) 
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) 
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
    at java.lang.Thread.run(Thread.java:745) 
16/05/15 22:13:57 ERROR Executor: Exception in task 2.0 in stage 4.0 (TID 18) 
java.lang.IllegalAccessError: tried to access method org.bson.types.ObjectId.<init>(III)V from class com.mongodb.Bytes 
    at com.mongodb.Bytes.<clinit>(Bytes.java:219) 
    at com.mongodb.Mongo.<clinit>(Mongo.java:74) 
    at com.mongodb.casbah.MongoClient$.apply(MongoClient.scala:176) 
    at com.stratio.provider.mongodb.MongodbClientFactory$.createClient(MongodbClientFactory.scala:43) 
    at com.stratio.provider.mongodb.writer.MongodbWriter.<init>(MongodbWriter.scala:40) 
    at com.stratio.provider.mongodb.writer.MongodbBatchWriter.<init>(MongodbBatchWriter.scala:32) 
    at com.stratio.provider.mongodb.MongodbDataFrame$$anonfun$saveToMongodb$1.apply(mongodbFunctions.scala:61) 
    at com.stratio.provider.mongodb.MongodbDataFrame$$anonfun$saveToMongodb$1.apply(mongodbFunctions.scala:59) 
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) 
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) 
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) 
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) 
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
    at java.lang.Thread.run(Thread.java:745) 

Answers


Well, I tried your code with slight modifications and it works. I did not test your exact setup (the same dependency versions), since it is too far from my own setup to test that way. If you can change the versions, this should help:

import org.apache.spark.SparkConf 
import org.apache.spark.SparkContext 
import org.apache.spark.sql.SQLContext 
import com.stratio.datasource._ 
import com.stratio.datasource.mongodb._ 
import com.stratio.datasource.mongodb.schema._ 
import com.stratio.datasource.mongodb.writer._ 
import com.stratio.datasource.mongodb.config.MongodbConfig._ 
import com.stratio.datasource.mongodb.config.MongodbConfigBuilder 
import com.stratio.datasource.util.Config._ 

/** 
* Using https://github.com/Stratio/Spark-MongoDB 
*/ 
object ToMongoDB { 

    def main(args: Array[String]): Unit = { 
    val conf = new SparkConf().setAppName("ToMongoDB") 
    val sc = new SparkContext(conf) 
    val sqlContext = new SQLContext(sc) 

    val avroInput = sqlContext.read.format("com.databricks.spark.avro").load("/spark_learning/avro/") 


    val targetOutputBuilder = MongodbConfigBuilder(
     Map(Host -> List("localhost:27017"), 
     Database -> "test", 
     Collection -> "target", 
     SamplingRatio -> 1.0, 
     WriteConcern -> "normal", //changed from MongodbWriteConcern.Normal to "normal" 
     SplitKey -> "_id", 
     SplitSize -> 8)) 

    val writeConfig = targetOutputBuilder.build() 
    avroInput.saveToMongodb(writeConfig) 
    } 

} 
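
To sanity-check the write, the collection can be read back through the same connector. This is a minimal sketch that would go right after the saveToMongodb call inside main; fromMongoDB comes from the connector's implicits, and the host, database, and collection values simply mirror the write config above:

// read the collection back to verify the write (assumes the imports above are in scope) 
val readConfig = MongodbConfigBuilder( 
    Map(Host -> List("localhost:27017"), 
    Database -> "test", 
    Collection -> "target", 
    SamplingRatio -> 1.0)).build() 

val written = sqlContext.fromMongoDB(readConfig) 
written.registerTempTable("target") 
sqlContext.sql("SELECT COUNT(*) FROM target").show() 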

I added these dependencies in my build.sbt file:

val spark_mongodb = "com.stratio" % "spark-mongodb" % "0.8.0" 
val spark_mongodb_stratio = "com.stratio.datasource" % "spark-mongodb_2.10" % "0.11.1" 
val spark_avro = "com.databricks" %% "spark-avro" % "1.0.0" 
libraryDependencies ++= Seq(spark_mongodb, spark_mongodb_stratio, spark_avro) 
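
Pulled together, a complete minimal build.sbt would look roughly like this. The scalaVersion and the Spark version are assumptions (inferred from the _2.10 artifacts and the Spark 1.6-style stack trace), and only the com.stratio.datasource artifact is kept because the imports above come from that package; adjust everything to your environment:

// assumed: Scala 2.10 and Spark 1.6.x, marked provided since Spark supplies them at runtime 
name := "to-mongodb" 

scalaVersion := "2.10.6" 

libraryDependencies ++= Seq( 
    "org.apache.spark" %% "spark-core" % "1.6.1" % "provided", 
    "org.apache.spark" %% "spark-sql" % "1.6.1" % "provided", 
    "com.stratio.datasource" % "spark-mongodb_2.10" % "0.11.1", 
    "com.databricks" %% "spark-avro" % "1.0.0" 
) 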