-1
SparkSQLクエリをシェルコマンド（spark-shell）からHiveおよびHBaseのテーブルに対して実行することはできています。今度は、HiveとHBaseのテーブルを直接扱うJava/Scalaコードを探しています。どなたかコードサンプルを教えていただけませんか。Googleで検索しましたが、見つかったものはうまく動きませんでした。SparkSQLを使用してHive/HBaseテーブルをクエリするJava/Scalaのコードサンプルはありますか？
質問をコードと出力で更新しました。私のコードは次のとおりです:
object HbaseScalaSample {
  /**
   * Entry point: creates a [[org.apache.spark.sql.hive.HiveContext]] and runs a
   * simple query ("show databases") to verify Hive connectivity from Spark.
   *
   * Fixes vs. the original:
   *  - `SparkConf.setSparkHome` expects a local filesystem path to the Spark
   *    installation, NOT a master URL; the original passed
   *    "spark://master01:7077" to it while also setting master to "local[*]".
   *    To run against that cluster, pass the URL to `setMaster` instead.
   *    The bogus `setSparkHome` call is removed here.
   *  - Deprecated procedure syntax `def main(...) { }` replaced with an
   *    explicit `: Unit =` return type.
   *  - The SparkContext is stopped explicitly in a `finally` block instead of
   *    relying on the JVM shutdown hook (visible in the pasted log).
   *
   * NOTE(review): the NullPointerException in the pasted stack trace is thrown
   * from Hadoop's Shell.runCommand while the driver runs on Windows
   * (Temp path C:\Users\... in the log) — presumably winutils.exe /
   * HADOOP_HOME is not configured on that machine; verify the Windows Hadoop
   * binaries are installed before blaming this code.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("WordCount")
      .setMaster("local[*]") // use "spark://master01:7077" here to target the cluster instead
    val context = new SparkContext(conf)
    try {
      println("before hive context ===== " + context)
      println(context.sparkUser)
      // HiveContext needs hive-site.xml on the classpath to reach the metastore.
      val sqlContext = new org.apache.spark.sql.hive.HiveContext(context)
      println("hive context ===== " + sqlContext)
      import sqlContext.implicits._
      println("before query context ===== ")
      // val result = sqlContext.sql("select * from test_sample.police_station_juridiction limit 1")
      val result = sqlContext.sql("show databases")
      println("after query context ===== ")
      result.show()
    } finally {
      // Release executors/UI/temp dirs deterministically rather than via shutdown hook.
      context.stop()
    }
  }
}
上記コードを実行したときの出力:
17/10/16 18:20:06 INFO HiveMetaStore: Added admin role in metastore
17/10/16 18:20:06 INFO HiveMetaStore: Added public role in metastore
17/10/16 18:20:06 INFO HiveMetaStore: No user is added in admin role, since config is empty
17/10/16 18:20:06 INFO HiveMetaStore: 0: get_all_databases
17/10/16 18:20:06 INFO audit: ugi=tmpl1684 ip=unknown-ip-addr cmd=get_all_databases
17/10/16 18:20:06 INFO HiveMetaStore: 0: get_functions: db=default pat=*
17/10/16 18:20:06 INFO audit: ugi=tmpl1684 ip=unknown-ip-addr cmd=get_functions: db=default pat=*
17/10/16 18:20:06 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MResourceUri" is tagged as "embedded-only" so does not have its own datastore table.
Exception in thread "main" java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:258)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:359)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:263)
at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:39)
at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:38)
at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:46)
at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:45)
at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:682)
at HbaseScalaSample$.main(HbaseScalaSample.scala:36)
at HbaseScalaSample.main(HbaseScalaSample.scala)
Caused by: java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:189)
... 22 more
Caused by: java.lang.NullPointerException
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
at org.apache.hadoop.util.Shell.run(Shell.java:455)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:774)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:572)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:547)
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
... 23 more
17/10/16 18:20:06 INFO SparkContext: Invoking stop() from shutdown hook
17/10/16 18:20:06 INFO SparkUI: Stopped Spark web UI at http://192.168.1.127:4040
17/10/16 18:20:06 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/10/16 18:20:06 INFO MemoryStore: MemoryStore cleared
17/10/16 18:20:06 INFO BlockManager: BlockManager stopped
17/10/16 18:20:06 INFO BlockManagerMaster: BlockManagerMaster stopped
17/10/16 18:20:06 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/10/16 18:20:06 INFO SparkContext: Successfully stopped SparkContext
17/10/16 18:20:06 INFO ShutdownHookManager: Shutdown hook called
17/10/16 18:20:06 INFO ShutdownHookManager: Deleting directory C:\Users\tmpl1684\AppData\Local\Temp\spark-6701cfb8-e178-4711-84d4-89d57da4dbfa
一方、spark-shellから実行した場合のコマンド:
spark-shell
scala> val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
.
scala> sqlContext.sql("select * from test_sample.police_station_juridiction limit 1").collect()
結果:
res0: Array[org.apache.spark.sql.Row] = Array([1.0,Hennur Police Station,null,null,null,POLYGON ((77.65560130784614 13.019060286311245, 77.656408100047088 13.020235045175891, 77.657542928637426 13.021478901432321, 77.658216733112923 13.023241020438837, 77.659812585818088 13.025210432730312, 77.66144390191667 13.027318032046464, 77.663811083429337 13.028803706031669, 77.664156851515443 13.029002371063319, 77.664405095269558 13.031109938105049, 77.66405046133508 13.031317238812122, 77.6647951925975 13.033148387518626, 77.665043436351638 13.035186820144828, 77.665220753318863 13.038209888463735, 77.668199678368481 13.037726200014607, 77.670611189122937 13.037380707686342, 77.671107676631209 13.038762674104895, 77.672845382910154 13.038659026891253, 77.672987236483962 13.03879722316646, 77....
私も同じことを試しましたが、hiveContext変数のところでNullPointerExceptionが発生します。 – user6608138
質問をコードと出力の両方で更新する必要があります。 –
質問を更新しました。テーブルをクエリする方法を教えてください。 – user6608138