4

Sparkのさまざまな結合タイプは何ですか? ドキュメントを見ると、次の結合タイプがサポートされていると書かれています:

実行する結合のタイプ。デフォルトは inner。inner、cross、outer、full、full_outer、left、left_outer、right、right_outer、left_semi、left_anti のいずれかである必要があります。

SQLの結合に関するStackOverflowのanswerも見ましたが、上位のいくつかの回答は、上記の結合の一部(例えば left_semi や left_anti)に触れていません。これらはSparkではどういう意味ですか?

回答

7

以下は簡単な実験例です:

import org.apache.spark._ 
import org.apache.spark.sql._ 
import org.apache.spark.sql.expressions._ 
import org.apache.spark.sql.functions._ 

/**
 * Small sandbox that runs the same two datasets through several Spark join types
 * (joined on the `id` column) and prints each result, so the join types can be
 * compared side by side.
 *
 * An explicit `main` method is used instead of extending `scala.App`: the Spark
 * documentation advises that subclasses of `scala.App` may not work correctly.
 */
object SparkSandbox {

    // NOTE: inside this object the name `Row` refers to this case class, not to
    // org.apache.spark.sql.Row from the wildcard import at the top of the file.
    // It is declared at object level (not inside `main`) so that `toDS()` can
    // resolve an implicit encoder for it.
    case class Row(id: Int, value: String)

    /**
     * Builds a local Spark session, prints the result of every join type listed in
     * `joinTypes`, and always stops the session afterwards.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {
        val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate()
        import spark.implicits._
        spark.sparkContext.setLogLevel("ERROR")

        try {
            val r1 = Seq(Row(1, "A1"), Row(2, "A2"), Row(3, "A3"), Row(4, "A4")).toDS()
            val r2 = Seq(Row(3, "A3"), Row(4, "A4"), Row(4, "A4_1"), Row(5, "A5"), Row(6, "A6")).toDS()

            val joinTypes = Seq("inner", "outer", "full", "full_outer", "left", "left_outer", "right", "right_outer", "left_semi", "left_anti")

            joinTypes.foreach { joinType =>
                // Locale.ROOT keeps the heading stable regardless of the JVM's default locale
                // (e.g. a Turkish locale would otherwise upper-case "i" to a dotted capital I).
                println(s"${joinType.toUpperCase(java.util.Locale.ROOT)} JOIN")
                r1.join(right = r2, usingColumns = Seq("id"), joinType = joinType).orderBy("id").show()
            }
        } finally {
            // Release the local Spark resources even if one of the joins throws.
            spark.stop()
        }
    }
}

出力

INNER JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
+---+-----+-----+ 

OUTER JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 1| A1| null| 
| 2| A2| null| 
| 3| A3| A3| 
| 4| A4| A4| 
| 4| A4| A4_1| 
| 5| null| A5| 
| 6| null| A6| 
+---+-----+-----+ 

FULL JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 1| A1| null| 
| 2| A2| null| 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
| 5| null| A5| 
| 6| null| A6| 
+---+-----+-----+ 

FULL_OUTER JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 1| A1| null| 
| 2| A2| null| 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
| 5| null| A5| 
| 6| null| A6| 
+---+-----+-----+ 

LEFT JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 1| A1| null| 
| 2| A2| null| 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
+---+-----+-----+ 

LEFT_OUTER JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 1| A1| null| 
| 2| A2| null| 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
+---+-----+-----+ 

RIGHT JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 3| A3| A3| 
| 4| A4| A4| 
| 4| A4| A4_1| 
| 5| null| A5| 
| 6| null| A6| 
+---+-----+-----+ 

RIGHT_OUTER JOIN 
+---+-----+-----+ 
| id|value|value| 
+---+-----+-----+ 
| 3| A3| A3| 
| 4| A4| A4_1| 
| 4| A4| A4| 
| 5| null| A5| 
| 6| null| A6| 
+---+-----+-----+ 

LEFT_SEMI JOIN 
+---+-----+ 
| id|value| 
+---+-----+ 
| 3| A3| 
| 4| A4| 
+---+-----+ 

LEFT_ANTI JOIN 
+---+-----+ 
| id|value| 
+---+-----+ 
| 1| A1| 
| 2| A2| 
+---+-----+ 
0

Spark SQLでは、以下に挙げるさまざまな種類の結合が利用できます。詳細はこのlinkを、コード例についてはGitHubのReferenceを参照してください。

1) JOIN 
2) {LEFT|RIGHT|FULL} OUTER JOIN 
3) LEFT SEMI JOIN 
4) CROSS JOIN 

結合の例:

package org.apache.spark.sql.catalyst.plans 

import java.util.Locale 

import org.apache.spark.sql.catalyst.expressions.Attribute 

/** Factory that resolves a textual join-type name to the corresponding [[JoinType]]. */
object JoinType {

  /**
   * Looks up a join type by name.
   *
   * The name is lower-cased with `Locale.ROOT` and all underscores are removed before
   * matching, so for example `"left_outer"`, `"LeftOuter"` and `"LEFTOUTER"` all resolve
   * to the same join type.
   *
   * @param typ the join-type name supplied by the caller
   * @return the matching join type
   * @throws IllegalArgumentException if the name is not recognised; the message lists the
   *                                  supported names
   */
  def apply(typ: String): JoinType = {
    val normalized = typ.toLowerCase(Locale.ROOT).replace("_", "")

    normalized match {
      case "inner" =>
        Inner
      case "outer" | "full" | "fullouter" =>
        FullOuter
      case "leftouter" | "left" =>
        LeftOuter
      case "rightouter" | "right" =>
        RightOuter
      case "leftsemi" =>
        LeftSemi
      case "leftanti" =>
        LeftAnti
      case "cross" =>
        Cross
      case _ =>
        // Spellings advertised to the caller in the error message.
        val supported = Seq(
          "inner",
          "outer", "full", "fullouter", "full_outer",
          "leftouter", "left", "left_outer",
          "rightouter", "right", "right_outer",
          "leftsemi", "left_semi",
          "leftanti", "left_anti",
          "cross")
        val supportedList = supported.mkString("'", "', '", "'")

        throw new IllegalArgumentException(
          s"Unsupported join type '$typ'. " + "Supported join types include: " + supportedList + ".")
    }
  }
}

/**
 * Base type of every join type. It is sealed, so all direct subclasses are declared
 * in this file.
 */
sealed abstract class JoinType { 
    /** The SQL text for this join type (for example `"LEFT OUTER"`). */
    def sql: String 
} 

/**
 * Common parent of the inner-style join types ([[Inner]] and [[Cross]]).
 *
 * The `explicitCartesian` flag indicates whether the inner join was constructed with a
 * CROSS join, i.e. whether a cartesian product has been explicitly requested.
 */
sealed abstract class InnerLike extends JoinType { 
    /** True when a cartesian product was explicitly requested via a CROSS join. */
    def explicitCartesian: Boolean 
} 

/** Inner join; rendered as `INNER` in SQL and not an explicit cartesian product. */
case object Inner extends InnerLike { 
    override def explicitCartesian: Boolean = false 
    override def sql: String = "INNER" 
} 

/** Cross join; rendered as `CROSS` in SQL and flagged as an explicitly requested cartesian product. */
case object Cross extends InnerLike { 
    override def explicitCartesian: Boolean = true 
    override def sql: String = "CROSS" 
} 

/** Left outer join; rendered as `LEFT OUTER` in SQL. */
case object LeftOuter extends JoinType { 
    override def sql: String = "LEFT OUTER" 
} 

/** Right outer join; rendered as `RIGHT OUTER` in SQL. */
case object RightOuter extends JoinType { 
    override def sql: String = "RIGHT OUTER" 
} 

/** Full outer join; rendered as `FULL OUTER` in SQL. */
case object FullOuter extends JoinType { 
    override def sql: String = "FULL OUTER" 
} 

/**
 * Left semi join; rendered as `LEFT SEMI` in SQL.
 *
 * In the sample output shown earlier on this page, `left_semi` returns only the left
 * side's columns, for the left rows whose `id` also occurs on the right side.
 */
case object LeftSemi extends JoinType { 
    override def sql: String = "LEFT SEMI" 
} 

/**
 * Left anti join; rendered as `LEFT ANTI` in SQL.
 *
 * In the sample output shown earlier on this page, `left_anti` returns only the left
 * side's columns, for the left rows whose `id` does not occur on the right side.
 */
case object LeftAnti extends JoinType { 
    override def sql: String = "LEFT ANTI" 
} 

/**
 * Join type carrying an `exists` attribute.
 *
 * It is only used at the end of the optimizer and in physical plans, so no SQL is ever
 * generated for it.
 *
 * @param exists the attribute associated with this join
 */
case class ExistenceJoin(exists: Attribute) extends JoinType {
  /**
   * Not supported for this join type.
   *
   * @throws UnsupportedOperationException always
   */
  override def sql: String =
    // SQL generation never reaches this join type, so there is nothing to render.
    throw new UnsupportedOperationException
}

/**
 * A NATURAL join wrapping an underlying join type.
 *
 * Construction fails with `IllegalArgumentException` unless the wrapped type is one of
 * [[Inner]], [[LeftOuter]], [[RightOuter]] or [[FullOuter]].
 *
 * @param tpe the underlying join type
 */
case class NaturalJoin(tpe: JoinType) extends JoinType {
  require(
    tpe match {
      case Inner | LeftOuter | RightOuter | FullOuter => true
      case _ => false
    },
    s"Unsupported natural join type $tpe")

  /** `NATURAL` followed by the wrapped join type's SQL text. */
  override def sql: String = s"NATURAL ${tpe.sql}"
}

/**
 * A join on a list of shared column names (USING), wrapping an underlying join type.
 *
 * Construction fails with `IllegalArgumentException` unless the wrapped type is one of
 * [[Inner]], [[LeftOuter]], [[LeftSemi]], [[RightOuter]], [[FullOuter]] or [[LeftAnti]].
 *
 * @param tpe          the underlying join type
 * @param usingColumns the column names to join on
 */
case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
  require(
    tpe match {
      case Inner | LeftOuter | LeftSemi | RightOuter | FullOuter | LeftAnti => true
      case _ => false
    },
    s"Unsupported using join type $tpe")

  /** `USING` followed by the wrapped join type's SQL text. */
  override def sql: String = s"USING ${tpe.sql}"
}

/**
 * Extractor that matches [[LeftSemi]], [[LeftAnti]] and any [[ExistenceJoin]].
 */
object LeftExistence {
  /**
   * @param joinType the join type to test
   * @return `Some(joinType)` when it is one of the matched join types, otherwise `None`
   */
  def unapply(joinType: JoinType): Option[JoinType] =
    joinType match {
      case LeftSemi | LeftAnti | _: ExistenceJoin => Some(joinType)
      case _ => None
    }
}

StackOverflowの例のいくつかは、この質問に答えていません。このlinkを参照してください。

+0

これは質問の答えになっていません。例えば、left_semi結合が何であるかを教えてくれません。 – pathikrit

関連する問題