検証式をコード生成することで、結合や explode を一切使わずに、データに対する単一パスで非常に簡単にこれを行うことができます。以下のコードは、後述の結果を生成します。
// Simulate the data: each record is a name plus two optional integer codes
// (a None code shows up as null in the resulting table).
case class Record(Name: String, code1: Option[Int], code2: Option[Int])
// registerTempTable returns Unit, so dfData is bound to () rather than to a
// DataFrame; the rows are only reachable through the "my_data" temp table.
val dfData = sc.parallelize(
  Seq(
    Record(Name = "A", code1 = Some(3), code2 = Some(4)),
    Record(Name = "B", code1 = Some(3), code2 = None)
  )
).toDF().registerTempTable("my_data")
// Simulate the lookup table: each (ID, Value) row states that column
// code<ID> must equal <Value>.
val dfLookup = sc.parallelize(Seq((1, 3), (2, 4))).toDF("ID", "Value")
// Build a validation expression by AND-ing one equality predicate per lookup row.
val validationExpression = {
  val predicates = dfLookup.collect().map { row =>
    s"code${row.getInt(0)} = ${row.getInt(1)}"
  }
  // An empty lookup table would otherwise yield "", turning the query below
  // into the invalid SQL `nvl(, false)`; with no constraints every row is valid.
  if (predicates.isEmpty) "true" else predicates.mkString(" and ")
}
// Add an is_valid column to the data; nvl maps a null comparison result
// (e.g. a row whose code is null) to false.
sql(s"select *, nvl($validationExpression, false) as is_valid from my_data").show()
出力:
defined class Record
dfData: Unit =()
dfLookup: org.apache.spark.sql.DataFrame = [ID: int, Value: int]
validationExpression: String = code1 = 3 and code2 = 4
+----+-----+-----+--------+
|Name|code1|code2|is_valid|
+----+-----+-----+--------+
| A| 3| 4| true|
| B| 3| null| false|
+----+-----+-----+--------+
を
出典
2016-07-28 16:56:44
Sim