UDFを使わずに済むもっと良い方法もありますが、これはRDDを経由して「getValuesMap」を使うことで目的を果たせる、実際に動作する解決策です。
// Create (or reuse) a local SparkSession for this snippet.
val spark: SparkSession = SparkSession.builder.master("local").getOrCreate
// Implicits enable conversions such as .toDF on local collections and the $"col" syntax.
import spark.implicits._
import org.apache.spark.sql.functions.udf
// The parameter count is hard-coded at four: a Spark UDF cannot accept a
// variable number of arguments, so each column is an explicit parameter.
// Returns the name of the first column (in declaration order) whose value is
// greater than or equal to all the others.
val maxval = udf { (c1: Double, c2: Double, c3: Double, c4: Double) =>
  // True when `v` is >= every candidate; forall short-circuits exactly like
  // the original chained `&&` comparisons (so NaN inputs still fall through).
  def beatsAll(v: Double, others: Double*): Boolean = others.forall(v >= _)

  if (beatsAll(c1, c2, c3, c4)) "column1"
  else if (beatsAll(c2, c1, c3, c4)) "column2"
  else if (beatsAll(c3, c1, c2, c4)) "column3"
  else "column4"
}
/** Schema for one input row: a label plus the four numeric columns compared
  * by the `maxval` UDF. Declared `final` because extending one case class
  * from another breaks equals/hashCode semantics and is a known pitfall.
  *
  * @param name    row label shown in the output
  * @param column1 first candidate value
  * @param column2 second candidate value
  * @param column3 third candidate value
  * @param column4 fourth candidate value
  */
final case class Record(name: String,
                        column1: Double,
                        column2: Double,
                        column3: Double,
                        column4: Double)
// Sample data; integer literals (1, 7) widen to Double via the Record constructor.
val df = Seq(
  Record("first", 2.0, 1, 2.1, 5.4),
  Record("test", 1.5, 0.5, 0.9, 3.7),
  Record("choose", 7, 2.9, 9.1, 2.5)
).toDF()

// Tag each row with the name of its largest column, then print only
// the label and that column name.
val tagged = df.withColumn(
  "max_column",
  maxval(df("column1"), df("column2"), df("column3"), df("column4"))
)
tagged.select("name", "max_column").show()
出力
+------+----------+
| name|max_column|
+------+----------+
| first| column4|
| test| column4|
|choose| column3|
+------+----------+
参照: http://stackoverflow.com/a/42487191/3297229 – Wilmerton