语法
map
1
2
3
4
5
6
7
1.初始化:var a:Map[String,Int]=Map()
2.添加元素:a+=("key"->value)
3.获取元素:a.getOrElse("key", 0) // 默认值类型需与 value 类型一致(此处为 Int),否则结果类型会被推断为 Any
4.遍历:for((k, v)<- a)
5.是否包含:a.contains("key")
6.a.toSeq.sortBy(_._1)//升序排序 key
7.a.toSeq.sortWith(_._1>_._1) //降序排序 key
dataframe转map
1
2
// Builds a driver-side lookup map: user_id -> "wid,pid", from the distinct
// (wid, pid, user_id) rows of `df`.
//
// getAs is given an explicit [Any] type argument. Without one, Scala infers
// T = Nothing and the cast inserted by the compiler can fail at runtime with
// "ClassCastException: ... cannot be cast to scala.runtime.Nothing$".
//
// NOTE: `.toString` still throws NullPointerException on a null cell, exactly
// as the untyped version intended; filter nulls upstream if they can occur.
// If one user_id appears with several (wid, pid) pairs, `toMap` keeps only one
// of them (the last one in collected order).
val userIds = df.select("wid", "pid", "user_id").distinct().map(row =>
  row.getAs[Any]("user_id").toString ->
    (row.getAs[Any]("wid").toString + "," + row.getAs[Any]("pid").toString)
).collect().toMap
自定义函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * UDF over a collected list of struct rows.
 *
 * Takes field index 1 of every row, converts it with `String.valueOf`
 * (so a null field becomes the string "null"), and looks at the
 * lexicographically largest of those strings:
 *   - if it equals "1", the result is 0;
 *   - otherwise the result is the number of rows.
 *
 * A null or empty input yields 0 instead of throwing from `max` on an
 * empty collection.
 *
 * The result is computed as a pure expression; the previous version wrote it
 * into a `var` declared outside the closure, which is shared mutable state
 * that is neither needed nor reliably visible outside the UDF.
 */
val calListUdf: UserDefinedFunction = udf((rows: Seq[Row]) => {
  val values = Option(rows).getOrElse(Seq.empty[Row])
    .map(row => String.valueOf(row.get(1)))
  // `max` on Seq[String] is a lexicographic comparison, same as the original.
  if (values.isEmpty || values.max == "1") 0 else values.size
})
collect_list(struct("wid_pid_id", "weimobopenid_id"))) as "result")
时间戳比较大小:
1
2
3
4
5
6
// Parses three "yyyy-MM-dd HH:mm:ss" timestamps and computes their distances
// in milliseconds, so that ordering can be checked by the sign of the result.
val df: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")

// Parsed in the same order as before: current, start, end.
val (curr, star, end) = (
  df.parse(currentTime),
  df.parse(starTime),
  df.parse(endTime)
)

// cs: milliseconds from start to current (negative if current is before start).
// ec: milliseconds from current to end (negative if current is after end).
val (cs, ec) = (
  curr.getTime - star.getTime,
  end.getTime - curr.getTime
)