spark sql TempTable操作

2017-01-05 11:11:11来源:oschina作者:JPblog人点击


1.目的

找出人际关系圈中的三角环状关系(A->B,B->C,C->A)


2.素材

text1.txt


1 tom 2 jack friend
2 jack 3 sala friend
3 sala 1 tom friend
4 joy 1 tom friend
1 tom 4 joy friend
1 tom 4 joy friend
2 jack 5 Missing friend
3.代码
/**
* Created by puwenchao on 2016-07-06.
*/
package test
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.spark.sql.{SaveMode,SQLContext}
import org.apache.spark.sql._
/**
 * One row of the relationship file: a directed relation from person A to
 * person B. Used as the record type that is converted to a DataFrame and
 * registered as a temporary table.
 *
 * @param aid   numeric id of person A (first column of the input line)
 * @param aname name of person A (second column)
 * @param bid   numeric id of person B (third column)
 * @param bname name of person B (fourth column)
 * @param rel   relation label between A and B (fifth column), e.g. "friend"
 */
case class person(
  aid: Int,
  aname: String,
  bid: Int,
  bname: String,
  rel: String
)
/**
 * Finds triangular (three-node cyclic) relationships A->B, B->C, C->A in a
 * space-separated relationship file by self-joining a Spark SQL temporary
 * table three times.
 */
object sql {

  /**
   * Program entry point.
   *
   * Reads the relationship file, registers it as the temporary table `text`,
   * runs the triangle query and prints one result row per line.
   *
   * @param args optional command-line arguments; `args(0)`, when present, is
   *             the path of the input file. Without it the original
   *             hard-coded path `e://text1.txt` is used.
   */
  def main(args: Array[String]): Unit = {
    // Silence the verbose Spark / Jetty logging.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val inputPath = args.headOption.getOrElse("e://text1.txt")

    // Create the SparkContext and SQLContext.
    val sparkConf = new SparkConf().setAppName("SQL_APP").setMaster("local")
    val sc = new SparkContext(sparkConf)
    try {
      val sqlContext = new org.apache.spark.sql.SQLContext(sc)
      // Brings the implicit RDD => DataFrame conversion (`toDF`) into scope.
      import sqlContext.implicits._

      // Each non-blank line is "aid aname bid bname rel", separated by single spaces.
      val text = sc.textFile(inputPath)
        .map(_.trim)
        .filter(_.nonEmpty) // blank lines would otherwise fail on "".toInt
        .map(_.split(" "))
        .map(p => person(p(0).toInt, p(1), p(2).toInt, p(3), p(4)))
        .toDF()
      text.registerTempTable("text")

      // Three-way self join: a = A->B, b = B->C, c = C->A.
      // Every cycle matches three times (once per rotation); requiring a.aid
      // to be the smallest of the three ids keeps exactly one rotation.
      val query = sqlContext.sql(
        """select a.aid, a.aname, b.aid, b.aname, c.aid, c.aname
          |from text a, text b, text c
          |where a.bid = b.aid and b.bid = c.aid and c.bid = a.aid
          |  and a.aid < b.aid and a.aid < c.aid""".stripMargin)

      println(query.collect().mkString("\n"))
    } finally {
      // Always release the Spark context, even if reading or querying fails.
      sc.stop()
    }
  }
}
4.输出

[1,tom,2,jack,3,sala]


最新文章

123

最新摄影

微信扫一扫

第七城市微信公众平台