1. Add the jar dependencies to the pom
org.apache.spark:spark-streaming_2.11:2.1.0
org.apache.kafka:kafka_2.11:1.0.0
org.apache.spark:spark-streaming-kafka-0-10_2.11:2.1.0
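In pom.xml these coordinates translate into dependency entries along the following lines. This is a sketch using the versions listed above; adjust the Spark/Scala versions to match your own cluster and build.

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.11</artifactId>
        <version>2.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
        <version>1.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
        <version>2.1.0</version>
    </dependency>
</dependencies>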
2. Code
package cn.piesat

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.{Seconds, StreamingContext}

object App {
  private val brokers = "hadoop01:9092" // Kafka broker list

  def main(args: Array[String]): Unit = {
    val spark = getSparkSession()
    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, Seconds(3)) // 3-second micro-batches
    val topics = Array("lj01")

    // Kafka consumer configuration for the direct stream
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Direct stream subscribed to the topic; PreferConsistent distributes partitions evenly over executors
    val messages = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Word count per batch: take each record's value, split on spaces, count words
    val lines = messages.map(record => record.value())
    val wordCounts = lines
      .flatMap(line => line.split(" ").map(word => (word, 1)))
      .reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }

  def getSparkSession(): SparkSession = {
    SparkSession
      .builder()
      .appName("sparkSql")
      .config("spark.some.config.option", "some-value") // placeholder config option
      .master("local[4]")
      .getOrCreate()
  }
}
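To see output from the streaming job, something has to write to the lj01 topic. Below is a minimal producer sketch, not part of the original article, that uses the Kafka client API pulled in by the kafka_2.11 dependency; the broker address and topic name are taken from the code above, and the messages are just made-up test lines.

package cn.piesat

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object TestProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "hadoop01:9092")
    props.put("key.serializer", classOf[StringSerializer].getName)
    props.put("value.serializer", classOf[StringSerializer].getName)

    val producer = new KafkaProducer[String, String](props)
    // Send a few space-separated lines so the word count has something to aggregate
    (1 to 10).foreach { i =>
      producer.send(new ProducerRecord[String, String]("lj01", s"hello spark streaming $i"))
    }
    producer.flush()
    producer.close()
  }
}

With the streaming job running, each 3-second batch prints the word counts of whatever lines arrived in that window; because auto.offset.reset is "latest", only messages produced after the job starts are counted.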