package com.hj.spark;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
/**
 * Streaming word count over a TCP text source.
 *
 * <p>Connects to a text socket, splits every received line on single spaces,
 * and prints the per-batch count of each token to standard output. Batches
 * are one second long.
 *
 * <p>Usage: {@code SparkStreaming [host [port]]}. When the arguments are
 * omitted the source defaults to {@code hadoop:9999}.
 */
public class SparkStreaming {

    /** Host to connect to when no host argument is given. */
    private static final String DEFAULT_HOST = "hadoop";

    /** Port to connect to when no port argument is given. */
    private static final int DEFAULT_PORT = 9999;

    /**
     * Builds the streaming word-count pipeline, starts it, and blocks until
     * the streaming context terminates.
     *
     * @param args optional {@code [host, port]} of the TCP text source;
     *             missing entries fall back to {@code hadoop} and {@code 9999}
     * @throws InterruptedException if the thread is interrupted while waiting
     *                              for the streaming context to terminate
     * @throws IllegalArgumentException if the port argument is not an integer
     *                                  in the range 1-65535
     */
    public static void main(String[] args) throws InterruptedException {
        String host = (args != null && args.length > 0) ? args[0] : DEFAULT_HOST;
        int port = (args != null && args.length > 1) ? parsePort(args[1]) : DEFAULT_PORT;

        // NOTE(review): local[2] presumably leaves one thread for the socket
        // receiver and one for batch processing - confirm against the Spark
        // Streaming guide before lowering it.
        SparkConf conf = new SparkConf().setAppName("NetwordCount").setMaster("local[2]");

        // Entry point of the streaming application; batch interval is 1 second.
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
        try {
            // DStream of text lines received from the TCP source at host:port.
            JavaReceiverInputDStream<String> lines = jssc.socketTextStream(host, port);

            // Split each line into tokens on a single space.
            // NOTE(review): consecutive spaces produce empty-string tokens,
            // which are counted like any other word.
            JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterator<String> call(String s) throws Exception {
                    return Arrays.asList(s.split(" ")).iterator();
                }
            });

            // Pair every token with an initial count of 1.
            JavaPairDStream<String, Integer> pairs =
                    words.mapToPair(new PairFunction<String, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String s) throws Exception {
                            return new Tuple2<String, Integer>(s, 1);
                        }
                    });

            // Sum the counts of identical tokens within each batch.
            JavaPairDStream<String, Integer> wordCount =
                    pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
                        @Override
                        public Integer call(Integer arg0, Integer arg1) throws Exception {
                            return arg0 + arg1;
                        }
                    });

            wordCount.print();

            jssc.start();
            jssc.awaitTermination();
        } finally {
            // Release the context even when set-up or awaiting fails.
            jssc.stop();
        }
    }

    /** Parses a TCP port argument, rejecting non-numeric or out-of-range values. */
    private static int parsePort(String raw) {
        final int port;
        try {
            port = Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Port must be an integer, got: " + raw, e);
        }
        if (port < 1 || port > 65535) {
            throw new IllegalArgumentException("Port out of range (1-65535): " + port);
        }
        return port;
    }
}
// Reposted content - please credit the source. Original article: https://www.uj5u.com/qita/345680.html
// Tags: other
