Word count — the classic introductory Hadoop program — implemented with plain Java 8 streams.
Example 1: counting word occurrences in a txt file.
@Test
public void fileWordCount() throws IOException {
    // Counts word occurrences in a plain-text file and prints each
    // "word=count" pair. Non-text formats must be converted to .txt first.
    Files.readAllLines(Paths.get("D:\\jd.txt"), StandardCharsets.UTF_8).stream()
            // Flatten the stream of lines into a stream of words.
            // "\\s+" (was "\\s") collapses runs of whitespace so that
            // consecutive spaces/tabs don't produce empty tokens.
            .flatMap(line -> Arrays.stream(line.trim().split("\\s+")))
            // An entirely blank line still splits to a single "" token.
            .filter(word -> !word.isEmpty())
            // Group identical words and tally them. counting() only needs the
            // key, so the SimpleEntry(word, 1) wrapper the original built was
            // redundant. Sequential stream: the input is tiny and I/O-bound,
            // so parallelStream() buys nothing here.
            .collect(groupingBy(word -> word, counting()))
            .entrySet()
            .forEach(System.out::println);
}
Example 2: counting word occurrences in a List.
@Test
public void listWordCount() {
    // Counts occurrences of each string in a list two ways: first with the
    // built-in counting() collector, then with an explicit reduction.
    List<String> stringList = Arrays.asList("a", "b", "c", "a");

    // Way 1: groupingBy + counting(). counting() only needs the grouping key,
    // so no SimpleEntry(s, 1) wrapper is required.
    stringList.stream()
            .collect(groupingBy(s -> s, counting()))
            .entrySet()
            .stream()
            .forEach(System.out::println);
    System.out.println("---------------------------------------------------");

    // Way 2: an explicit reduce — counting() is itself implemented as a
    // reduction. The functional interfaces are declared explicitly because a
    // bare lambda in the middle of the pipeline gives the compiler too little
    // context to infer the mapped type.
    BinaryOperator<Integer> countMerger = Integer::sum;
    // Sort key extractor for the printed output. Parameterized entry type —
    // the original used a raw Map.Entry, which disables type checking and
    // forced an Integer -> String -> Integer round-trip; with generics the
    // value is already an Integer and unboxes directly.
    ToIntFunction<Map.Entry<String, Integer>> byCount = entry -> entry.getValue();

    stringList.stream()
            // Here the SimpleEntry wrapper IS needed: it feeds reducing()'s
            // value-extractor (SimpleEntry::getValue).
            .map(s -> new AbstractMap.SimpleEntry<>(s, 1))
            .collect(groupingBy(AbstractMap.SimpleEntry::getKey,
                    reducing(0, AbstractMap.SimpleEntry::getValue, countMerger)))
            .entrySet()
            .stream()
            // Print in ascending order of count.
            .sorted(Comparator.comparingInt(byCount))
            .forEach(System.out::println);
}