GenericWritable
发布日期:2021-08-31 01:31:42 浏览次数:1 分类:技术文章

本文共 3961 字,大约阅读时间需要 13 分钟。

hot3.png

package com.test;

import java.io.IOException;

import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.GenericWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**

 * 业务场景:
 * 含有两个文件,两个文件中单词之间的分隔方式不一样,但是统计出单词在两个文件中公共出现的次数
 *
 * 文件来源1,逗号分隔text1.txt
 *  hello,what
 *  you,haha
 * 文件来源2,制表符分隔text2.txt
 * girl boy
 * father mother
 */
public class WordCountGenericWritable extends Configured implements Tool {
 
 public static class Map1 extends Mapper<LongWritable, Text, Text, MyGenericWritable> {
  public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
   String line = value.toString();
   
   StringTokenizer st = new StringTokenizer(line, ",");
   while(st.hasMoreElements()) {
    context.write(new Text(st.nextElement().toString()), new MyGenericWritable(new LongWritable(1)));
   }
  }
 }
 
 public static class Map2 extends Mapper<Text, Text, Text, MyGenericWritable> {
  public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
   context.write(key, new MyGenericWritable(new Text("1")));
   context.write(value, new MyGenericWritable(new Text("1")));
  }
 }
 
 public static class Reduce extends Reducer<Text, MyGenericWritable, Text, IntWritable> {
  public void reduce(Text key, Iterable<MyGenericWritable> values, Context context) throws IOException, InterruptedException {
   int count = 0;
   Iterator<MyGenericWritable> it = values.iterator();
   while(it.hasNext()) {
    MyGenericWritable myGw = it.next();
    Writable value = myGw.get();
    if(value instanceof LongWritable) {
     count = count + Long.valueOf(((LongWritable)value).get()).intValue();
    }
    if(value instanceof Text) {
     count = count + Long.valueOf(((Text)value).toString()).intValue();
    }
   }
   context.write(key, new IntWritable(count));
  }
 }
 
 public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = this.getConf();
  Job job = new Job(conf);
  job.setJobName(WordCountGenericWritable.class.getSimpleName());
  job.setJarByClass(WordCountGenericWritable.class);
  
  MultipleInputs.addInputPath(job, new Path("hdfs://grid131:9000/text1.txt"), TextInputFormat.class, Map1.class);
  MultipleInputs.addInputPath(job, new Path("hdfs://grid131:9000/text2.txt"), KeyValueTextInputFormat.class, Map2.class);
  
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setReducerClass(Reduce.class);
  
  job.setOutputFormatClass(TextOutputFormat.class);
  
  //当map的输出类型和reduce的输出类型不一致的时候,需要单独设置map输出类型
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(MyGenericWritable.class);
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  
  job.waitForCompletion(true);
  
  return job.isSuccessful()?0:1;
 }
 
 public static void main(String[] args) throws Exception {
  int exit = ToolRunner.run(new WordCount(), args);
  System.exit(exit);
 }
 
}

class MyGenericWritable extends GenericWritable {

 public MyGenericWritable() {

  
 }
 
 public MyGenericWritable(LongWritable longWritable) {
  super.set(longWritable);
 }
 
 public MyGenericWritable(Text text) {
  super.set(text);
 }
 
 @Override
 protected Class<? extends Writable>[] getTypes() {
  return new Class[]{LongWritable.class, Text.class};
 }
 
}

转载于:https://my.oschina.net/sniperLi/blog/366271

转载地址:https://blog.csdn.net/weixin_34056162/article/details/91969357 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!

上一篇:log4j样例
下一篇:Oracle中组合索引的使用详解

发表评论

最新留言

表示我来过!
[***.240.166.169]2024年03月06日 12时50分18秒

关于作者

    喝酒易醉,品茶养心,人生如梦,品茶悟道,何以解忧?唯有杜康!
-- 愿君每日到此一游!

推荐文章

s2-045 php exp,S2-045-EXP.py --Struts2任意代码执行漏洞 (S2-045,CVE-2017-5638) 2019-04-21
linux sdk 窗口句柄,Venus: 针对Linux平台上,对常用的系统API进行面向对象的封装SDK。... 2019-04-21
c语言程序设计 科学出版社习题答案,C语言程序设计(科学出版社)第4章 课后习题参考答案.doc... 2019-04-21
c语言 无错 但只运行一半,求哈夫曼编码时程序运行到一半就终止了,编译无错... 2019-04-21
deepin linux 2014安装,2014.2版本的Deepin虚拟机安装浅谈(就是深度Linux) 2019-04-21
android 限速工具,Android下载器之限速篇 2019-04-21
html刷新ajax实现原理,AJAX的原理—如何做到异步和局部刷新 2019-04-21
html中列表菜单加文字请选择,html中下拉菜单 2019-04-21
读书郎平板中android,读书郎学生平板电脑怎么用 使用方法详解【图文】 2019-04-21
html5 调用摄像头 支持IE,JS调用本地摄像头拍照(兼容各大浏览器及IE8+) 2019-04-21
rust和gta5哪个吃配置_盘点4款Steam“自由度”很高的游戏,GTA5众所周知,目前最热门... 2019-04-21
es审计日志_elasticsearch 事务日志translog 2019-04-21
dw1510_超低温种子储存柜 2019-04-21
python用opencv计算汽车间距_计算机视觉:利用OpenCV和Python进行车辆计数详细步调... 2019-04-21
文件未找到mathpage.wll_解决MathPage.wll文件找不到的问题(找了好久的良心之作)... 2019-04-21
docker 查看容器磁盘大小_查看 docker 容器使用的资源 2019-04-21
python consul服务发现_Prometheus+Consul服务自动发现监控 2019-04-21
excel提取不规则字段_利用excel服务器来实现3级或者更多级的层级关联 2019-04-21
@autowired注解的作用_只因多看了一眼提示,又一次刷新了@Autowired注释的认知 2019-04-21
ab753变频器参数怎么拷贝到面板_变频器不知道如何上手,厂家教你如何三点搞定设置变频器参数... 2019-04-21