Hadoop那些事儿（四） - MapReduce编程实例（基础）(2)

2019-01-10 14:13

String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); if(otherArgs.length != 2){

System.err.println(\ System.exit(2); }

//配置作业名

Job job = new Job(conf,\ //配置作业各个类

job.setJarByClass(InvertedIndex.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1);

} }

在map中，通过context.getInputSplit()获取数据所在的文件，然后将读取的数据按句号分隔并遍历；如果某一句包含指定字符“月”，则将文件名作为key、该句作为value写出。

在reduce中是一个简单的合并的过程。

2.最大值最小值平均数

问题描述

给定一批数字，求其中的最大值、最小值以及平均数。

解决方案

这个问题也很简单：首先在map中读取数据并进行切割，用一个固定的数字（1）作key，切下来的数字作为value，这样所有数字都会进入同一次reduce调用。在reduce中遍历value，计数并求和，同时比较大小得到最大值和最小值，最后求出平均数。

测试数据输入

in1.txt

1 1 1 1 1 1 1 1 1 1 5 5 5 5 5 5 5 5 5 5

in2.txt

5 8 10 17 32 8 9 13 32 21

预期结果

平均数 7 最大值 32 最小值 1（按上面的测试数据，总和 215、共 30 个数，整数除法得 7）

看图说话

代码

package train;

import java.io.IOException; import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;

import test.WordCount;

/**

* 计算平均数 * @author hadoop * */

public class Average1 {

public static class Map extends Mapper{ private static IntWritable no = new IntWritable(1); //计数作为key private Text number = new Text(); //存储切下的数字

public void map(Object key,Text value,Context context) throws IOException, InterruptedException{

StringTokenizer st = new StringTokenizer(value.toString()); while(st.hasMoreTokens()){ number.set(st.nextToken());

context.write(no, new IntWritable(Integer.parseInt(number.toString()))); } } }

public static class Reduce extends Reducer{ //定义全局变量

int count = 0; //数字的数量 int sum = 0; //数字的总和 int max = -2147483648; int min = 2147483647;

public void reduce(IntWritable key,Iterable values,Context context) throws IOException, InterruptedException{ for(IntWritable val:values){ if(val.get()>max){ max = val.get(); }

if(val.get()

count++;

sum+=val.get(); }

int average = (int)sum/count; //计算平均数

//System.out.println(sum+\

context.write(new Text(\平均数\ context.write(new Text(\最大值\ context.write(new Text(\最小值\ } }

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

// TODO Auto-generated method stub Configuration conf = new Configuration();

//conf.set(\ conf.addResource(\ args = new String[]{\p/output/average1_out\ //检查运行命令

String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); if(otherArgs.length != 2){

System.err.println(\ System.exit(2); }

//配置作业名

Job job = new Job(conf,\ //配置作业各个类

job.setJarByClass(Average1.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); //Mapper的输出类型

*强调内容* job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1);

} }

3.平均成绩

问题描述

给定三个输入文件，每个文件中分别写有多个学生的数学、英语、语文成绩，求每个学生三科的平均成绩。

解决方案

这个问题同样很简单：在map中解析数据，以学生名字作为key、成绩作为value输出；在reduce中对同一学生的各科成绩求和并除以科目数，即得到平均成绩。

测试数据

输入：

in1.txt

张三 80 李四 83 王五 91 赵六 88

in2.txt

张三 92 李四 100 王五 94 赵六 88

in3.txt

张三 89 李四 98 王五 84 赵六 93

预期结果

张三 87 李四 93 王五 89 赵六 89

看图说话

代码

共5页:

Hadoop那些事儿（四） - MapReduce编程实例（基础）(2).doc 将本文的Word文档下载到电脑。如果下载失败或者文档不完整，请联系客服人员解决！

下载这篇word文档