




MapReduce Level 1: Score Statistics

Solution code:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    /* Begin */
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each record is a "name score" line; split the input on newlines,
            // then split each record on the space between name and score.
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");
            while (itr.hasMoreTokens()) {
                String[] str = itr.nextToken().split(" ");
                String name = str[0];
                one.set(Integer.parseInt(str[1])); // reuse the writable to carry the parsed score
                word.set(name);
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keep only the highest score seen for each student.
            int maxAge = 0;
            for (IntWritable intWritable : values) {
                maxAge = Math.max(maxAge, intWritable.get());
            }
            result.set(maxAge);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        String inputfile = "/user/test/input";
        String outputFile = "/user/test/output/";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        job.waitForCompletion(true);
    }
    /* End */
}

Command line:

touch file01
echo Hello World Bye World
cat file01
echo Hello World Bye World > file01
cat file01
touch file02
echo Hello Hadoop Goodbye Hadoop > file02
cat file02
start-dfs.sh
hadoop fs -mkdir /usr
hadoop fs -mkdir /usr/input
hadoop fs -ls /usr/output
hadoop fs -ls /
hadoop fs -ls /usr
hadoop fs -put file01 /usr/input
hadoop fs -put file02 /usr/input
hadoop fs -ls /usr/input

Run the evaluation.
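One detail worth calling out in the Level 1 job: it reuses IntSumReducer as the combiner. That is safe only because taking a maximum is associative and commutative, so pre-aggregating map output locally cannot change the final result. A standalone sketch of why partial maxima compose (the scores here are hypothetical, not from the exercise data):

import java.util.Arrays;
import java.util.List;

public class MaxCombinerDemo {
    public static void main(String[] args) {
        // Hypothetical scores for one student, as two map tasks might see them.
        List<Integer> split1 = Arrays.asList(78, 92);
        List<Integer> split2 = Arrays.asList(85);
        // Combiner pass: a local max per split, then the reducer's max over the partials...
        int partial1 = split1.stream().max(Integer::compare).get(); // 92
        int partial2 = split2.stream().max(Integer::compare).get(); // 85
        int combined = Math.max(partial1, partial2);                // 92
        // ...equals a single global max over all values.
        int global = Math.max(92, Math.max(78, 85));
        System.out.println(combined == global); // true
    }
}

The same reasoning is why the Level 2 job below can also reuse its Reducer as a combiner: deduplicating twice gives the same result as deduplicating once.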
MapReduce Level 2: Merge File Contents and Remove Duplicates

Solution code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Merge {
    /**
     * Merge input files A and B, drop the duplicated records,
     * and produce a new output file C.
     */

    // Override map here: copy the input value straight into the output key.
    // Note that map must declare: throws IOException, InterruptedException
    /* Begin */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each record is two columns separated by a space.
            String str = value.toString();
            String[] data = str.split(" ");
            Text t1 = new Text(data[0]);
            Text t2 = new Text(data[1]);
            context.write(t1, t2);
        }
    }
    /* End */

    // Override reduce here: copy the input key straight into the output key.
    // Note that reduce must declare: throws IOException, InterruptedException
    /* Begin */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // The shuffle groups identical keys together; deduplicate their
            // values, sort them, and emit one line per distinct pair.
            List<String> list = new ArrayList<String>();
            for (Text text : values) {
                String str = text.toString();
                if (!list.contains(str)) {
                    list.add(str);
                }
            }
            Collections.sort(list);
            for (String text : list) {
                context.write(key, new Text(text));
            }
        }
    }
    /* End */

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        String inputPath = "/user/tmp/input/";   // set the input path here
        String outputPath = "/user/tmp/output/"; // set the output path here
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run the evaluation.

MapReduce Level 3: Information Mining - Mining Parent-Child Relationships

Solution code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class simple_data_mining {
    public static int time = 0;

    /**
     * Input: a child-parent table.
     * Output: a table expressing grandchild-grandparent relations.
     */

    // The mapper splits each line into child and parent on the space. Each pair
    // is emitted twice: keyed by parent as the left table and keyed by child as
    // the right table. A relation-type flag in the value tells the two apart.
    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            /* Begin */
            String child_name = new String();
            String parent_name = new String();
            String relation_type = new String();
            String line = value.toString();
            int i = 0;
            while (line.charAt(i) != ' ') {
                i++;
            }
            String[] values = {line.substring(0, i), line.substring(i + 1)};
            if (values[0].compareTo("child") != 0) { // skip the header row
                child_name = values[0];
                parent_name = values[1];
                relation_type = "1"; // flag distinguishing left table from right table
                context.write(new Text(values[1]),
                        new Text(relation_type + "+" + child_name + "+" + parent_name)); // left table
                relation_type = "2";
                context.write(new Text(values[0]),
                        new Text(relation_type + "+" + child_name + "+" + parent_name)); // right table
            }
            /* End */
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            /* Begin */
            if (time == 0) { // write the header row exactly once
                context.write(new Text("grand_child"), new Text("grand_parent"));
                time++;
            }
            int grand_child_num = 0;
            String[] grand_child = new String[10];
            int grand_parent_num = 0;
            String[] grand_parent = new String[10];
            Iterator<Text> ite = values.iterator();
            while (ite.hasNext()) {
                String record = ite.next().toString();
                int len = record.length();
                int i = 2;
                if (len == 0) {
                    continue;
                }
                char relation_type = record.charAt(0);
                String child_name = new String();
                String parent_name = new String();
                // Extract the child from this value.
                while (record.charAt(i) != '+') {
                    child_name = child_name + record.charAt(i);
                    i++;
                }
                i = i + 1;
                // Extract the parent from this value.
                while (i < len) {
                    parent_name = parent_name + record.charAt(i);
                    i++;
                }
                // Left table: this person's child is a grandchild candidate.
                if (relation_type == '1') {
                    grand_child[grand_child_num] = child_name;
                    grand_child_num++;
                } else { // right table: this person's parent is a grandparent candidate
                    grand_parent[grand_parent_num] = parent_name;
                    grand_parent_num++;
                }
            }
            // Cross every grandchild with every grandparent under this key.
            if (grand_parent_num != 0 && grand_child_num != 0) {
                for (int m = 0; m < grand_child_num; m++) {
                    for (int n = 0; n < grand_parent_num; n++) {
                        context.write(new Text(grand_child[m]), new Text(grand_parent[n]));
                    }
                }
            }
            /* End */
        }
    }
}
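The code above ends just after the join condition; the driver for Level 3 is missing from the source. By analogy with the first two levels, a minimal main() to place inside simple_data_mining might look like the sketch below; the job name and the input/output paths are assumptions, not recovered from the source:

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "single table join"); // job name is an assumption
        job.setJarByClass(simple_data_mining.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Paths are assumptions; substitute whatever the exercise platform expects.
        FileInputFormat.addInputPath(job, new Path("/user/reduce/input"));
        FileOutputFormat.setOutputPath(job, new Path("/user/reduce/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

Note that, unlike the first two levels, no combiner is set here: this reducer writes a header row and builds a cross product, so running it early on partial map output would corrupt the result.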