mapreduce简单的gzip压缩
生活随笔
收集整理的這篇文章主要介紹了
mapreduce简单的gzip压缩
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
MapReduce使用簡單的gzip格式進行文件的壓縮
package example;import java.io.IOException;import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;public class countData {public static final String INPUT_PATH="hdfs://hadoop0:9000/test/";public static final String OUTPUT_PATH="hdfs://hadoop0:9000/testout/";public static void main(String[] args) throws Exception {Configuration conf=new Configuration(); //設(shè)置輸出壓縮conf.setBoolean("mapred.compress.map.out", true);//設(shè)置map輸出壓縮conf.setBoolean("mapred.output.compress", true);//設(shè)置輸出壓縮conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);//設(shè)置壓縮算法Job job = new Job(conf,countData.class.getSimpleName());//input pathFileInputFormat.setInputPaths(job, new Path(INPUT_PATH));//input formatjob.setInputFormatClass(TextInputFormat.class);//Mapper classjob.setMapperClass(MyMapper.class);//map output formatjob.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(LongWritable.class);//排序、分組、規(guī)約、分區(qū)//set reducejob.setReducerClass(MyReducer.class);//reduce output formatjob.setOutputKeyClass(Text.class);job.setOutputValueClass(LongWritable.class);//output pathFileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));//output formatjob.setOutputFormatClass(TextOutputFormat.class);job.waitForCompletion(true);}static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable>{@Overrideprotected void map(LongWritable key, Text value,Context 
context)throws IOException, InterruptedException {String[] split = value.toString().split(",");int count=0;for(int i=0;i<split.length;i++){if(i==split.length-1){count=1;}context.write(new Text(split[i]),new LongWritable(count));}}}static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable>{@Overrideprotected void reduce(Text k2, Iterable<LongWritable> v2s,Context context)throws IOException, InterruptedException {int count=0;int sum=0;for (LongWritable value : v2s) {count+=Integer.parseInt(value.toString());sum+=1;}if(count>0){count=0;for (int i=0;i<sum;i++) {count+=1;}context.write(k2, new LongWritable(count));}}} }
總結
以上是生活随笔為你收集整理的mapreduce简单的gzip压缩的全部內容，希望文章能夠幫你解決所遇到的問題。
- 上一篇: 求生之路2特殊感染者有哪些
- 下一篇: ttribute value is qu