日韩av黄I国产麻豆传媒I国产91av视频在线观看I日韩一区二区三区在线看I美女国产在线I麻豆视频国产在线观看I成人黄色短片

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 >

对Mapreduce代码进行单元测试

發布時間:2025/7/14 56 豆豆
生活随笔 收集整理的這篇文章主要介紹了 对Mapreduce代码进行单元测试 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
hadoop自帶一個wordcount的示例代碼,用于計算單詞個數。我將其單獨移出來,測試成功。源碼如下:
package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Classic word-count MapReduce job: counts occurrences of each
 * whitespace-separated token across the input files.
 */
public class WordCount {

    /** Emits {@code (token, 1)} for every whitespace-separated token of an input line. */
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                // A fresh Text per token (instead of reusing one mutable
                // instance via word.set(...)) so a mocked Context in unit
                // tests captures distinct, stable key objects.
                context.write(new Text(itr.nextToken()), ONE);
            }
        }
    }

    /** Sums the per-word counts; also used as the combiner. */
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args generic Hadoop options followed by {@code <in> <out>} paths
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
現在我想對其進行單元測試。一種方式,是job執(zhí)行完了后,讀取輸出目錄中的文件,確認計數是否正確。但這樣的情況如果失敗,也不知道是哪里失敗。我們需要對map和reduce單獨進行測試。 tomwhite的書《hadoop權威指南》有提到如何用Mockito進行單元測試,我們依照原書對溫度的單元測試來對wordcount進行單元測試。(原書第二版的示例已經過時,可以參考英文版第三版或我的程序)。
package org.apache.hadoop.examples;

/*
 * Unit tests for WordCount's mapper and reducer using Mockito mocks of the
 * MapReduce Context, following the approach in "Hadoop: The Definitive Guide".
 * author zhouhh, date: 2012.8.7
 */
import static org.mockito.Mockito.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.*;
import org.junit.*;

public class WordCountTest {

    /** The mapper must emit (token, 1) once per occurrence of each token. */
    @Test
    public void testWordCountMap() throws IOException, InterruptedException {
        WordCount.TokenizerMapper mapper = new WordCount.TokenizerMapper();
        Text value = new Text("a b c b a a");
        @SuppressWarnings("unchecked")
        WordCount.TokenizerMapper.Context context =
                mock(WordCount.TokenizerMapper.Context.class);

        mapper.map(null, value, context);

        // "a" occurs three times, "c" once; a plain verify() expects exactly one call.
        verify(context, times(3)).write(new Text("a"), new IntWritable(1));
        verify(context).write(new Text("c"), new IntWritable(1));
    }

    /** The reducer must sum all counts supplied for a key. */
    @Test
    public void testWordCountReduce() throws IOException, InterruptedException {
        WordCount.IntSumReducer reducer = new WordCount.IntSumReducer();
        @SuppressWarnings("unchecked")
        WordCount.IntSumReducer.Context context =
                mock(WordCount.IntSumReducer.Context.class);
        Text key = new Text("a");
        List<IntWritable> values = new ArrayList<IntWritable>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));

        reducer.reduce(key, values, context);

        verify(context).write(new Text("a"), new IntWritable(2));
    }

    /** Kept for interface compatibility; run this class as a JUnit test instead. */
    public static void main(String[] args) {
        // Intentionally empty — use "Run As > JUnit Test".
    }
}
verify(context)只檢查一次的寫,如果多次寫,需用verify(context, times(n))檢查,否則會失敗。 執(zhí)行時在測試文件上點run as JUnit Test,會得到測試結果是否通過。 本示例程序在hadoop1.0.3環(huán)境中測試通過。Mockito也在hadoop的lib中自帶,打包在mockito-all-1.8.5.jar
最新內容請見作者的GitHub頁:http://qaseven.github.io/

總結

以上是生活随笔為你收集整理的对Mapreduce代码进行单元测试的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。