生活随笔
收集整理的這篇文章主要介紹了
KMeans聚类算法Hadoop实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
Assistance.java  輔助類,功能詳見注釋
[java]?view plaincopy
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

import java.io.IOException;
import java.util.*;

/**
 * Static helpers for the k-means job: reading a centre file, deleting a
 * previous result, and deciding whether the iteration has converged.
 */
public class Assistance {

    /**
     * Reads a centre file and parses every non-blank line into a list of floats.
     *
     * <p>Fields may be separated by tabs and/or spaces. All fields of a line are
     * returned, including the first one.
     *
     * @param inputpath path of the centre file on the configured file system
     * @return one list of floats per non-blank line; on an I/O error the stack
     *         trace is printed and whatever was read so far is returned
     */
    public static List<ArrayList<Float>> getCenters(String inputpath) {
        List<ArrayList<Float>> result = new ArrayList<ArrayList<Float>>();
        Configuration conf = new Configuration();
        FSDataInputStream fsIn = null;
        try {
            FileSystem hdfs = FileSystem.get(conf);
            fsIn = hdfs.open(new Path(inputpath));
            LineReader lineIn = new LineReader(fsIn, conf);
            Text line = new Text();
            while (lineIn.readLine(line) > 0) {
                String record = line.toString().trim();
                if (record.length() == 0) {
                    // A blank (e.g. trailing) line would make parseFloat fail.
                    continue;
                }
                String[] fields = record.split("\\s+");
                ArrayList<Float> center = new ArrayList<Float>(fields.length);
                for (String field : fields) {
                    center.add(Float.parseFloat(field));
                }
                result.add(center);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the stream is released even when reading fails.
            if (fsIn != null) {
                try {
                    fsIn.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    /**
     * Recursively deletes {@code path}. An I/O error is printed, not propagated.
     *
     * @param path file or directory to delete
     */
    public static void deleteLastResult(String path) {
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            hdfs.delete(new Path(path), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Compares the old and new centres and, when they have not converged,
     * replaces the old centre file with the new one.
     *
     * <p>The measure is the sum over the first {@code k} centres of the squared
     * per-coordinate differences. Field 0 of every row is skipped; in the new
     * centre file it is the key column written by the reducer.
     *
     * @param oldpath   centre file used by the iteration that just ran
     * @param newpath   centre file produced by the iteration that just ran
     * @param k         number of centres to compare
     * @param threshold convergence threshold for the summed squared difference
     * @return {@code true} if the difference is below {@code threshold};
     *         {@code false} after {@code oldpath} has been overwritten with
     *         the contents of {@code newpath}
     * @throws IOException if either file holds fewer than {@code k} centres,
     *                     the centres differ in dimension, or the copy fails
     */
    public static boolean isFinished(String oldpath, String newpath, int k, float threshold)
            throws IOException {
        List<ArrayList<Float>> oldcenters = getCenters(oldpath);
        List<ArrayList<Float>> newcenters = getCenters(newpath);
        if (oldcenters.size() < k || newcenters.size() < k) {
            throw new IOException("Expected " + k + " centers but found " + oldcenters.size()
                    + " in " + oldpath + " and " + newcenters.size() + " in " + newpath);
        }

        float distance = 0;
        for (int i = 0; i < k; ++i) {
            List<Float> oldCenter = oldcenters.get(i);
            List<Float> newCenter = newcenters.get(i);
            if (oldCenter.size() != newCenter.size()) {
                throw new IOException("Center " + i + " has " + oldCenter.size()
                        + " fields in " + oldpath + " but " + newCenter.size() + " in " + newpath);
            }
            for (int j = 1; j < oldCenter.size(); ++j) {
                float diff = oldCenter.get(j) - newCenter.get(j);
                // Square in double and narrow on assignment, as Math.pow(diff, 2) did.
                distance += (double) diff * diff;
            }
        }
        System.out.println("Distance = " + distance + " Threshold = " + threshold);
        if (distance < threshold) {
            return true;
        }

        // Not converged: the new centres become the old centres for the next
        // iteration. Copy directly inside the file system rather than via a
        // machine-specific local scratch file.
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path oldCenterPath = new Path(oldpath);
        hdfs.delete(oldCenterPath, true);
        if (!FileUtil.copy(hdfs, new Path(newpath), hdfs, oldCenterPath, false, true, conf)) {
            throw new IOException("Failed to copy " + newpath + " to " + oldpath);
        }
        return false;
    }
}
KMeansDriver.java 作業驅動類
[java]?view plaincopy
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

/**
 * Driver for the k-means job: repeats the centre-update job until the centres
 * converge or the iteration cap is reached, then runs a final map-only
 * assignment pass.
 *
 * <p>Arguments after generic Hadoop options:
 * {@code <in> <out> <oldcenters> <newcenters> <k> <threshold>}.
 */
public class KMeansDriver {

    /** Upper bound on the number of centre-update iterations. */
    private static final int MAX_ITERATIONS = 10;

    private static final String USAGE =
            "Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>";

    /**
     * Runs the centre-update job repeatedly, then the final clustering pass.
     *
     * @param args command-line arguments, optionally prefixed by generic options
     * @throws Exception if argument parsing, a job, or the convergence check fails
     */
    public static void main(String[] args) throws Exception {
        int repeated = 0;
        String[] otherArgs;

        do {
            Configuration conf = new Configuration();
            otherArgs = parseArgs(conf, args);
            runJob(conf, otherArgs, true);

            ++repeated;
            System.out.println("We have repeated " + repeated + " times.");
            // Index otherArgs, not args: generic options such as -D shift the
            // positions in the raw argument array.
        } while (repeated < MAX_ITERATIONS
                && !Assistance.isFinished(otherArgs[2], otherArgs[3],
                        Integer.parseInt(otherArgs[4]), Float.parseFloat(otherArgs[5])));

        Cluster(args);
    }

    /**
     * Runs the final pass: the mapper alone, with no reducer class set, so the
     * output holds one (centre index, point) record per input point.
     *
     * @param args command-line arguments, optionally prefixed by generic options
     * @throws IOException            if the job cannot be set up or fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void Cluster(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        runJob(conf, parseArgs(conf, args), false);
    }

    /** Applies generic options to {@code conf} and returns the six remaining arguments, exiting with status 2 otherwise. */
    private static String[] parseArgs(Configuration conf, String[] args) throws IOException {
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 6) {
            System.err.println(USAGE);
            System.exit(2);
        }
        return otherArgs;
    }

    /** Configures and runs one k-means job, with or without the centre-averaging reducer, failing if the job fails. */
    private static void runJob(Configuration conf, String[] otherArgs, boolean withReducer)
            throws IOException, InterruptedException, ClassNotFoundException {
        // These must be set before the Job is created from conf.
        conf.set("centerpath", otherArgs[2]);
        conf.set("kpath", otherArgs[4]);
        Job job = new Job(conf, "KMeansCluster");
        job.setJarByClass(KMeansDriver.class);

        Path in = new Path(otherArgs[0]);
        Path out = new Path(otherArgs[1]);
        FileInputFormat.addInputPath(job, in);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            // The output directory must not exist when the job starts.
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        job.setMapperClass(KMeansMapper.class);
        if (withReducer) {
            job.setReducerClass(KMeansReducer.class);
        }

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        if (!job.waitForCompletion(true)) {
            throw new IOException("KMeansCluster job failed for output " + out);
        }
    }
}
KMeansMapper.java
[java]?view plaincopy
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Assigns each input point to its nearest centre and emits
 * (centre index, original point line).
 *
 * <p>The centre file path is read from the {@code centerpath} configuration
 * key and the number of centres from {@code kpath}. Field 0 of each centre row
 * is skipped; coordinates start at field 1.
 */
public class KMeansMapper extends Mapper<Object, Text, IntWritable, Text> {

    /** Centres parsed from the centre file; loaded once per task. */
    private List<ArrayList<Float>> centers;

    /** Number of centres to consider. */
    private int k;

    /** Reused output key to avoid one allocation per record. */
    private final IntWritable centerKey = new IntWritable();

    /**
     * Loads the centres once per task instead of once per input record.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        loadCenters(context);
    }

    /**
     * Emits the index of the centre with the smallest squared Euclidean
     * distance to the point, together with the unchanged input line.
     * Blank lines are skipped.
     *
     * @param key     input key (unused)
     * @param value   one point as whitespace-separated floats
     * @param context task context used for configuration and output
     * @throws IOException          if the centres cannot be used or writing fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        if (centers == null) {
            // Covers callers that invoke map() without going through setup().
            loadCenters(context);
        }

        String line = value.toString().trim();
        if (line.length() == 0) {
            return;
        }
        String[] fields = line.split("\\s+");
        // Parse the point once rather than once per centre.
        float[] point = new float[fields.length];
        for (int j = 0; j < fields.length; ++j) {
            point[j] = Float.parseFloat(fields[j]);
        }

        float minDist = Float.MAX_VALUE;
        int centerIndex = k;
        for (int i = 0; i < k; ++i) {
            List<Float> center = centers.get(i);
            float currentDist = 0;
            for (int j = 0; j < point.length; ++j) {
                float diff = center.get(j + 1) - point[j];
                // Square in double and narrow on assignment, as Math.pow(diff, 2) did.
                currentDist += (double) diff * diff;
            }
            if (minDist > currentDist) {
                minDist = currentDist;
                centerIndex = i;
            }
        }

        centerKey.set(centerIndex);
        context.write(centerKey, value);
    }

    /** Reads k and the centre file named in the job configuration, failing if fewer than k centres were read. */
    private void loadCenters(Context context) throws IOException {
        k = Integer.parseInt(context.getConfiguration().get("kpath"));
        String centerPath = context.getConfiguration().get("centerpath");
        List<ArrayList<Float>> loaded = Assistance.getCenters(centerPath);
        if (loaded.size() < k) {
            throw new IOException("Expected " + k + " centers in " + centerPath
                    + " but found " + loaded.size());
        }
        centers = loaded;
    }
}
KMeansReducer.java
[java]?view plaincopy
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Computes the new centre of one cluster as the per-coordinate mean of all
 * points assigned to it, and emits (centre index, space-separated means).
 */
public class KMeansReducer extends Reducer<IntWritable, Text, IntWritable, Text> {

    /**
     * Averages the points of one cluster using running sums, so the cluster is
     * never held in memory as a whole.
     *
     * <p>The dimension is taken from the first non-blank point. Extra trailing
     * fields on later points are ignored; blank lines are skipped.
     *
     * @param key     centre index
     * @param value   points assigned to this centre, whitespace-separated floats
     * @param context task context used for output
     * @throws IOException          if a point has fewer fields than the first
     *                              one, or writing fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(IntWritable key, Iterable<Text> value, Context context)
            throws IOException, InterruptedException {
        float[] sums = null;
        int count = 0;

        for (Text val : value) {
            String line = val.toString().trim();
            if (line.length() == 0) {
                continue;
            }
            String[] fields = line.split("\\s+");
            if (sums == null) {
                sums = new float[fields.length];
            } else if (fields.length < sums.length) {
                throw new IOException("Point \"" + line + "\" in cluster " + key.get()
                        + " has " + fields.length + " fields, expected " + sums.length);
            }
            // Accumulate per coordinate in arrival order.
            for (int i = 0; i < sums.length; ++i) {
                sums[i] += Float.parseFloat(fields[i]);
            }
            ++count;
        }

        if (sums == null) {
            // Only blank lines were seen; there is nothing to average.
            return;
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < sums.length; ++i) {
            if (i > 0) {
                joined.append(' ');
            }
            joined.append(sums[i] / count);
        }
        context.write(key, new Text(joined.toString()));
    }
}
作業運行情況:
[plain]?view plaincopy
hadoop@shaobo-ThinkPad-E420:~/class$?hadoop?jar?KMeans.jar?KMeans.KMeansDriver?input/iris.data?output?input/oldcenter.data?output/part-r-00000?3?0.0001?? 14/04/17?16:15:50?INFO?input.FileInputFormat:?Total?input?paths?to?process?:?1?? 14/04/17?16:15:51?INFO?mapred.JobClient:?Running?job:?job_201404171511_0012?? 14/04/17?16:15:52?INFO?mapred.JobClient:??map?0%?reduce?0%?? 14/04/17?16:16:07?INFO?mapred.JobClient:??map?100%?reduce?0%?? 14/04/17?16:16:19?INFO?mapred.JobClient:??map?100%?reduce?100%?? 14/04/17?16:16:24?INFO?mapred.JobClient:?Job?complete:?job_201404171511_0012?? 14/04/17?16:16:24?INFO?mapred.JobClient:?Counters:?25?? 14/04/17?16:16:24?INFO?mapred.JobClient:???Job?Counters??? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Launched?reduce?tasks=1?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????SLOTS_MILLIS_MAPS=12041?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Total?time?spent?by?all?reduces?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Total?time?spent?by?all?maps?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Launched?map?tasks=1?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Data-local?map?tasks=1?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????SLOTS_MILLIS_REDUCES=10030?? 14/04/17?16:16:24?INFO?mapred.JobClient:???File?Output?Format?Counters??? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Bytes?Written=125?? 14/04/17?16:16:24?INFO?mapred.JobClient:???FileSystemCounters?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????FILE_BYTES_READ=3306?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????HDFS_BYTES_READ=11214?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????FILE_BYTES_WRITTEN=48901?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????HDFS_BYTES_WRITTEN=125?? 14/04/17?16:16:24?INFO?mapred.JobClient:???File?Input?Format?Counters??? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Bytes?Read=2550?? 14/04/17?16:16:24?INFO?mapred.JobClient:???Map-Reduce?Framework?? 
14/04/17?16:16:24?INFO?mapred.JobClient:?????Reduce?input?groups=3?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Map?output?materialized?bytes=3306?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Combine?output?records=0?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Map?input?records=150?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Reduce?shuffle?bytes=0?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Reduce?output?records=3?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Spilled?Records=300?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Map?output?bytes=3000?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Combine?input?records=0?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Map?output?records=150?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????SPLIT_RAW_BYTES=114?? 14/04/17?16:16:24?INFO?mapred.JobClient:?????Reduce?input?records=150?? We?have?repeated?1?times.?? Distance?=?0.35025704?Threshold?=?1.0E-4?? 14/04/17?16:16:24?INFO?input.FileInputFormat:?Total?input?paths?to?process?:?1?? 14/04/17?16:16:25?INFO?mapred.JobClient:?Running?job:?job_201404171511_0013?? 14/04/17?16:16:26?INFO?mapred.JobClient:??map?0%?reduce?0%?? 14/04/17?16:16:40?INFO?mapred.JobClient:??map?100%?reduce?0%?? 14/04/17?16:16:52?INFO?mapred.JobClient:??map?100%?reduce?100%?? 14/04/17?16:16:57?INFO?mapred.JobClient:?Job?complete:?job_201404171511_0013?? 14/04/17?16:16:57?INFO?mapred.JobClient:?Counters:?25?? 14/04/17?16:16:57?INFO?mapred.JobClient:???Job?Counters??? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Launched?reduce?tasks=1?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????SLOTS_MILLIS_MAPS=12077?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Total?time?spent?by?all?reduces?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Total?time?spent?by?all?maps?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Launched?map?tasks=1?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Data-local?map?tasks=1?? 
14/04/17?16:16:57?INFO?mapred.JobClient:?????SLOTS_MILLIS_REDUCES=10048?? 14/04/17?16:16:57?INFO?mapred.JobClient:???File?Output?Format?Counters??? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Bytes?Written=116?? 14/04/17?16:16:57?INFO?mapred.JobClient:???FileSystemCounters?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????FILE_BYTES_READ=3306?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????HDFS_BYTES_READ=21414?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????FILE_BYTES_WRITTEN=48901?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????HDFS_BYTES_WRITTEN=116?? 14/04/17?16:16:57?INFO?mapred.JobClient:???File?Input?Format?Counters??? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Bytes?Read=2550?? 14/04/17?16:16:57?INFO?mapred.JobClient:???Map-Reduce?Framework?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Reduce?input?groups=3?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Map?output?materialized?bytes=3306?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Combine?output?records=0?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Map?input?records=150?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Reduce?shuffle?bytes=3306?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Reduce?output?records=3?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Spilled?Records=300?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Map?output?bytes=3000?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Combine?input?records=0?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Map?output?records=150?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????SPLIT_RAW_BYTES=114?? 14/04/17?16:16:57?INFO?mapred.JobClient:?????Reduce?input?records=150?? We?have?repeated?2?times.?? Distance?=?0.006297064?Threshold?=?1.0E-4?? 14/04/17?16:16:57?INFO?input.FileInputFormat:?Total?input?paths?to?process?:?1?? 14/04/17?16:16:58?INFO?mapred.JobClient:?Running?job:?job_201404171511_0014?? 14/04/17?16:16:59?INFO?mapred.JobClient:??map?0%?reduce?0%?? 14/04/17?16:17:14?INFO?mapred.JobClient:??map?100%?reduce?0%?? 
14/04/17?16:17:25?INFO?mapred.JobClient:??map?100%?reduce?100%?? 14/04/17?16:17:30?INFO?mapred.JobClient:?Job?complete:?job_201404171511_0014?? 14/04/17?16:17:30?INFO?mapred.JobClient:?Counters:?25?? 14/04/17?16:17:30?INFO?mapred.JobClient:???Job?Counters??? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Launched?reduce?tasks=1?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????SLOTS_MILLIS_MAPS=12046?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Total?time?spent?by?all?reduces?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Total?time?spent?by?all?maps?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Launched?map?tasks=1?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Data-local?map?tasks=1?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????SLOTS_MILLIS_REDUCES=10051?? 14/04/17?16:17:30?INFO?mapred.JobClient:???File?Output?Format?Counters??? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Bytes?Written=116?? 14/04/17?16:17:30?INFO?mapred.JobClient:???FileSystemCounters?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????FILE_BYTES_READ=3306?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????HDFS_BYTES_READ=20064?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????FILE_BYTES_WRITTEN=48901?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????HDFS_BYTES_WRITTEN=116?? 14/04/17?16:17:30?INFO?mapred.JobClient:???File?Input?Format?Counters??? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Bytes?Read=2550?? 14/04/17?16:17:30?INFO?mapred.JobClient:???Map-Reduce?Framework?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Reduce?input?groups=3?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Map?output?materialized?bytes=3306?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Combine?output?records=0?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Map?input?records=150?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Reduce?shuffle?bytes=0?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Reduce?output?records=3?? 
14/04/17?16:17:30?INFO?mapred.JobClient:?????Spilled?Records=300?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Map?output?bytes=3000?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Combine?input?records=0?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Map?output?records=150?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????SPLIT_RAW_BYTES=114?? 14/04/17?16:17:30?INFO?mapred.JobClient:?????Reduce?input?records=150?? We?have?repeated?3?times.?? Distance?=?0.0?Threshold?=?1.0E-4?? 14/04/17?16:17:30?INFO?input.FileInputFormat:?Total?input?paths?to?process?:?1?? 14/04/17?16:17:30?INFO?mapred.JobClient:?Running?job:?job_201404171511_0015?? 14/04/17?16:17:31?INFO?mapred.JobClient:??map?0%?reduce?0%?? 14/04/17?16:17:47?INFO?mapred.JobClient:??map?100%?reduce?0%?? 14/04/17?16:17:59?INFO?mapred.JobClient:??map?100%?reduce?100%?? 14/04/17?16:18:04?INFO?mapred.JobClient:?Job?complete:?job_201404171511_0015?? 14/04/17?16:18:04?INFO?mapred.JobClient:?Counters:?25?? 14/04/17?16:18:04?INFO?mapred.JobClient:???Job?Counters??? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Launched?reduce?tasks=1?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????SLOTS_MILLIS_MAPS=12036?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Total?time?spent?by?all?reduces?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Total?time?spent?by?all?maps?waiting?after?reserving?slots?(ms)=0?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Launched?map?tasks=1?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Data-local?map?tasks=1?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????SLOTS_MILLIS_REDUCES=10050?? 14/04/17?16:18:04?INFO?mapred.JobClient:???File?Output?Format?Counters??? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Bytes?Written=2700?? 14/04/17?16:18:04?INFO?mapred.JobClient:???FileSystemCounters?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????FILE_BYTES_READ=3306?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????HDFS_BYTES_READ=20064?? 
14/04/17?16:18:04?INFO?mapred.JobClient:?????FILE_BYTES_WRITTEN=48717?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????HDFS_BYTES_WRITTEN=2700?? 14/04/17?16:18:04?INFO?mapred.JobClient:???File?Input?Format?Counters??? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Bytes?Read=2550?? 14/04/17?16:18:04?INFO?mapred.JobClient:???Map-Reduce?Framework?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Reduce?input?groups=3?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Map?output?materialized?bytes=3306?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Combine?output?records=0?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Map?input?records=150?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Reduce?shuffle?bytes=0?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Reduce?output?records=150?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Spilled?Records=300?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Map?output?bytes=3000?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Combine?input?records=0?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Map?output?records=150?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????SPLIT_RAW_BYTES=114?? 14/04/17?16:18:04?INFO?mapred.JobClient:?????Reduce?input?records=150??
from: http://blog.csdn.net/jdplus/article/details/23960127
總結
以上是生活随笔為你收集整理的KMeans聚类算法Hadoop实现的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。