1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
| import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class BaiduLog { public static class BaiduLogMapper extends Mapper<LongWritable,Text, Text, LogBean> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // super.map(key, value, context); String log = value.toString(); String str = "(cn.baidu.core.inteceptor.LogInteceptor:55)"; if (log.indexOf(str)!=-1){ String[] log_arr = log.split(str); String time = log_arr[0].substring(1, 10); String[] log_arr2 = log_arr[1].split("\t"); String ip = log_arr2[1]; String url = log_arr2[2]; if (url.equals("null")){ url = log_arr2[3]; } LogBean logbean = new LogBean(time,ip,url); context.write(new Text(ip),logbean); } } } public static class BaiduLogReducer extends Reducer<Text,LogBean,IntWritable,Text>{
@Override protected void reduce(Text key, Iterable<LogBean> values, Context context) throws IOException, InterruptedException { // super.reduce(key, values, context); int sum = 0;
StringBuffer str = new StringBuffer(); int flag = 0; for (LogBean logbean:values){ sum++; if (flag==0){ str.append(logbean.toString()); flag = 1; } } context.write(new IntWritable(sum),new Text(str.toString()));
} } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "avg");
job.setJarByClass(BaiduLog.class); job.setMapperClass(BaiduLog.BaiduLogMapper.class); job.setReducerClass(BaiduLog.BaiduLogReducer.class);
// job.setCombinerClass(BaiduLog.BaiduLogReducer.class);
job.setOutputKeyClass(Text.class); job.setOutputValueClass(LogBean.class);
FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); }
}
|