157. boolean res = job.waitForCompletion(true); 158. System.exit(res?0:1); 159. 160. } 161. }
[plain] view plain copy
1. package com.guludada.dataparser; 2.
3. import com.guludada.javabean.PageViewsBean; 4. import com.guludada.javabean.WebLogSessionBean; 5.
6. public class PageViewsParser { 7. /**
8. * 根据logSession的输出数据加载PageViewsBean 9. * 10. * */
11. public PageViewsBean loadBean(String sessionContent) { 12.
13. PageViewsBean pageViewsBean = new PageViewsBean(); 14.
15. String[] contents = sessionContent.split(\
16. pageViewsBean.setTime(contents[0] + \ 17. pageViewsBean.setIP_addr(contents[2]); 18. pageViewsBean.setSession(contents[3]); 19. pageViewsBean.setVisit_URL(contents[4]); 20. pageViewsBean.setStayTime(\ 21. pageViewsBean.setStep(\ 22.
23. return pageViewsBean; 24. } 25.
26. public String parser(PageViewsBean pageBean) { 27.
28. return pageBean.toString(); 29. } 30. 31. }
[plain] view plain copy
1. package com.guludada.javabean;
2.
3. import java.text.ParseException; 4. import java.text.SimpleDateFormat; 5. import java.util.Date; 6.
/**
 * Mutable data holder for one page-view record: session id, client IP,
 * visit time, visited URL, stay time and step number. All values are kept
 * as strings.
 */
public class PageViewsBean {

    /**
     * Pattern used to parse {@link #time}.
     * NOTE(review): the literal was truncated in the published listing (only a
     * trailing "ss" survived); this pattern matches the sample timestamps such
     * as "2016-05-30 15:17:30" -- confirm against the original source.
     */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Separator placed between fields by {@link #toString()}.
     * NOTE(review): reconstructed -- the literal was truncated in the listing.
     */
    private static final String FIELD_SEPARATOR = " ";

    String session;
    String IP_addr;
    String time;
    String visit_URL;
    String stayTime;
    String step;

    public String getSession() {
        return session;
    }

    public void setSession(String session) {
        this.session = session;
    }

    public String getIP_addr() {
        return IP_addr;
    }

    public void setIP_addr(String iP_addr) {
        IP_addr = iP_addr;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getVisit_URL() {
        return visit_URL;
    }

    public void setVisit_URL(String visit_URL) {
        this.visit_URL = visit_URL;
    }

    public String getStayTime() {
        return stayTime;
    }

    public void setStayTime(String stayTime) {
        this.stayTime = stayTime;
    }

    public String getStep() {
        return step;
    }

    public void setStep(String step) {
        this.step = step;
    }

    /**
     * Parses the stored time string into a {@link Date}.
     *
     * @return the parsed date, or {@code null} when the time is unset, empty,
     *         or cannot be parsed (in the last case the stack trace is printed)
     */
    public Date getTimeWithDateFormat() {
        // Compare by content: a reference comparison against a literal lets
        // empty strings created at runtime through and triggers a pointless
        // parse failure.
        if (time == null || time.isEmpty()) {
            return null;
        }

        // SimpleDateFormat is not thread-safe, so use a fresh instance per call.
        SimpleDateFormat format = new SimpleDateFormat(TIME_PATTERN);
        try {
            return format.parse(time);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Joins the fields in the order session, IP, time, URL, stay time, step.
     * NOTE(review): only the leading {@code session} and the later
     * {@code visit_URL} survived in the listing; the remaining order follows
     * the published sample rows -- confirm against the original source.
     */
    @Override
    public String toString() {
        return session + FIELD_SEPARATOR + IP_addr + FIELD_SEPARATOR + time + FIELD_SEPARATOR
                + visit_URL + FIELD_SEPARATOR + stayTime + FIELD_SEPARATOR + step;
    }
}
第三次日志清洗产生的PageViews数据结构如下表所示:

| SessionID | IP | 访问时间 | 访问页面 | 停留时间 | 第几步 |
| --- | --- | --- | --- | --- | --- |
| Session1 | 192.168.12.130 | 2016-05-30 15:17:30 | /blog/me | 30000 | 1 |
| Session1 | 192.168.12.130 | 2016-05-30 15:18:00 | /blog/me/admin | 30000 | 2 |
| Session1 | 192.168.12.130 | 2016-05-30 15:18:30 | /home | 30000 | 3 |
| Session2 | 192.168.12.150 | 2016-05-30 15:16:30 | /products | 30000 | 1 |
| Session2 | 192.168.12.150 | 2016-05-30 15:17:00 | /products/details | 30000 | 2 |
第四步,再次清洗Session日志,并生成Visits信息表
[plain] view plain copy
1. package com.guludada.clickstream; 2.
3. import java.io.IOException; 4. import java.text.ParseException; 5. import java.text.SimpleDateFormat; 6. import java.util.ArrayList; 7. import java.util.Collections; 8. import java.util.Comparator; 9. import java.util.Date; 10. import java.util.HashMap; 11. import java.util.Map; 12.
13. import org.apache.hadoop.conf.Configuration; 14. import org.apache.hadoop.fs.Path;
15. import org.apache.hadoop.io.NullWritable; 16. import org.apache.hadoop.io.Text; 17. import org.apache.hadoop.mapreduce.Job; 18. import org.apache.hadoop.mapreduce.Mapper; 19. import org.apache.hadoop.mapreduce.Reducer;
20. import org.apache.hadoop.mapreduce.Reducer.Context;
21. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 22. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 23.
24. import com.guludada.clickstream.PageViews.pageMapper; 25. import com.guludada.clickstream.PageViews.pageReducer; 26. import com.guludada.clickstream.logClean.cleanMap; 27. import com.guludada.dataparser.PageViewsParser; 28. import com.guludada.dataparser.VisitsInfoParser; 29. import com.guludada.javabean.PageViewsBean; 30.
31. public class VisitsInfo { 32.
33. public static class visitMapper extends Mapper
34.
35. private Text word = new Text(); 36.
37. public void map(Object key,Text value,Context context) { 38.