Flume+Hadoop+Hive的离线分析系统基本架构 - 图文(3)

2019-08-03 13:18

4. import java.util.regex.Matcher; 5. import java.util.regex.Pattern; 6.

7. import com.guludada.javabean.WebLogBean; 8. /**

9. * 用正则表达式匹配出合法的日志记录 10. * 11. * 12. */

13. public class WebLogParser { 14.

15. public String parser(String weblog_origin) { 16.

17. WebLogBean weblogbean = new WebLogBean(); 18.

19. // 获取IP地址

20. Pattern IPPattern = Pattern.compile(\ 21. Matcher IPMatcher = IPPattern.matcher(weblog_origin); 22. if(IPMatcher.find()) {

23. String IPAddr = IPMatcher.group(0); 24. weblogbean.setIP_addr(IPAddr); 25. } else { 26. return \ 27. }

28. // 获取时间信息

29. Pattern TimePattern = Pattern.compile(\ 30. Matcher TimeMatcher = TimePattern.matcher(weblog_origin); 31. if(TimeMatcher.find()) {

32. String time = TimeMatcher.group(1); 33. String[] cleanTime = time.split(\ 34. weblogbean.setTime(cleanTime[0]); 35. } else { 36. return \ 37. } 38.

39. //获取其余请求信息

40. Pattern InfoPattern = Pattern.compile(

41. \

)\ 42.

43. Matcher InfoMatcher = InfoPattern.matcher(weblog_origin); 44. if(InfoMatcher.find()) { 45.

46. String requestInfo = InfoMatcher.group(1).replace('\\\

47. String[] requestInfoArry = requestInfo.split(\ 48. weblogbean.setMethod(requestInfoArry[0]); 49. weblogbean.setRequest_URL(requestInfoArry[1]); 50. weblogbean.setRequest_protocol(requestInfoArry[2]); 51. String status_code = InfoMatcher.group(2); 52. weblogbean.setRespond_code(status_code); 53.

54. String respond_data = InfoMatcher.group(3); 55. weblogbean.setRespond_data(respond_data); 56.

57. String request_come_from = InfoMatcher.group(4).replace('\\\

m();

58. weblogbean.setRequst_come_from(request_come_from); 59.

60. String browserInfo = InfoMatcher.group(5).replace('\\\ 61. weblogbean.setBrowser(browserInfo); 62. } else { 63. return \ 64. } 65.

66. return weblogbean.toString(); 67. } 68. 69. }

[plain] view plain copy

1. package com.guludada.javabean; 2.

/**
 * Plain mutable holder for the fields extracted from one access-log record.
 * All values are kept as strings exactly as they were set.
 */
public class WebLogBean {

    // Field access is left package-private, as in the original listing.
    String IP_addr;
    String time;
    String method;
    String request_URL;
    String request_protocol;
    String respond_code;
    String respond_data;
    String requst_come_from;
    String browser;

    /** @return the client IP address */
    public String getIP_addr() {
        return IP_addr;
    }

    /** @param iP_addr the client IP address */
    public void setIP_addr(String iP_addr) {
        IP_addr = iP_addr;
    }

    /** @return the request timestamp */
    public String getTime() {
        return time;
    }

    /** @param time the request timestamp */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return the HTTP request method */
    public String getMethod() {
        return method;
    }

    /** @param method the HTTP request method */
    public void setMethod(String method) {
        this.method = method;
    }

    /** @return the requested URL */
    public String getRequest_URL() {
        return request_URL;
    }

    /** @param request_URL the requested URL */
    public void setRequest_URL(String request_URL) {
        this.request_URL = request_URL;
    }

    /** @return the request protocol */
    public String getRequest_protocol() {
        return request_protocol;
    }

    /** @param request_protocol the request protocol */
    public void setRequest_protocol(String request_protocol) {
        this.request_protocol = request_protocol;
    }

    /** @return the response status code */
    public String getRespond_code() {
        return respond_code;
    }

    /** @param respond_code the response status code */
    public void setRespond_code(String respond_code) {
        this.respond_code = respond_code;
    }

    /** @return the response data field */
    public String getRespond_data() {
        return respond_data;
    }

    /** @param respond_data the response data field */
    public void setRespond_data(String respond_data) {
        this.respond_data = respond_data;
    }

    /** @return where the request came from */
    public String getRequst_come_from() {
        return requst_come_from;
    }

    /** @param requst_come_from where the request came from */
    public void setRequst_come_from(String requst_come_from) {
        this.requst_come_from = requst_come_from;
    }

    /** @return the browser information */
    public String getBrowser() {
        return browser;
    }

    /** @param browser the browser information */
    public void setBrowser(String browser) {
        this.browser = browser;
    }

    /**
     * Renders all nine fields in declaration order on one line.
     *
     * <p>NOTE(review): the separator literal was truncated in the listing;
     * a single space is assumed. Confirm it matches the field delimiter
     * expected by whatever consumes the cleaned records.
     *
     * @return the fields joined by the separator; unset fields render as "null"
     */
    @Override
    public String toString() {
        return IP_addr + " " + time + " " + method + " "
                + request_URL + " " + request_protocol + " " + respond_code
                + " " + respond_data + " " + requst_come_from + " " + browser;
    }
}

第一次日志清洗后的记录如下图:

宅男福利社www.zhainan.hk申博官网www.l-ch.net澳门金沙官网www.91jinsha.com


Flume+Hadoop+Hive的离线分析系统基本架构 - 图文(3).doc 将本文的Word文档下载到电脑 下载失败或者文档不完整,请联系客服人员解决!

下一篇:砂浆合同

相关阅读
本类排行
× 注册会员免费下载(下载后可以自由复制和排版)

马上注册会员

注:下载文档有可能“只有目录或者内容不全”等情况,请下载之前注意辨别,如果您已付费且无法下载或内容有问题,请联系我们协助你处理。
微信: QQ: