4. import java.util.regex.Matcher; 5. import java.util.regex.Pattern; 6.
7. import com.guludada.javabean.WebLogBean; 8. /**
 * Uses regular expressions to pick out the valid (well-formed) log records.
 */
13. public class WebLogParser { 14.
15. public String parser(String weblog_origin) { 16.
17. WebLogBean weblogbean = new WebLogBean(); 18.
19. // 获取IP地址
20. Pattern IPPattern = Pattern.compile(\ 21. Matcher IPMatcher = IPPattern.matcher(weblog_origin); 22. if(IPMatcher.find()) {
23. String IPAddr = IPMatcher.group(0); 24. weblogbean.setIP_addr(IPAddr); 25. } else { 26. return \ 27. }
28. // 获取时间信息
29. Pattern TimePattern = Pattern.compile(\ 30. Matcher TimeMatcher = TimePattern.matcher(weblog_origin); 31. if(TimeMatcher.find()) {
32. String time = TimeMatcher.group(1); 33. String[] cleanTime = time.split(\ 34. weblogbean.setTime(cleanTime[0]); 35. } else { 36. return \ 37. } 38.
39. //获取其余请求信息
40. Pattern InfoPattern = Pattern.compile(
41. \
)\ 42.
43. Matcher InfoMatcher = InfoPattern.matcher(weblog_origin); 44. if(InfoMatcher.find()) { 45.
46. String requestInfo = InfoMatcher.group(1).replace('\\\
47. String[] requestInfoArry = requestInfo.split(\ 48. weblogbean.setMethod(requestInfoArry[0]); 49. weblogbean.setRequest_URL(requestInfoArry[1]); 50. weblogbean.setRequest_protocol(requestInfoArry[2]); 51. String status_code = InfoMatcher.group(2); 52. weblogbean.setRespond_code(status_code); 53.
54. String respond_data = InfoMatcher.group(3); 55. weblogbean.setRespond_data(respond_data); 56.
57. String request_come_from = InfoMatcher.group(4).replace('\\\
m();
58. weblogbean.setRequst_come_from(request_come_from); 59.
60. String browserInfo = InfoMatcher.group(5).replace('\\\ 61. weblogbean.setBrowser(browserInfo); 62. } else { 63. return \ 64. } 65.
66. return weblogbean.toString(); 67. } 68. 69. }
[plain] view plain copy
1. package com.guludada.javabean; 2.
/**
 * Plain data holder for one cleaned web access-log record.
 *
 * <p>All values are kept as strings exactly as they were set; no validation is done here.
 */
public class WebLogBean {

    String IP_addr;
    String time;
    String method;
    String request_URL;
    String request_protocol;
    String respond_code;
    String respond_data;
    String requst_come_from;
    String browser;

    /** @return the client IP address */
    public String getIP_addr() {
        return IP_addr;
    }

    /** @param iP_addr the client IP address */
    public void setIP_addr(String iP_addr) {
        IP_addr = iP_addr;
    }

    /** @return the request time */
    public String getTime() {
        return time;
    }

    /** @param time the request time */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return the HTTP method */
    public String getMethod() {
        return method;
    }

    /** @param method the HTTP method */
    public void setMethod(String method) {
        this.method = method;
    }

    /** @return the requested URL */
    public String getRequest_URL() {
        return request_URL;
    }

    /** @param request_URL the requested URL */
    public void setRequest_URL(String request_URL) {
        this.request_URL = request_URL;
    }

    /** @return the request protocol */
    public String getRequest_protocol() {
        return request_protocol;
    }

    /** @param request_protocol the request protocol */
    public void setRequest_protocol(String request_protocol) {
        this.request_protocol = request_protocol;
    }

    /** @return the response status code */
    public String getRespond_code() {
        return respond_code;
    }

    /** @param respond_code the response status code */
    public void setRespond_code(String respond_code) {
        this.respond_code = respond_code;
    }

    /** @return the response data field */
    public String getRespond_data() {
        return respond_data;
    }

    /** @param respond_data the response data field */
    public void setRespond_data(String respond_data) {
        this.respond_data = respond_data;
    }

    /** @return the referer the request came from */
    public String getRequst_come_from() {
        return requst_come_from;
    }

    /** @param requst_come_from the referer the request came from */
    public void setRequst_come_from(String requst_come_from) {
        this.requst_come_from = requst_come_from;
    }

    /** @return the browser (user agent) string */
    public String getBrowser() {
        return browser;
    }

    /** @param browser the browser (user agent) string */
    public void setBrowser(String browser) {
        this.browser = browser;
    }

    /**
     * Renders the record as its nine fields in declaration order, separated by one space.
     * Unset fields appear as the text {@code null}.
     *
     * <p>NOTE(review): the separator literal was lost from the listing; a single space is
     * assumed here. Confirm against the consumers of the cleaned records.
     *
     * @return the space-separated record
     */
    @Override
    public String toString() {
        return String.join(" ",
                IP_addr, time, method,
                request_URL, request_protocol, respond_code,
                respond_data, requst_come_from, browser);
    }
}
第一次日志清洗后的记录如下图:
宅男福利社www.zhainan.hk申博官网www.l-ch.net澳门金沙官网www.91jinsha.com