50. }
51. context.write(key, new IntWritable(sum));
52. }
53. }
54.
/**
 * Job driver entry point.
 *
 * Requires at least three command-line arguments: args[0] is added as the
 * job's input path, args[1] is set as its output path, and args[2] is split
 * into target words. With fewer than three arguments it prints a message
 * and returns without creating a job.
 *
 * NOTE(review): the string literals on the println, split and Job
 * constructor lines end at a backslash in this listing (this looks like an
 * extraction artifact), so the usage text, the split delimiter and the job
 * name cannot be read from here -- restore them from the original post.
 */
55. public static void main(String[] args) throws Exception {
56. Configuration conf = new Configuration();
57.
58. if (args.length < 3) {
59. System.out.println(\
60. return;
61. }
62.
63. // Split args[2] into target words and hand each one, lowercased, to Map.add.
// NOTE(review): Map.add runs here, in the driver; confirm that the mapper
// tasks actually see these words when the job is not run in a single JVM.
64. String[] target_words = args[2].split(\
65. for (String word : target_words) {
66. Map.add(word.toLowerCase());
67. }
68.
69. Job job = new Job(conf, \
70.
// Output records are (Text, IntWritable) pairs.
71. job.setOutputKeyClass(Text.class);
72. job.setOutputValueClass(IntWritable.class);
73.
74. job.setMapperClass(Map.class);
75. job.setReducerClass(Reduce.class);
76.
77. job.setInputFormatClass(TextInputFormat.class);
78. job.setOutputFormatClass(TextOutputFormat.class);
79.
80. FileInputFormat.addInputPath(job, new Path(args[0]));
81. FileOutputFormat.setOutputPath(job, new Path(args[1]));
82.
// The value returned by waitForCompletion is not used, so main does not
// report whether the job succeeded.
83. job.waitForCompletion(true);
84. }
85.
86. }
复制代码
5. 第五题的程序是什么?
6.
hosts:增加局域网主机名和 IP 的对应关系,省得再记 IP; hostname:改主机名,克隆虚拟机的时候经常需要这么做; fstab:修改挂载点,加新硬盘的时候会需要;
profile, bash.bashrc: 修改系统范围的环境变量时经常用; network/interfaces:配置静态 IP 时需要。
7.
7.1
1. package org.aboutyun;
2.
3. import java.io.BufferedReader;
4. import java.io.FileNotFoundException;
5. import java.io.FileReader;
6. import java.io.IOException;
7.
/**
 * Counts the records of a text file whose records are separated by the
 * two-character sequence 0x01 0x02, and prints the count to standard output.
 *
 * <p>An empty file yields 0. A non-empty file yields 1 plus the number of
 * 0x01 0x02 separators it contains.
 */
public class LineCounter {
    /** First character of the two-character record separator. */
    private static final char SEPARATOR_FIRST = 0x01;

    /** Second character of the two-character record separator. */
    private static final char SEPARATOR_SECOND = 0x02;

    /** Number of characters requested from the reader per read call. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Program entry point.
     *
     * @param args args[0] is the path of the file to count; when it is
     *             missing a usage message is written to standard error
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: LineCounter <file>");
            return;
        }

        // try-with-resources closes the reader on every path, including errors.
        try (BufferedReader reader = new BufferedReader(new FileReader(args[0]))) {
            System.out.println(countLines(reader));
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file
            // is reported here as well.
            e.printStackTrace();
        }
    }

    /**
     * Reads the stream to its end and counts its records.
     *
     * @param reader source of characters; the caller is responsible for closing it
     * @return 0 for an empty stream, otherwise 1 plus the number of
     *         0x01 0x02 separators seen
     * @throws IOException if reading fails
     */
    private static long countLines(BufferedReader reader) throws IOException {
        char[] buffer = new char[BUFFER_SIZE];
        long lineCount = 0;
        // Remembered across read calls so that a separator split between two
        // chunks is still recognised, and so that no lookup ever indexes
        // before the start of the buffer.
        char previous = 0;
        int count;

        while ((count = reader.read(buffer)) >= 0) {
            if (count > 0 && lineCount == 0) {
                // The file has some content, so it holds at least one record.
                lineCount = 1;
            }

            for (int i = 0; i < count; ++i) {
                char current = buffer[i];
                if (current == SEPARATOR_SECOND && previous == SEPARATOR_FIRST) {
                    ++lineCount;
                }
                previous = current;
            }
        }

        return lineCount;
    }
}
复制代码
7.2 可以使用Profiler来对性能进行评估分析,比如Eclipse的TPTP,或者JProfiler。可以观察不同函数的调用次数以及占用时间,从而减少调用次数,并优化函数内部的实现。