for (Result r : rs) {
System.out.println(\获得到rowkey:\ for (KeyValue keyValue : r.raw()) {
System.out.println(\列:\ + \值:\ } }
} catch (IOException e) { e.printStackTrace(); } }
/**
* 单条件查询,根据rowkey查询唯一一条记录 * @param tableName */
public static void QueryByCondition1(String tableName) {
HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); try {
Get scan = new Get(\根据rowkey查询 Result r = table.get(scan);
System.out.println(\获得到rowkey:\ for (KeyValue keyValue : r.raw()) {
System.out.println(\列:\ + \值:\ }
} catch (IOException e) { e.printStackTrace(); } }
/**
* 单条件按查询,查询多条记录 * @param tableName */
public static void QueryByCondition2(String tableName) {
try {
HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); Filter filter = new SingleColumnValueFilter(Bytes
.toBytes(\
.toBytes(\当列column1的值为aaa时进行查询 Scan s = new Scan(); s.setFilter(filter);
ResultScanner rs = table.getScanner(s); for (Result r : rs) {
System.out.println(\获得到rowkey:\ for (KeyValue keyValue : r.raw()) {
System.out.println(\列:\ + \值:\ } }
} catch (Exception e) { e.printStackTrace(); }
}
/**
 * Combined-condition scan: AND-combine three single-column filters
 * (column1=aaa, column2=bbb, column3=ccc) and print every matching row.
 *
 * NOTE(review): filter arguments reconstructed from the tutorial original
 * after escape-stripping corruption — verify family/value bytes against the
 * actual table schema.
 *
 * @param tableName name of the HBase table to scan
 */
public static void QueryByCondition3(String tableName) {
    try {
        HTablePool pool = new HTablePool(configuration, 1000);
        HTable table = (HTable) pool.getTable(tableName);

        List<Filter> filters = new ArrayList<Filter>();

        Filter filter1 = new SingleColumnValueFilter(
                Bytes.toBytes("column1"), null,
                CompareOp.EQUAL, Bytes.toBytes("aaa"));
        filters.add(filter1);

        Filter filter2 = new SingleColumnValueFilter(
                Bytes.toBytes("column2"), null,
                CompareOp.EQUAL, Bytes.toBytes("bbb"));
        filters.add(filter2);

        Filter filter3 = new SingleColumnValueFilter(
                Bytes.toBytes("column3"), null,
                CompareOp.EQUAL, Bytes.toBytes("ccc"));
        filters.add(filter3);

        // Default FilterList operator is MUST_PASS_ALL (logical AND).
        FilterList filterList1 = new FilterList(filters);

        Scan scan = new Scan();
        scan.setFilter(filterList1);
        ResultScanner rs = table.getScanner(scan);
        for (Result r : rs) {
            System.out.println("获得到rowkey:" + new String(r.getRow()));
            for (KeyValue keyValue : r.raw()) {
                System.out.println("列:" + new String(keyValue.getFamily())
                        + "====值:" + new String(keyValue.getValue()));
            }
        }
        // Release the scanner's server-side resources.
        rs.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
} }
实验名称:大数据综合案例
一 目的
1.掌握Hadoop大数据基本框架。 2.掌握MR核心编程。
3.掌握Hadoop生态组件使用。
二 内容
使用Hadoop框架完成日志的数据清理,分析。
三 步骤
1、先在本地创建一个gd.txt文件,然后把数据导入进去;
2、在hdfs上创建一个demo文件夹,然后在demo文件夹中创建t1文件夹(t1文件夹也可以不创建,可以只有demo一个文件夹),然后把ubuntu中的gd.txt文件导入到hdfs中的demo文件夹中的t1文件夹中,然后使用命令查看是否导入进去
3、进入hive中,创建表t1(ip、年、月、日、网址),然后使用命令查看表t1中的数据; (1)分别统计30,31号总流量
select count(*) from t1 where day=30; select count(*) from t1 where day=31;
(2)分别统计30,31号所有IP数(去重) select distinct ip from t1 where day=30;
select distinct ip from t1 where day=31;
(3)统计30,31号IP访问数为1的。
create table t2(ip String,ipcount int) row format delimited fields terminated by '\\t';
insert overwrite table t2 select ip,count(ip) ipcount from t1 where day=30 group by ip;
select * from t2;
select ip,ipcount from t2 where ipcount=1;
create table t3(ip String,ipcount int) row format delimited fields terminated by '\\t'; OK
Time taken: 0.247 seconds
insert overwrite table t3 select ip,count(ip) ipcount from t1 where day=31 group by ip; hive> select * from t3; OK
211.97.15.179 4 27.19.74.143 7 8.35.201.161 1 8.35.201.163 1 8.35.201.164 2 8.35.201.165 4
Time taken: 0.089 seconds, Fetched: 6 row(s) hive> select ip,ipcount from t3 where ipcount=1; OK
8.35.201.161 1 8.35.201.163 1
Time taken: 0.101 seconds, Fetched: 2 row(s)
(4)统计30,31号IP访问最高的。
select ip,ipcount from t2 order by ipcount desc limit 1; select ip,ipcount from t3 order by ipcount desc limit 1;
四 结果
1.成功搭建Hadoop生态环境,用于海量数据分析。 2.对某日志的hive数据清洗分析。
五 疑难
1.在最开始的不太懂MR的原理。 2.搭建hive的时候出现各种的错误。
六 算法
(1)分别统计30,31号总流量
select count(*) from t1 where day=30; select count(*) from t1 where day=31;
(2)分别统计30,31号所有IP数(去重) select distinct ip from t1 where day=30; select distinct ip from t1 where day=31; (3)统计30,31号IP访问数为1的。
create table t2(ip String,ipcount int) row format delimited fields terminated by '\\t';
insert overwrite table t2 select ip,count(ip) ipcount from t1 where day=30 group by ip;
select * from t2;
select ip,ipcount from t2 where ipcount=1;
create table t3(ip String,ipcount int) row format delimited fields terminated by '\\t'; OK
Time taken: 0.247 seconds
insert overwrite table t3 select ip,count(ip) ipcount from t1 where day=31 group by ip; hive> select * from t3; OK
211.97.15.179 4 27.19.74.143 7 8.35.201.161 1 8.35.201.163 1 8.35.201.164 2 8.35.201.165 4
Time taken: 0.089 seconds, Fetched: 6 row(s) hive> select ip,ipcount from t3 where ipcount=1; OK
8.35.201.161 1 8.35.201.163 1
Time taken: 0.101 seconds, Fetched: 2 row(s) (4)统计30,31号IP访问最高的。
select ip,ipcount from t2 order by ipcount desc limit 1; select ip,ipcount from t3 order by ipcount desc limit 1;
非关系数据库
实验名称:HBase的安装与配置
一 目的
1. 掌握HBase完全分布式的安装方法; 2. 验证HBase完全分布式的安装;
3. 打开Web UI管理界面验证HBase的安装; 4. 打开HBase Shell验证测试安装环境。
二 内容