Hive基本操作(数据导入导出)

2017-01-09 14:03:49来源:oschina作者:kunping人点击

1、创建数据库(mydb)


CREATE DATABASE IF NOT EXISTS mydb;
use mydb;

2、创建表(t_loginfo)


CREATE TABLE IF NOT EXISTS t_loginfo(
sdate string,
stime array,
level string,
class string,
info1 string,
info2 string,
info3 string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

可将上述HQL语句保存到一个文本中,如/usr/script/loginfo.hql,然后执行


hive -f /usr/script/loginfo.hql
--或 hive> source /usr/script/loginfo.hql;

hadoop的日志格式:日期,时间,级别,类信息,提示信息。


2017-01-08 01:51:13,445 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741836_1012 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741836
2017-01-08 01:51:13,458 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741837_1013 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741837
2017-01-08 01:51:13,459 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741838_1014 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741838
2017-01-08 01:51:13,460 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741839_1015 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741839
2017-01-08 06:11:34,368 INFO org.apache.hadoop.hdfs.server.datanode.DirectoryScanner: BlockPool BP-1727771939-127.0.0.1-1483765767408 Total blocks: 6, missing metadata files:0, missing block files:0, missing blocks in memory:0, mismatched blocks:0
2017-01-08 07:16:37,859 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Successfully sent block report 0x3f6343edc1a7,containing 1 storage report(s), of which we sent 1. The reports had 6 total blocks and used 1 RPC(s). This took 0 msec to generate and 8 msecs for RPC and NN processing. Got back one command: FinalizeCommand/5.
2017-01-08 07:16:37,860 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Got finalize command for block pool BP-1727771939-127.0.0.1-1483765767408
2017-01-08 10:05:01,087 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Receiving BP-1727771939-127.0.0.1-1483765767408:blk_1073741846_1022 src: /192.168.241.129:47726 dest: /192.168.241.129:50010
2017-01-08 10:05:01,176 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.241.129:47726, dest: /192.168.241.129:50010, bytes: 227, op: HDFS_WRITE, cliID: DFSClient_NONMAPREDUCE_-1528398405_1, offset: 0, srvID: 0d460e03-63da-42ea-88da-ac4499dda1f1, blockid: BP-1727771939-127.0.0.1-1483765767408:blk_1073741846_1022, duration: 5938769

3、导入本地数据,若无LOCAL关键字,则从hdfs里面加载,无OVERWRITE关键字,则追加而不是覆盖


LOAD DATA LOCAL INPATH '/usr/local/hadoop-2.7.3/logs/hadoop.log' OVERWRITE INTO TABLE t_loginfo;

4、查看数据


SELECT * FROM t_loginfo;

5、基本操作语句


--列出所有的数据库名称
SHOW DATABASES;
--使用数据库
USE dbName;
--列出所有表
SHOW TABLES;
--列出指定数据库所有表
SHOW TABLES IN dbName;
--正则表达式搜索表
SHOW TABLES LIKE 't%';
--获取建表语句
SHOW CREATE TABLE tableName;
--查看表结构
DESCRIBE tableName;
--复制表结构
CREATE TABLE new_table LIKE old_table;
--复制表结构和内容
CREATE TABLE new_table AS SELECT * FROM old_table;
--查询结果输出到文件中,-S静默执行
hive -S -e "SELECT * FROM tableName" > /tmp/myquery
--从文件中执行Hive查询,将查询语句保存为.q或者是.hql后缀的文件(其它也可)
--脚本 SELECT * FROM tableName;
--方式1:
hive -f /path/to/file/myquery.hql
--方式2:
hive> source /path/to/file/myquery.hql;
--执行shell命令,在命令前面加(!),并以分号(;)结束
hive> ! pwd;
--执行hadoop命令只需要将hadoop去掉,并以分号结束
hive> dfs -ls / ;
hive> dfs -help;
--设置显示查询字段名称
hive> set hive.cli.print.header=true;
hive> SELECT * FROM tableName
--删除数据库,不允许删除有表的数据库,若要删除需带上cascade
DROP DATABASE IF EXISTS dbName CASCADE;
--设置数据的属性,其中数据库名和数据库所在目录
hive> ALTER DATABASE daName SET DBPROPERTIES('key' = 'value');
--导出数据
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t_loginof.txt' SELECT * FROM tableName;
--导出数据到多个文件
FROM tableName t
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t1.txt' SELECT * WHERE t.col = 'xx1'
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t2.txt' SELECT * WHERE t.col = 'xx2'
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t3.txt' SELECT * WHERE t.col = 'xx3';

最新文章

123

最新摄影

微信扫一扫

第七城市微信公众平台