1.hive创建数据库
CREATEDATABASE|SCHEMA[IFNOTEXISTS]<databasename>;
2.hive创建表
hive里一般有两种表的结构,表和外部表,以下分别是两种表的创建代码:
CREATETABLEphone_info(idint,nameString,storageString,pricedouble)
ROWFORMATDELIMITED//代表一行是一条记录
FIELDSTERMINATEDBY'\t'//列是按照table键分开
STOREDASTEXTFILE[SEQUENCEFILE];//二种最常见的存储格式,一般可以不写
CREATEEXTERNALTABLEphone_external(idint,nameString,pricedouble)
ROWFORMATDELIMITED
FIELDSTERMINATEDBY'\t'
STOREDASTEXTFILE
LOCATION'</xudong/phone.txt>';//这里填写外部表数据的hdfs地址
3.hive表中导入数据
loaddatalocalinpath'/home/xudong/xxx.txt'intotablephone_info;
4.hive删除表
DROPTABLEIFEXISTSphone_info;
5.hive创建临时表存储中间结果
CREATETABLEtemp_info
AS
SELECTidphone_id,namephone_name,priceFROMphone_info
SORTBYphone_id;
6.hive简单的查询语句
SELECT*FROMtemp_info;
SELECTidphone_id,namephone_nameFROMphone_info;
SELECTa.ip,a.name,b.usernameFROMphone_infoaINNERJOINuserbon(a.ip=b.ip);
7.hive批量插入数据到表
CREATETABLEphone_info_likeLIKEphone_info;//复制表的结构
INSERTINTOphone_info_likeSELECT*FROMphone_info;
INSERTOVERWRITEphoen_info_likeSELECT*FROMphone_info;//into是追加数据,overwrite是覆盖以及存在的数据,属于重复性校验
8.hive分区表
CREATETABLEpart_table(idint,nameString,ipString,cityString,dateString)
PARTITIONEDBY(part_flagString)//这里的分区字段可以是表中字段也可以是指定的字段
ROWFORMATDELIMITEDFIELDSTERMINATEDBY',';
loaddatalocalinpath'/home/xudong/test.txt'intotablepart_tablepartition(part_flag='part1');
loaddatalocalinpath'/home/xudong/test1.txt'intotablepart_tablepartition(part_flag='part2');
select*frompart_tablewherepart_flag='part1';
CREATEDATABASE|SCHEMA[IFNOTEXISTS]<databasename>;
2.hive创建表
hive里一般有两种表的结构,表和外部表,以下分别是两种表的创建代码:
CREATETABLEphone_info(idint,nameString,storageString,pricedouble)
ROWFORMATDELIMITED//代表一行是一条记录
FIELDSTERMINATEDBY'\t'//列是按照table键分开
STOREDASTEXTFILE[SEQUENCEFILE];//二种最常见的存储格式,一般可以不写
CREATEEXTERNALTABLEphone_external(idint,nameString,pricedouble)
ROWFORMATDELIMITED
FIELDSTERMINATEDBY'\t'
STOREDASTEXTFILE
LOCATION'</xudong/phone.txt>';//这里填写外部表数据的hdfs地址
3.hive表中导入数据
loaddatalocalinpath'/home/xudong/xxx.txt'intotablephone_info;
4.hive删除表
DROPTABLEIFEXISTSphone_info;
5.hive创建临时表存储中间结果
CREATETABLEtemp_info
AS
SELECTidphone_id,namephone_name,priceFROMphone_info
SORTBYphone_id;
6.hive简单的查询语句
SELECT*FROMtemp_info;
SELECTidphone_id,namephone_nameFROMphone_info;
SELECTa.ip,a.name,b.usernameFROMphone_infoaINNERJOINuserbon(a.ip=b.ip);
7.hive批量插入数据到表
CREATETABLEphone_info_likeLIKEphone_info;//复制表的结构
INSERTINTOphone_info_likeSELECT*FROMphone_info;
INSERTOVERWRITEphoen_info_likeSELECT*FROMphone_info;//into是追加数据,overwrite是覆盖以及存在的数据,属于重复性校验
8.hive分区表
CREATETABLEpart_table(idint,nameString,ipString,cityString,dateString)
PARTITIONEDBY(part_flagString)//这里的分区字段可以是表中字段也可以是指定的字段
ROWFORMATDELIMITEDFIELDSTERMINATEDBY',';
loaddatalocalinpath'/home/xudong/test.txt'intotablepart_tablepartition(part_flag='part1');
loaddatalocalinpath'/home/xudong/test1.txt'intotablepart_tablepartition(part_flag='part2');
select*frompart_tablewherepart_flag='part1';