hive shell/sql command line
Run the hive command to enter the Hive CLI.
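The hive CLI can also run statements non-interactively; a small sketch using its standard -e and -f options (the script path below is just a placeholder):
hive -e "show databases;"    #run a single statement and exit
hive -f /path/to/script.hql    #run every statement in a script file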
//list databases
show databases;
//create a database
create database myhive;
//create a database only if it does not already exist
create database if not exists t1;
//create a database with a comment
create database if not exists t2 comment 'learning hive';
//create a database with properties
create database if not exists t3 with dbproperties('creator'='hadoop','date'='2018-04-05');
//switch to a database
use myhive;
//show database information
desc database t2;
desc database extended t3;
//list tables
show tables;
show tables in t1;    #t1 is the database name
///list tables whose names start with student_c
show tables like 'student_c*';
//show the database currently in use
select current_database();
//create a table (general syntax)
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
For details see: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable
- CREATE TABLE creates a table with the specified name. If a table with the same name already exists an error is thrown; the IF NOT EXISTS option can be used to ignore it.
- EXTERNAL lets you create an external table and point it at the actual data path (LOCATION) when the table is created.
- LIKE lets you copy an existing table's structure without copying its data.
- COMMENT adds a description to the table or to individual columns.
- PARTITIONED BY specifies the partition columns.
- ROW FORMAT (see the sketch after this list)
DELIMITED [FIELDS TERMINATED BY char] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
| SERDE serde_name [WITH SERDEPROPERTIES
(property_name=property_value, property_name=property_value, ...)]
You can supply a custom SerDe or use one of the built-in SerDes when creating a table. If ROW FORMAT is not specified, or ROW FORMAT DELIMITED is specified, the built-in SerDe is used. You also declare the table's columns at creation time; when a custom SerDe is specified, Hive uses that SerDe to determine the actual column data of the table.
- STORED AS
SEQUENCEFILE //sequence file
| TEXTFILE //plain text file format
| RCFILE //file format combining row and columnar storage
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname //custom file format
If the data is plain text, use STORED AS TEXTFILE. If the data needs to be compressed, use STORED AS SEQUENCEFILE.
- LOCATION specifies the table's storage path on HDFS.
Best practice:
If the data is already stored on HDFS and will be used by multiple users or clients, it is best to create an external table;
otherwise it is best to create a managed (internal) table.
If no location is specified, the table is stored under the default warehouse path according to the default rules.
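A short sketch of ROW FORMAT SERDE and STORED AS in practice (the table names are illustrative; OpenCSVSerde ships with Hive 0.14+ and exposes every column as a string, hence the all-string schema):
create table student_csv(id string, name string, sex string, age string, department string)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ('separatorChar' = ',')
stored as textfile;
create table student_seq(id int, name string, sex string, age int, department string)
row format delimited fields terminated by ","
stored as sequencefile;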
///create a managed (internal) table
create table student(id int, name string, sex string, age int, department string) row format delimited fields terminated by ",";
///create an external table
create external table student_ext
(id int, name string, sex string, age int,department string) row format delimited fields terminated by "," location "/user/hive/outtable/student_ext";
Note: you need to log in as the hdfs user to grant permissions so that root has access to the /user/hive directory on HDFS:
su - hdfs
hdfs dfs -chmod 777 /user/hive
hdfs dfs -ls /user
///create a partitioned table
create external table student_ptn
(id int, name string, sex string, age int,department string)
partitioned by (city string)
row format delimited fields terminated by ","
location "/user/hive/outtable/student_ptn";
Add partitions:
alter table student_ptn add partition(city="beijing");
alter table student_ptn add partition(city="tianjin");
If a table is partitioned, each partition definition shows up as a subdirectory under the table's data storage directory.
For a partitioned table, data files must be stored inside one of the partitions; they cannot be stored directly under the table directory.
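This can be checked on HDFS once the two partitions above exist; partition subdirectories follow the standard name=value layout under the table location:
hdfs dfs -ls /user/hive/outtable/student_ptn
#.../student_ptn/city=beijing
#.../student_ptn/city=tianjin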
Create a bucketed table
create external table student_bck(id int, name string, sex string, age int,department string) clustered by (id) sorted by (id asc, name desc) into 4 buckets row format delimited fields terminated by ","
location "/user/hive/outtable/student_bck";
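Bucketed tables are normally populated with insert ... select rather than load data, so that Hive can hash the rows into the bucket files. A sketch, assuming the student table created later in this guide already holds data (hive.enforce.bucketing only exists on Hive releases before 2.x, where it must be enabled):
set hive.enforce.bucketing = true;
insert overwrite table student_bck select id, name, sex, age, department from student;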
//create a table with CTAS (create table from a query result)
create table student_ctas as select * from student where id < 95012;
//copy a table's structure
create table student_copy like student;
//create a local data file (on the local filesystem, not on HDFS)
cat <<EOF >student.txt
95002,劉晨,女,19,IS
95017,王風娟,女,18,IS
95018,王一,女,19,IS
95013,馮偉,男,21,CS
95014,王小麗,女,19,CS
95019,邢小麗,女,19,IS
95020,趙錢,男,21,IS
95003,王敏,女,22,MA
95004,張立,男,19,IS
95012,孫花,女,20,CS
95010,孔小濤,男,19,CS
95005,劉剛,男,18,MA
95006,孫慶,男,23,CS
95007,易思玲,女,19,MA
95008,李娜,女,18,CS
95021,周二,男,17,MA
95022,鄭明,男,20,MA
95001,李勇,男,20,CS
95011,包小柏,男,18,MA
95009,夢圓圓,女,18,MA
95015,王君,男,18,MA
EOF
//load data
load data local inpath "/home/hadoop/student.txt" into table student;
The loaded data file is placed directly into the table's directory on HDFS.
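To confirm, list the table's warehouse directory (the path matches the Location reported by desc extended/formatted below):
hdfs dfs -ls /user/hive/warehouse/myhive.db/student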
//query data
select * from student;
//show table structure
hive> desc student;
OK
id int
name string
sex string
age int
department string
Time taken: 0.709 seconds, Fetched: 5 row(s)
hive> desc extended student;
OK
id int
name string
sex string
age int
department string
Detailed Table Information Table(tableName:student, dbName:myhive, owner:root, createTime:1551859665, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:sex, type:string, comment:null), FieldSchema(name:age, type:int, comment:null), FieldSchema(name:department, type:string, comment:null)], location:hdfs://node2:8020/user/hive/warehouse/myhive.db/student, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{field.delim=,, serialization.format=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{totalSize=504, COLUMN_STATS_ACCURATE=true, numFiles=1, transient_lastDdlTime=1551859708}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE, ownerType:USER)
Time taken: 0.618 seconds, Fetched: 7 row(s)
///table structure in a friendlier format
hive> desc formatted student;
OK
# col_name data_type comment
id int
name string
sex string
age int
department string
# Detailed Table Information
Database: myhive
OwnerType: USER
Owner: root
CreateTime: Wed Mar 06 16:07:45 CST 2019
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://node2:8020/user/hive/warehouse/myhive.db/student
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
totalSize 504
transient_lastDdlTime 1551859708
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim ,
serialization.format ,
Time taken: 0.703 seconds, Fetched: 35 row(s)
//show partition information
show partitions student_ptn;
//show the full CREATE TABLE statement
show create table student_ptn;
//drop a database
drop database dbname; drop database if exists dbname;
By default, Hive does not allow dropping a database that still contains tables. There are two ways around this:
1. Drop all tables in the database by hand, then drop the database (see the sketch after the cascade example below).
2. Use the cascade keyword:
drop database if exists dbname cascade;
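A sketch of option 1, where tbl1 and tbl2 stand in for whatever tables the database actually contains:
use dbname;
show tables;
drop table if exists tbl1;
drop table if exists tbl2;
use default;
drop database dbname;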
//drop a table
drop table tbname;
//rename a table
alter table student rename to new_student;
//add a column
alter table new_student add columns (score int);
//change a column definition
alter table new_student change name new_name string;
///drop a single column
Not supported directly; redefine the column list with replace columns (next section) instead.
//replace all column definitions
alter table new_student replace columns (id int, name string, address string);
///add multiple partitions at once
alter table student_ptn add partition(city="chongqing2") partition(city="chongqing3") partition(city="chongqing4");
///dynamic partitions
First load some data into a static partition:
load data local inpath "/var/lib/hadoop-hdfs/student.txt" into table student_ptn partition(city="beijing");
Now insert the contents of this table directly into another table, student_ptn_age, with age as the dynamic partition column (no concrete partition value is specified; Hive decides which partition each row belongs to).
First create student_ptn_age, partitioned by age:
create table student_ptn_age(id int,name string,sex string,department string) partitioned by (age int);
Query the data from student_ptn and insert it into student_ptn_age:
insert overwrite table student_ptn_age partition(age) select id,name,sex,department,age from student_ptn;
This statement fails with: FAILED: SemanticException [Error 10096]: Dynamic partition strict mode requires at least one static partition column. To turn this off set hive.exec.dynamic.partition.mode=nonstrict
It works after applying the following setting:
set hive.exec.dynamic.partition.mode=nonstrict;
hive.exec.dynamic.partition.mode defaults to strict, which requires at least one static partition column to be specified when inserting, so that data in all partitions cannot be overwritten by accident. For dynamic-partition inserts it must be set to nonstrict, which drops the requirement for a static partition and allows fully dynamic partition inserts. The other related properties are straightforward and are not covered here.
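A related property, hive.exec.dynamic.partition, must also be enabled for dynamic partition inserts (it already defaults to true on current Hive releases), so a cautious session sets both before running the insert:
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;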
Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-DynamicPartitionInserts
//add partitions with explicit storage directories
alter table student_ptn add if not exists partition(city='beijing') location '/user/hive/outtable/student_ptn/student_ptn_beijing' partition(city='jilin') location '/user/hive/outtable/student_ptn/student_ptn_jilin';
//change the storage directory of an existing partition
alter table student_ptn partition (city='beijing') set location '/user/hive/outtable/student_ptn/student_ptn_beijing';
The old partition directory still exists afterwards, but data newly added to the partition only goes into the new directory.
Queries also read only the new location and ignore files left in the old one, so if you want to keep the existing data you have to move the files over as well.
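For example, assuming the partition's files previously sat in the default city=beijing directory under the table location (the paths are illustrative):
hdfs dfs -mv /user/hive/outtable/student_ptn/city=beijing/* /user/hive/outtable/student_ptn/student_ptn_beijing/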
//drop a partition
alter table student_ptn drop partition (city='beijing');
//truncate a table (remove all of its data)
truncate table student_ptn;
//list built-in functions
show functions;
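To look at one function in detail, desc function prints its summary and desc function extended adds usage examples (upper is just an arbitrary built-in here):
desc function upper;
desc function extended upper;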
Reference article:
https://www.cnblogs.com/qingyunzong/p/8723271.html