hive shell/sql command line
Run the hive command to enter the Hive CLI.
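The hive CLI can also run statements non-interactively; a small sketch using its standard -e and -f options (the script path below is just a placeholder):
hive -e "show databases;"    #run a single statement and exit
hive -f /path/to/script.hql    #run every statement in a script file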
//list databases
show databases;
//create a database
create database myhive;
//create a database only if it does not already exist
create database if not exists t1;
//create a database with a comment
create database if not exists t2 comment 'learning hive';
//create a database with properties
create database if not exists t3 with dbproperties('creator'='hadoop','date'='2018-04-05');
//switch to a database
use myhive;
//show database information
desc database t2;
desc database extended t3;
//list tables
show tables;
show tables in t1;    #t1 is the database name
///list tables whose names start with student_c
show tables like 'student_c*';
//show the database currently in use
select current_database();
//create a table (general syntax)
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
For details see: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable
- CREATE TABLE creates a table with the specified name. If a table with the same name already exists an error is thrown; the IF NOT EXISTS option can be used to ignore it.
- EXTERNAL lets you create an external table and point it at the actual data path (LOCATION) when the table is created.
- LIKE lets you copy an existing table's structure without copying its data.
- COMMENT adds a description to the table or to individual columns.
- PARTITIONED BY specifies the partition columns.
- ROW FORMAT (see the sketch after this list)
DELIMITED [FIELDS TERMINATED BY char] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
| SERDE serde_name [WITH SERDEPROPERTIES
(property_name=property_value, property_name=property_value, ...)]
You can supply a custom SerDe or use one of the built-in SerDes when creating a table. If ROW FORMAT is not specified, or ROW FORMAT DELIMITED is specified, the built-in SerDe is used. You also declare the table's columns at creation time; when a custom SerDe is specified, Hive uses that SerDe to determine the actual column data of the table.
- STORED AS
SEQUENCEFILE //sequence file
| TEXTFILE //plain text file format
| RCFILE //file format combining row and columnar storage
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname //custom file format
If the data is plain text, use STORED AS TEXTFILE. If the data needs to be compressed, use STORED AS SEQUENCEFILE.
- LOCATION specifies the table's storage path on HDFS.
Best practice:
If the data is already stored on HDFS and will be used by multiple users or clients, it is best to create an external table;
otherwise it is best to create a managed (internal) table.
If no location is specified, the table is stored under the default warehouse path according to the default rules.
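A short sketch of ROW FORMAT SERDE and STORED AS in practice (the table names are illustrative; OpenCSVSerde ships with Hive 0.14+ and exposes every column as a string, hence the all-string schema):
create table student_csv(id string, name string, sex string, age string, department string)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ('separatorChar' = ',')
stored as textfile;
create table student_seq(id int, name string, sex string, age int, department string)
row format delimited fields terminated by ","
stored as sequencefile;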
///create a managed (internal) table
create table student(id int, name string, sex string, age int, department string) row format delimited fields terminated by ",";
///create an external table
create external table student_ext
(id int, name string, sex string, age int,department string) row format delimited fields terminated by "," location "/user/hive/outtable/student_ext";
Note: you need to log in as the hdfs user to grant permissions so that root has access to the /user/hive directory on HDFS:
su - hdfs
hdfs dfs -chmod 777 /user/hive
hdfs dfs -ls /user
///create a partitioned table
create external table student_ptn
(id int, name string, sex string, age int,department string)
partitioned by (city string)
row format delimited fields terminated by ","
location "/user/hive/outtable/student_ptn";
Add partitions:
alter table student_ptn add partition(city="beijing");
alter table student_ptn add partition(city="tianjin");
If a table is partitioned, each partition definition shows up as a subdirectory under the table's data storage directory.
For a partitioned table, data files must be stored inside one of the partitions; they cannot be stored directly under the table directory.
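This can be checked on HDFS once the two partitions above exist; partition subdirectories follow the standard name=value layout under the table location:
hdfs dfs -ls /user/hive/outtable/student_ptn
#.../student_ptn/city=beijing
#.../student_ptn/city=tianjin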
Create a bucketed table
create external table student_bck(id int, name string, sex string, age int,department string) clustered by (id) sorted by (id asc, name desc) into 4 buckets row format delimited fields terminated by ","
location "/user/hive/outtable/student_bck";
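Bucketed tables are normally populated with insert ... select rather than load data, so that Hive can hash the rows into the bucket files. A sketch, assuming the student table created later in this guide already holds data (hive.enforce.bucketing only exists on Hive releases before 2.x, where it must be enabled):
set hive.enforce.bucketing = true;
insert overwrite table student_bck select id, name, sex, age, department from student;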
//create a table with CTAS (create table from a query result)
create table student_ctas as select * from student where id < 95012;
//copy a table's structure
create table student_copy like student;
//create a local data file (on the local filesystem, not on HDFS)
cat <<EOF >student.txt
95002,劉晨,女,19,IS
95017,王風娟,女,18,IS
95018,王一,女,19,IS
95013,馮偉,男,21,CS
95014,王小麗,女,19,CS
95019,邢小麗,女,19,IS
95020,趙錢,男,21,IS
95003,王敏,女,22,MA
95004,張立,男,19,IS
95012,孫花,女,20,CS
95010,孔小濤,男,19,CS
95005,劉剛,男,18,MA
95006,孫慶,男,23,CS
95007,易思玲,女,19,MA
95008,李娜,女,18,CS
95021,周二,男,17,MA
95022,鄭明,男,20,MA
95001,李勇,男,20,CS
95011,包小柏,男,18,MA
95009,夢圓圓,女,18,MA
95015,王君,男,18,MA
EOF
//load data
load data local inpath "/home/hadoop/student.txt" into table student;
The loaded data file is placed directly into the table's directory on HDFS.
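To confirm, list the table's warehouse directory (the path matches the Location reported by desc extended/formatted below):
hdfs dfs -ls /user/hive/warehouse/myhive.db/student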
//query data
select * from student;
//show table structure
hive> desc student;
OK
id int
name string
sex string
age int
department string
Time taken: 0.709 seconds, Fetched: 5 row(s)
hive> desc extended student;
OK
id int
name string
sex string
age int
department string
Detailed Table Information Table(tableName:student, dbName:myhive, owner:root, createTime:1551859665, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:sex, type:string, comment:null), FieldSchema(name:age, type:int, comment:null), FieldSchema(name:department, type:string, comment:null)], location:hdfs://node2:8020/user/hive/warehouse/myhive.db/student, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{field.delim=,, serialization.format=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{totalSize=504, COLUMN_STATS_ACCURATE=true, numFiles=1, transient_lastDdlTime=1551859708}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE, ownerType:USER)
Time taken: 0.618 seconds, Fetched: 7 row(s)
///table structure in a friendlier format
hive> desc formatted student;
OK
# col_name data_type comment
id int
name string
sex string
age int
department string
# Detailed Table Information
Database: myhive
OwnerType: USER
Owner: root
CreateTime: Wed Mar 06 16:07:45 CST 2019
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://node2:8020/user/hive/warehouse/myhive.db/student
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
totalSize 504
transient_lastDdlTime 1551859708
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim ,
serialization.format ,
Time taken: 0.703 seconds, Fetched: 35 row(s)
//show partition information
show partitions student_ptn;
//show the full CREATE TABLE statement
show create table student_ptn;
//drop a database
drop database dbname; drop database if exists dbname;
By default, Hive does not allow dropping a database that still contains tables. There are two ways around this:
1. Drop all tables in the database by hand, then drop the database (see the sketch after the cascade example below).
2. Use the cascade keyword:
drop database if exists dbname cascade;
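A sketch of option 1, where tbl1 and tbl2 stand in for whatever tables the database actually contains:
use dbname;
show tables;
drop table if exists tbl1;
drop table if exists tbl2;
use default;
drop database dbname;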
//drop a table
drop table tbname;
//rename a table
alter table student rename to new_student;
//add a column
alter table new_student add columns (score int);
//change a column definition
alter table new_student change name new_name string;
///drop a single column
Not supported directly; redefine the column list with replace columns (next section) instead.
//replace all column definitions
alter table new_student replace columns (id int, name string, address string);
///add multiple partitions at once
alter table student_ptn add partition(city="chongqing2") partition(city="chongqing3") partition(city="chongqing4");
///dynamic partitions
First load some data into a static partition:
load data local inpath "/var/lib/hadoop-hdfs/student.txt" into table student_ptn partition(city="beijing");
Now insert the contents of this table directly into another table, student_ptn_age, with age as the dynamic partition column (no concrete partition value is specified; Hive decides which partition each row belongs to).
First create student_ptn_age, partitioned by age:
create table student_ptn_age(id int,name string,sex string,department string) partitioned by (age int);
Query the data from student_ptn and insert it into student_ptn_age:
insert overwrite table student_ptn_age partition(age) select id,name,sex,department,age from student_ptn;
This statement fails with: FAILED: SemanticException [Error 10096]: Dynamic partition strict mode requires at least one static partition column. To turn this off set hive.exec.dynamic.partition.mode=nonstrict
It works after applying the following setting:
set hive.exec.dynamic.partition.mode=nonstrict;
hive.exec.dynamic.partition.mode defaults to strict, which requires at least one static partition column to be specified when inserting, so that data in all partitions cannot be overwritten by accident. For dynamic-partition inserts it must be set to nonstrict, which drops the requirement for a static partition and allows fully dynamic partition inserts. The other related properties are straightforward and are not covered here.
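A related property, hive.exec.dynamic.partition, must also be enabled for dynamic partition inserts (it already defaults to true on current Hive releases), so a cautious session sets both before running the insert:
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;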
Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-DynamicPartitionInserts
//add partitions with explicit storage directories
alter table student_ptn add if not exists partition(city='beijing') location '/user/hive/outtable/student_ptn/student_ptn_beijing' partition(city='jilin') location '/user/hive/outtable/student_ptn/student_ptn_jilin';
//change the storage directory of an existing partition
alter table student_ptn partition (city='beijing') set location '/user/hive/outtable/student_ptn/student_ptn_beijing';
The old partition directory still exists afterwards, but data newly added to the partition only goes into the new directory.
Queries also read only the new location and ignore files left in the old one, so if you want to keep the existing data you have to move the files over as well.
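For example, assuming the partition's files previously sat in the default city=beijing directory under the table location (the paths are illustrative):
hdfs dfs -mv /user/hive/outtable/student_ptn/city=beijing/* /user/hive/outtable/student_ptn/student_ptn_beijing/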
//drop a partition
alter table student_ptn drop partition (city='beijing');
//truncate a table (remove all of its data)
truncate table student_ptn;
//list built-in functions
show functions;
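To look at one function in detail, desc function prints its summary and desc function extended adds usage examples (upper is just an arbitrary built-in here):
desc function upper;
desc function extended upper;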
Reference article:
https://www.cnblogs.com/qingyunzong/p/8723271.html