hive (default)> show databases;
OK
default
hive
Time taken: 0.023 seconds, Fetched: 2 row(s)
hive (default)> create database students;
OK
Time taken: 0.066 seconds
hive (default)> show databases;
OK
default
hive
students
Time taken: 0.016 seconds, Fetched: 3 row(s)
create database if not exists students; 如果存在同名的表,不會報錯(對于連續執行很有用,不會中斷)
hive (default)> create database students;
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Database students already exists
hive (default)> create database if not exists students;
OK
Time taken: 0.016 seconds
show databases like "s.*"; 搜索以 xxx 開頭的數據庫
hive (default)> show databases like "s.*";
OK
students
hive (default)> create database test1> comment "just test comment!";
OK
Time taken: 0.06 secondshive (default)> describe database test1;
OK
test1 just test comment! hdfs://localhost:9000/user/hive/warehouse/test1.db hadoop USER
Time taken: 0.018 seconds, Fetched: 1 row(s)
增加相關鍵值對 with dbproperties ('created'="michael", "date"='2021-04-06')
describe database extended test2; 顯示附件鍵值對信息
hive (default)> create database test2> with dbproperties ('created'="michael", "date"='2021-04-06');
OK
Time taken: 0.728 secondshive (default)> describe database extended test2;
OK
test2 hdfs://localhost:9000/user/hive/warehouse/test2.db hadoop USER {date=2021-04-06, created=michael}
Time taken: 0.022 seconds, Fetched: 1 row(s)
切換數據庫 use 數據庫名
hive (default)> show databases;
OK
default
hive
students
test
test1
test2
Time taken: 0.641 seconds, Fetched: 6 row(s)hive (default)> use students;
OK
Time taken: 0.027 seconds
hive (students)>
注:cmd里顯示數據庫名,需要 vim /usr/local/hive/bin/.hiverc 添加 set hive.cli.print.current.db=true;
刪除數據庫 drop database if exists test2數據庫名;
2. 修改數據庫
hive (default)> alter database student set dbproperties('created by'='Michael ming');hive (default)> describe database extended student;
student hdfs://localhost:9000/user/hive/warehouse/student.db hadoop USER {created by=Michael ming}hive (default)> alter database student set dbproperties('created by'='Michael haha');hive (default)> describe database extended student;
student hdfs://localhost:9000/user/hive/warehouse/student.db hadoop USER {created by=Michael haha}
3. 創建表
hive (default)> create table if not exists employees(> name string comment "employee name",> salary float comment "employee salary",> subordinates array<string> comment "name of subordinates",> deductions map<string, float> comment "key is name, value is percentages",> address struct<street:string, city:string, state:string, zip:int> comment "home address")> comment "info of employees"> location '/employees'> tblproperties('created_by'='michael ming', 'created_at'='2021-04-06 20:00:00');
OK
Time taken: 0.344 secondshive (default)> show tables;
OK
employee
employees
student1
Time taken: 0.057 seconds, Fetched: 3 row(s)
顯示 表屬性 show tblproperties employees;
hive (default)> show tblproperties employees;
OK
comment info of employees
created_at 2021-04-06 20:00:00
created_by michael ming
transient_lastDdlTime 1617710228
Time taken: 0.093 seconds, Fetched: 4 row(s)
復制表的模式,不拷貝數據 like 要復制的表名
hive (default)> create table if not exists employees1> like employees;
OK
Time taken: 0.193 secondshive (default)> show tables;
OK
employees
employees1
student1
Time taken: 0.022 seconds, Fetched: 3 row(s)
過濾查找
hive (default)> show tables "emp.*";
OK
employees
employees1
hive (default)> describe extended employees;
OK
name string employee name
salary float employee salary
subordinates array<string> name of subordinates
deductions map<string,float> key is name, value is percentages
address struct<street:string,city:string,state:string,zip:int> home address Detailed Table Information Table(tableName:employees, dbName:default, owner:hadoop, createTime:1617710228, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:employee name), FieldSchema(name:salary, type:float, comment:employee salary), FieldSchema(name:subordinates, type:array<string>, comment:name of subordinates), FieldSchema(name:deductions, type:map<string,float>, comment:key is name, value is percentages), FieldSchema(name:address, type:struct<street:string,city:string,state:string,zip:int>, comment:home address)], location:hdfs://localhost:9000/employees, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1617710228, created_at=2021-04-06 20:00:00, comment=info of employees, created_by=michael ming}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.746 seconds, Fetched: 7 row(s)hive (default)> describe formatted employees;
OK
# col_name data_type comment name string employee name
salary float employee salary
subordinates array<string> name of subordinates
deductions map<string,float> key is name, value is percentages
address struct<street:string,city:string,state:string,zip:int> home address # Detailed Table Information
Database: default
Owner: hadoop
CreateTime: Tue Apr 06 19:57:08 CST 2021
LastAccessTime: UNKNOWN
Retention: 0
Location: hdfs://localhost:9000/employees
Table Type: MANAGED_TABLE
Table Parameters: comment info of employees created_at 2021-04-06 20:00:00 created_by michael ming transient_lastDdlTime 1617710228 # Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params: serialization.format 1
Time taken: 0.069 seconds, Fetched: 32 row(s)
3.1 管理表
也叫內部表,刪除管理表時,數據也會被刪除
3.2 外部表
刪除外部表 external,只會刪除表的元信息,不會刪除數據
hive (default)> create external table if not exists extstudent(>id int,> name string,> sex string,> age int,> course string)> row format delimited fields terminated by '\t'> location '/home/hadoop/workspace/student.txt';
OK
hive (default)> load data local inpath '/home/hadoop/workspace/student.txt' overwrite into table extstudent;
Loading data to table default.extstudent
OK
Time taken: 1.117 secondshive (default)>select * from extstudent;
OK
1 michael male 18 bigdata
2 ming male 19 AI
3 lili female 18 math
4 huahua female 20 AI
Time taken: 0.768 seconds, Fetched: 4 row(s)
hive (default)> create table stu(>id int,> name string)> partitioned by (country string, sex string)> row format delimited fields terminated by '\t';
OK
Time taken: 0.041 secondshive (default)> load data local inpath '/home/hadoop/workspace/student.txt' overwrite into table stu partition(country='china', sex='male');
Loading data to table default.stu partition (country=china, sex=male)
OK
Time taken: 0.294 seconds
hive (default)>select * from stu;
OK
1 michael china male
2 ming china male
3 lili china male
4 huahua china male
Time taken: 0.069 seconds, Fetched: 4 row(s)# dfs -ls -R / 顯示文件
drwxr-xr-x - hadoop supergroup 0 2021-04-06 22:50 /user/hive/warehouse/stu
drwxr-xr-x - hadoop supergroup 0 2021-04-06 22:50 /user/hive/warehouse/stu/country=china
drwxr-xr-x - hadoop supergroup 0 2021-04-06 22:50 /user/hive/warehouse/stu/country=china/sex=male
-rwxr-xr-x 1 hadoop supergroup 114 2021-04-06 22:50 /user/hive/warehouse/stu/country=china/sex=male/student.txt
展示分區 show partitions
hive (default)> show partitions stu;
OK
country=china/sex=male
Time taken: 0.075 seconds, Fetched: 1 row(s)
set hive.mapred.mode=strict; 嚴格模式,需要帶where過濾才行,避免超大的查詢任務
hive (default)>set hive.mapred.mode=strict;
hive (default)>select * from stu;
FAILED: SemanticException Queries against partitioned tables without a partition filter are disabled for safety reasons. If you know what you are doing, please make sure that hive.strict.checks.large.query is set to false and that hive.mapred.mode is not set to 'strict' to enable them. No partition predicate for Alias "stu" Table "stu"
hive (default)>select * from stu where country='china';
OK
1 michael china male
2 ming china male
3 lili china male
4 huahua china male
Time taken: 0.402 seconds, Fetched: 4 row(s)
set hive.mapred.mode=nonstrict; 不嚴格模式
hive (default)>set hive.mapred.mode=nonstrict;
hive (default)>select * from stu;
OK
1 michael china male
2 ming china male
3 lili china male
4 huahua china male
Time taken: 0.077 seconds, Fetched: 4 row(s)
只查看指定分區
hive (default)> show partitions stu partition(country='china');
OK
country=china/sex=male
describe formatted stu; 也會顯示分區信息
hive (default)> describe formatted stu;
OK
# col_name data_type comment id int
name string # Partition Information # col_name data_type comment country string
sex string
省略。。。
添加分區 alter table stu add partition(country='china1', sex='female');
5. 刪除表
drop table if exists 表名;
6. 修改表
使用 alter table 語句,會修改元數據,但不會修改數據本身
重命名 表
hive (default)> alter table stu rename to stu_new;
增加多個分區
hive (default)> alter table stu_new add if not exists> partition(country='USA', sex='male')> partition(country='RUS', sex='male');hive (default)> show partitions stu_new;
country=RUS/sex=male
country=USA/sex=male
country=china/sex=male
country=china1/sex=female
修改分區路徑
hive (default)> alter table stu_new partition(country='china', sex='male')>set location "/user/hive/warehouse/mypath";
刪除分區
hive (default)> alter table stu_new drop if exists partition(country='USA',sex='male');
Dropped the partition country=USA/sex=male
OK
Time taken: 0.404 seconds
修改列信息
hive (default)> alter table stu_new> change column id new_id int> comment "changed new_id";
hive (default)> describe stu_new;
OK
new_id int changed new_id
name string
country string
sex string # Partition Information # col_name data_type comment country string
sex string
還可以修改字段位置 追加 after 字段 或者 first,請同時修改數據以匹配
增加列 在分區字段之前,增加新的字段到已有字段之后
hive (default)> alter table stu_new add columns(> height int comment "height of people",> hobby string comment "likes things");
OK
Time taken: 0.074 seconds
hive (default)> describe stu_new;
OK
new_id1 float changed new_id1 float
name string
height int height of people
hobby string likes things
country string
sex string # Partition Information # col_name data_type comment country string
sex string
刪除、替換列 replace columns 下面例子移除了之前所有的字段,并指定新的字段:
hive (default)> alter table stu_new replace columns(> c1 float comment "column 1",> c2 string,> c3 float comment 'column 3');hive (default)> describe stu_new;
OK
c1 float column 1
c2 string
c3 float column 3
country string
sex string # Partition Information # col_name data_type comment country string
sex string
注意:需要數據類型兼容(原來是float ,不能改為 int)
修改表屬性 增加或修改屬性,但無法刪除屬性
hive (default)> show tblproperties stu_new;
last_modified_by hadoop
last_modified_time 1617749844
transient_lastDdlTime 1617749844hive (default)> alter table stu_new set tblproperties(>'creator'='michael',>'ok'='good');hive (default)> show tblproperties stu_new;
creator michael
last_modified_by hadoop
last_modified_time 1617750323
ok good
transient_lastDdlTime 1617750323hive (default)> alter table stu_new set tblproperties(>'creator'='ming');hive (default)> show tblproperties stu_new;
creator ming
last_modified_by hadoop
last_modified_time 1617750355
ok good
transient_lastDdlTime 1617750355
修改存儲屬性
修改文件格式
hive (default)> alter table stu_new>set fileformat textfile;hive (default)> alter table stu_new> partition(country='china', sex='male')>set fileformat sequencefile;
修改 Serde,并指定屬性
hive (default)> alter table stu_new>set serde 'com.example.mySerDe'# 不改,就不需要這句> with serdeproperties(>'prop1'='v1',>'prop2'='v2');
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. at least one column must be specified for the table
修改存儲屬性
hive (default)> alter table stu_new> clustered by (c1, c2)> sorted by (c3)# 可選> into 5 buckets;