Hive SQL 練習(這個秒退是怎麼回事啊?寫了半天東西都沒了,瞬間整個人都淩亂了)

首先將練習用的兩張表上傳到hdfs上
命令如下:
hadoop fs -put /opt/dep.txt /dep.txt
hadoop fs -put /opt/employee.txt /employee.txt

然後開始寫練習

創建數據庫
create database bs17;
use bs17;
創建員工信息表
create table employee(
emp_id string
,emp_name string
,status string
,salary string
,status_salary string
,in_work_date string
,leader_id string
,dep_id string
)row format delimited
fields terminated by '\t'

row format delimited fileds terminated by '\t' 表示將傳進來的文件按照空格分割寫入到表格中

闖入文件 employee.txt
load data inpath '/employee.txt' overwrite into table employee;

查詢表
select * from employee;

同理創建第二個表
create table department(
dep_id string
,dep_name string
,address string
)row format delimited
fields terminated by '\t'
傳入文件以空格分割寫入到表格中
load data inpath '/dep.txt'overwrite into table department;
查詢表
select * from department
之前的創建表都是采用string字段這樣做的原因是防止傳入的數據損失精度失真以原文件的格式類型為準 txt 通用 string
接下來通過 as 來更改表結構將字段改為所需要的
create table dw_employee as select
cast(emp_id as int) emp_id
,emp_name
,status
,cast(salary as double)salary
,cast(status_salary as double)status_salary
,cast(in_work_date as date)in_work_date
,cast(leader_id as int)leader_id
,cast(dep_id as int)dep_id
from employee

dw_employee 為創建好的更改好字段的表
select * from dw_employee
查詢表結構語句
describe formatted dw_employee
同理更改第二張表的表結構
create table dw_department as
select cast(dep_id as int)dep_id
,dep_name
,address
from department

select * from dw_department
除了通過 as 複製表之外還可以通過 like 來克隆不過 like 克隆的隻有表結構沒有數據
create table employee_clone like employee;
select * from employee_clone

查找employee 表中的 id name 月薪季薪年薪
select emp_id
,emp_name
,salary mon_salary
,salary*3 season_salary
,salary*12 year_salary
from dw_employee
查找員工的月工資月薪加獎金
select emp_id
,emp_name
,salary+status_salary getMoney
from dw_employee
插入數據 id name 其他為null
insert into dw_employee(emp_id,emp_name) values(1111,'aaaa')
nvl 表示條件如果有字段信息則表示字段信息如果為null 則用後的表示
如有職位則表示沒有則用普通員工代替
如果有入職日期則表示沒有則用2015-5-1表示
select emp_id
,emp_name
,nvl(status,'普通員工') job
,nvl(in_work_date,'2015-5-1') in_work_date
from dw_employee

複製員工表，表名為emp_copy
create table emp_copy as select * from dw_employee
機構中有多少種職位(就是將所有的status 聚合展示出來展示出來幾種就有幾種)
select distinct status from dw_employee

薪水高於6000的員工

select * from dw_employee where salary>6000

職位是analyst 的員工

select * from dw_employee where status ='analyst'

以小寫格式展示職位信息（lower()）
select emp_id
,emp_name
,lower(status)
from dw_employee
忽略大小寫匹配職位等於‘ANALYST’的記錄
select * from dw_employee where upper(status)='ANALYST'

查詢出薪水在5000到8000之間的員工（between and）
select * from dw_employee where salary between 5000 and 8000;
查詢出2016年入職的員工
select * from dw_employee where year(in_work_date)=2016

薪水不在5000到8000的員工
select * from dw_employee where salary not between 5000 and 8000;

查詢出職位是Manager或者analyst的員工
select * from dw_employee where status in(Manager,analyst);

模煳查詢like %
查詢出沒有崗位工資的員工
select * from dw_employee where status_salary is null;

查詢有崗位工資的員工
select * from dw_employee where status_salary is not null;

查詢出不在10部門和不再30部門的員工
select * from where dw_employee where dep_id not in(10,30)

DDL 數據定義語言
insert select

create table employee_leader like dw_employee
insert into dw_employee_leader select * from dw_employee where leader_id is null

insert overwrite table dw_employee_leader select * from dw_employee where leader_id is null

排序問題
oder by 升序 asc 降序 desc
select * from dw_employee order by salary desc;

hive 的order by 全排序是通過隻設置一個reducer 節點的方式來實現的
通過hive可以很輕鬆的實現二次排序
select * from dw_employee order by salary desc,salary_status desc;

查詢多少個員工
select count(*) from dw_employee

分組表達式 Group by 聚合函數 Count max min avg sum
查詢有多少個員工
select count(*) from dw_employee
查詢有多少個姓張的員工
select count(*) from dw_employee where emp_name like '%張%'
計算員工的總薪水是多少
select sum(salary) from dw_employee

計算員工的人數總和、薪水綜合、平均薪水，最高薪水、最低薪水
select
count(*)
,sum(salary)
,avg(salary)
,max(salary)
,min(salary)
from dw_employee

求出每個部門的最高薪水、最低薪水、平均薪水、總薪水、總人數
select
count(*)
,sum(salary)
,avg(salary)
,max(salary)
,min(salary)
from dw_employee
group by dep_id;

----求出總人數超過兩人的部門的最高最低平均總薪水

select
count(*)
,sum(salary)
,avg(salary)
,max(salary)
,min(salary)
from dw_employee
group by dep_id;
having count(*)>2

having 的作用是在聚合後做一次刪選 Having 用來對分組後的結果進行進一步的過濾

select * from dw_employee where salary in (select max(salary) from dw_employee)

-----in 在後麵添加子查詢結果作為判斷條件如果子查詢中有索引的話 in 是用不到這個索引因此 in對接子查詢的效率最低

子查詢
查詢出最高薪水的人的信息
select * from dw_employee where salary in (select max(salary) from dw_employe)
最低薪水的人
select * from dw_employee where salary in (select min(salary) from dw_employe)

工資高於平均薪水的人的信息
目前 hive目前不支持子查詢的非等值查詢
所以使用笛卡爾乘積的方式

因為參數原因不允許使用笛卡爾乘積所以需要通過指令設置一下讓它允許
set hive.strict.checks.cartesian.product=false
select a.*
,b.*
from dw_employee a, (select avg(salary)avesalary from dw_employee)b
where a.salary >b.avgsalary
誰的薪水比小趙的薪水高
select a.*
from dw_employee a,(select salary from dw_employee where emp_name='小趙')b

where a.salary>b.salary
研發部有哪些職位
select status
from dw_employee a
where exists(select dep_id
from department b
where dep_name ='研發部'
and a.dep_id =b.dep_id)

ALL 子查詢 Any(子查詢)

誰是妖姬的同部門的同事
select * from dw_employee where dep_id in(select dep_id from dw_employee where emp_name ='妖姬')

如果有兩個叫妖姬誰是妖姬的同事???

inner join 是兩張表根據關聯條件相互過濾能夠關聯上的數據才會出現在結果集中

Exists
not exists
union 去重
union all 不去重

薪水大於8000或者小於2000或者等於5000的員工
select * from dw_employee where salary>8000
union
select * from dw_employee where salary <2000
union
select * from dw_employee where salary=5000

交集 intersect
差集 minus

lefter outer join right outer join

最後更新：2017-10-24 23:33:21

Hive SQL 練習(這個秒退是怎麼回事啊?寫了半天東西都沒了,瞬間整個人都淩亂了)

row format delimited fileds terminated by '\t' 表示將傳進來的文件按照空格分割寫入到表格中

上一篇：雲棲竹徑三株楓香如親密三兄弟相依相伴紅葉如映日彩霞

下一篇： Oracle實例恢複（初稿）

相關內容

熱門內容

最新內容

Hive SQL 練習(這個秒退是怎麼回事啊?寫了半天 東西都沒了,瞬間整個人都淩亂了)

row format delimited fileds terminated by '\t' 表示將傳進來的文件按照空格分割 寫入到表格中

上一篇： 雲棲竹徑三株楓香如親密三兄弟相依相伴 紅葉如映日彩霞

下一篇： Oracle實例恢複（初稿）

相關內容

熱門內容

最新內容

Hive SQL 練習(這個秒退是怎麼回事啊?寫了半天東西都沒了,瞬間整個人都淩亂了)

row format delimited fileds terminated by '\t' 表示將傳進來的文件按照空格分割寫入到表格中

上一篇：雲棲竹徑三株楓香如親密三兄弟相依相伴紅葉如映日彩霞