#编程
SQL语句的执行顺序是:
from -> join -> on -> where -> group by -> select后面的普通字段,聚合函数countsum -> having -> distinct -> order by -> limit
行转列:
例一:
核心代码:行转列的常规做法是,group by+sum(if())【或count(if())】
解答:
select year,
sum(if(month=1,amount,0)) as m1,
sum(if(month=2,amount,0)) as m2,
sum(if(month=3,amount,o)) as m3,
sum(if(month=4,amount,0)) as m4
from table2 group by year;
注:if(month=1,amount,0) 即 case when month=1 then amount else 0 end;
例二:
解答:
select Ddate,
count(case when shengfu='胜' then 1 else null end) as '胜',
count(case when shengfu='负' then 1 else null end) as '负'
from table1
group by ddate;
例三:
解答:
select qq,
concat_ws('_',collect_list(game)) as game
from tablea
group by qq;
反过来列转行解答:
select qq,
tmp.game
from tableb lateral view explode(split(game,'_')) tmp as game;
N日留存率:
核心代码 :
-> where日期 in(首日,1天后,7天后)
-> group by用户
->count(if(日期=首日,1,nu11)) as cnt
count(if(日期=1天后1nu1) as cnt2
count(if(日期=7天后,1nu11))as cnt8
->having cnt>0
->count(user_id) as 首日总数
count(if(cnt2>01nu11)) as 次日留存数
count(if(cnt8>0,1nu11)) as 7日留存数
->次日留存数/首日总数 as 次日留存率
7日留存数/首日总数 as 7日留存率
例一:
select count(cuid) as uv_4_1,
count(case when cnt_4_2>0 then 1 else null end) as uv_4_2.
count(case when cnt_4_8>0 then 1 else nullend)as uv_4_8
from(select cuid,
count(case when event day='2020-04-01' then 1 else null end) as cnt_4_1
count(case when event day=2020-04-02’ then 1 else null end) as cnt_4_2
count(case when event day=2020-04-08’ then 1 else nullend) as cnt 4 8
from tb_cuid_1d where event_day in (2020-04-01,2020-04-02',12020-04-08')
group by cuid
having cnt_4_1>0) as t
结果:
--提前过滤数据
where eventday in('2020-04-01'2020-04-02'2020-04-08')
group by cuid;
分组内TOP前几:
需求常见词:【每组xxx的第一个】【每组xxx的最后一个】【每组xxx的前n个】【每组最xx的前个】
公式:row_number()over(partition by 组名) as rn,再筛选rn<=N名
核心代码:
select * from
(select zzz,
row_number() over(partition by 组名xxx order by yyy) as rn
from table) as t
where rn<=N名
连续N天登录:
例一:
解答:
with t1 as (select distinct name ,'date’ from game),
t2 as (select *,
row_number() over (partition by name order by 'date`) as rn
from t1),
t3 as (select name,date_sub(`date',rn) as temp,count(1) as cnt
from t2
group by name,temp
having count(1)>=3)
select distinct nane from t3
例二:
解答:
with t1 as(select distinct name,dt from game),
t2 as(select *,
date_add(dt,2)as date2,
lead(dt,2)over(partition by name order by dt)as date3
from t1)
select distinct name from t2 where date2=date3;
以上SQL的核心代码:
->distinct
->date_add(dt,N-1) as date2
->lead(dt,N-1) over(partition by userid order by dt) as date3
->where date2=date3
->distinct
窗口函数:
特点:窗口函数最重要的特点是有OVER关键字,它代表定义窗口。
语法:窗口函数(字段名) over(partition by 分组字段 order by 排序字段)
注:order by 为不必要字段,不需要排序可不写。
常用函数分类:
聚合类的窗口函数: sum() over() 添加 order by 会计算累加,不加则为求和。
例句:
select *,
sum(score) over(partition by cid) as ’班级总分‘
sum(score) over (partition by cid order by score) as ’累加分数1‘
sum(score) over (partition by cid order by score rows between unbounded preceding and current row) as ‘累加分数2’
from score
结果:
2. count/avg/max/min
排序类的窗口函数:row_number, rank,dense_rank
例句:
select *,
--同一个班内,按分数排序打上序号
row_number(over (partition by cid order byscore) as '分数序号排名`
--考虑并列
rank() over (partition by cid order by score) as '分数序号排名2`
dense_rank()over(partition by cid order by score) as '分数序号排名3'
from score;
结果:
偏移类的,跨行的:lag/lead
例句一:
select *,
--同一班内,考得比自己低1名的分数是多少
lag(score,1,默认值)over(partition by cid order by score) as '低一名的分数',
--同一班内,考得比自己低2名的分数是多少
lag(score,2)over(partition by cid order by score) as '低2名的分数‘
from score;
结果:
例句二:
select *,
lead(score,1)over(partition by cid order by score) as '高一名的分数'
from score;
结果:
【仅了解,不常用】first_value / last_value
【仅了解,几乎不用】ntile / nth
实际案例:
with t1 as
--计算同一个人的分数总和
(select name,sum(amt) as sum_amt from emp group by name),
--排名及计算总分
t2 as (select *,
row_number()over(partition by null order by sum_amt desc)as rn,sum(sum_amt) over(partition bynull) as sum_all --全体总分
from t1)
select *,round(sum_amt*100/sum_all 2) || '%' as rate --占比
--|| 两根竖线等价于concat函数
from t2;
结果:
声明:有的资源均来自网络转载,版权归原作者所有,如有侵犯到您的权益 请联系邮箱:123456@qq.com 我们将配合处理!
原文地址:SQL面试常见题及核心代码示例发布于2024-04-01 12:02:07