MySQL 学习笔记（刷题篇）

本文介绍: ONLY_FULL_GROUP_BY的语义就是确定select target list中的所有列的值都是明确语义，因此这里的c oales ce是不好使的，可以通过 any_value()函数来抑制ONLY_FULL_GROUP_BY值被拒绝，any_value()会选择被分到同一组的数据里第一条数据的指定列值作为返回数据。题目：按年月进行分组，统计每组的用户 id 个数（也就是这个月有多少活跃用户），统计每组的用户活跃天数的平均值（总天数/总人数）个用户一个月内的登录天数之和。计算查询出的所有记录的。

SQL进阶挑战

聚合 分组 查询

SQL123

select tag, difficulty, 
    round((sum(score) - max(score) - min(score) ) / (count(score) - 2) ,1)
as clip_avg_score
from examination_info as ei, exam_record as er
where ei.exam_id = er.exam_id  
and ei.tag = 'SQL'  
and ei.difficulty = 'hard' 
and er.score is not null;

SQL124

IF(expr1 , expr2 , expr3)，expr1的值为TRUE 返回 ex pr2，否则返回 ex pr3
使用 distinct 是需要考虑 null 的，它会把 null 也算成一种情况
但是使用 count(字段) 是不用考虑 null 的，它不会计 null 为一种情况

select count(id) as total_pv, 
count(submit_time) as complete_pv, 
count(distinct if(submit_time is not null, exam_id, null)) as complete_exam_cnt
from exam_record

select count(id) as total_pv, 
count(submit_time) as complete_pv, 
count(distinct exam_id and score is not null) as complete_exam_cnt
from exam_record

SQL125

# 这样写为什么就错？
select min(score) as min_score_over_avg
from exam_record  #这样写没有保证查询的试卷类型是SQL
where score &gt;= (
    select avg(score)
    from exam_record as er , examination_info as ei
    where ei.tag = 'SQL'
    and ei.exam_id = er.exam_id
    and er.score is not null
);
# correct
select min(score) as min_score_over_avg
from exam_record as er , examination_info as ei
where ei.tag = 'SQL'
and ei.exam_id = er.exam_id
and er.score is not null
and score &gt;= (
    select avg(score)
    from exam_record as er
    where er.exam_id = ei.exam_id
    and er.score is not null
);

SQL126

题目：按年月进行分组，统计每组的用户id个数（也就是这个月有多少活跃用户），统计每组的用户活跃天数的平均值（总天数/总人数）
总天数计算方法：

∑

−

第

个用户一个月内的登录天数之和

sum_{i=1}^{i=last-user} 第i个用户一个月内的登录天数之和

$\sum_{i = 1}^{i = l a s t - u ser} 第 i 个用户一个月内的登录天数之和$

DATE_FORMAT(date,fmt) 按照字符串 fmt 格式化日期 date 值
YEAR(date) / MONTH(date) / DAY(date) 返回具体的日期值

count( distinct uid, date_format(submit_time, '%y%m%d') )，这里的知识点：count函数内本只能接收一个参数，dist in ct 修饰是所有字段的，并不是修饰一个字段
语句含义：去掉一个用户在一天内的多次登录计数的重复计数，保证如果同一用户在同一天进行了多次活动，只有一次会被计数。

select date_format(submit_time, '%Y%m') as month, # %Y四位年份，%m两位数字月份
    round( count(distinct uid, date_format(submit_time, '%y%m%d') )  / count(distinct uid) , 2) as avg_active_days,
    count(distinct uid) as mau #统计组内不同用户id数量
from exam_record
where submit_time is not null
and year(submit_time) = 2021
group by date_format(submit_time, '%Y%m') # 按照年月分组

SQL127

select date_format(submit_time, '%Y%m') as submit_month,
    count(distinct uid, submit_time) as month_q_cnt,
    round(count(distinct uid, submit_time) 
    / max(DAY(LAST_DAY(submit_time))) #这里必须用一个聚合函数，由于汇总时的天数按31算，因此用max最为合适，day+lasy_day一起得到当月的天数
    , 3) as avg_day_q_cnt
from practice_record
where year(submit_time) = '2021' #过滤字段写到分组前
group by submit_month

union

select '2021汇总' as submit_month,
count(distinct uid, submit_time) as month_q_cnt,
round(count(distinct uid, submit_time) / 31, 3) as avg_day_q_cnt
from practice_record
where year(submit_time) = '2021'

order by submit_month;

COALESCE 是一个函数，coales ce (expression_1, expression_2, …,expression_n) ，依次检验，返回第一个不是 null 的值

MySQL5.7之后，sql_mode中ONLY_FULL_GROUP_BY模式默认设置为打开状态。
ONLY_FULL_GROUP_BY的语义就是确定select target list中的所有列的值都是明确语义，因此这里的coales ce是不好使的，可以通过any_value()函数来抑制ONLY_FULL_GROUP_BY值被拒绝，any_value()会选择被分到同一组的数据里第一条数据的指定列值作为返回数据

GROUP BY中使用WITH ROLLUP
WITH ROLLUP，使用 WITH ROLLUP 关键字之后，在所有查询出的分组记录之后增加一条记录，该记录计算查询出的所有记录的总和
注意：当使用ROLLUP时，不能同时使用ORDER BY子句进行结果排序，即ROLLUP和ORDER BY是互相排斥的。

SELECT
    any_value(coalesce(DATE_FORMAT(submit_time,"%Y%m"),'2021汇总')) as submit_month,
    count(submit_time) as month_q_cnt,
    # 因为汇总除的数也是31，因此这里取max聚合
    round(count(submit_time) / max(day(last_day(submit_time))),3) as avg_day_q_cnt
FROM practice_record
WHERE year(submit_time) = '2021'
GROUP BY date_format(submit_time,"%Y%m") with rollup;

SQL 128

使用 count() 函数实现条件统计的基础是：对于值为NULL的记录不计数，利用这个性质我们可以轻松统计出值不为 NULL 的记录，再统计总记录，即可得到值为 NULL 的记录。

# 统计num大于200的记录
select count(num > 200 or null) from a;
# or null 作用就是当条件不满足时，函数变成了count(null)不会统计数量
# 但是 num > 200 这个条件不成立时的 false 是会被统计到的

GROUP_CONCAT() 函数是mysql中非常实用的聚合函数，将给分组内的值连接为一个字符串。其完整语法：

GROUP_CONCAT([DISTINCT] 要连接的字段 [ORDER BY 排序字段 ASC/DESC] [SEPARATOR ‘分隔符’])

select uid,
    count(uid) - count(submit_time) as incomplete_cnt,
    count(submit_time) as complete_cnt,
    group_concat(distinct date_format(start_time, '%Y-%m-%d'), ':', tag
    Order BY start_time ASC #排序字段
    SEPARATOR ';') as detail
from exam_record as er
inner join examination_info as ei
on er.exam_id = ei.exam_id
where year(start_time) = '2021'  #过滤字段写到分组前
group by uid
having incomplete_cnt < 5 and incomplete_cnt > 1
and complete_cnt >= 1
order by incomplete_cnt desc;

多表 查询

SQL 129

先考虑简单的，找出 “当月均完成试卷数”不小于3的用户们，然后按 tag 分组统计存在 start_time 的作答记录个数即可

select tag, count(start_time) as tag_cnt
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where uid in (
    select uid
    from exam_record as er
    inner join examination_info as ei
    on er.exam_id = ei.exam_id
    group by uid, date_format(start_time, '%Y%m')
    having count(date_format(submit_time, '%Y%m')) >= 3
)
group by tag
order by tag_cnt desc;

SQL 130

select ei.exam_id as exam_id,
    count(distinct uid) as uv,
    # round(avg(score) ,1) as avg_score
    round(sum(score) / count(score) , 1) as avg_score
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where date_format(start_time, '%Y%m%d') in ( # 时间
    select date_format(release_time, '%Y%m%d') # 先弄出SQL试卷的发出的时间字段
    from examination_info
    where tag = 'SQL'
)
and uid in ( # 用户
    select uid      # 再弄出等级大于5的用户的uid
    from user_info
    where level > 5
)
and tag = 'SQL'  # SQL试卷
group by ei.exam_id #所有的SQL试卷按exam_id分组
order by uv desc, avg_score;

SQL 131

select level, count(level) as level_cnt
from user_info as ui, (
    select uid
    from exam_record as er
    inner join examination_info as ei
    on er.exam_id = ei.exam_id
    where tag = 'SQL' &amp;&amp; score > 80
) as tmp
where ui.uid = tmp.uid
group by level
order by level_cnt desc;

SQL 132

再套一个 select 来使得子查询的排序独立

select * from (
select exam_id as tid, 
    count(distinct uid) as uv,
    count(start_time) as pv 
from exam_record
group by exam_id
order by uv desc, pv desc
) as t1

union

select * from (
select question_id as tid,
    count(distinct uid) as uv,
    count(submit_time) as pv
from practice_record
group by question_id
order by uv desc, pv desc
) as t2

SQL 133

TIME_TO_SEC() 将时间差转换为秒

select uid, 'activity1' as activity
from exam_record
group by uid
having min(score) >= 85

union

select uid, 'activity2' as activity
from examination_info as ei
inner join exam_record as er
on er.exam_id = ei.exam_id
where score > 80
and difficulty = 'hard'
and TIME_TO_SEC(timediff(submit_time, start_time)) < duration * 30

order by uid;

其他操作

SQL 146

select uid,
    floor(avg(any_value(coalesce(score, 0)))) as avg_score,
    round(avg(
        if(submit_time is not null, timestampdiff(minute, start_time, submit_time), duration)
        ), 1) as avg_time_took
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where difficulty = 'hard'
and uid in (
    select uid
    from user_info
    where level = 0
)
group by uid

SQL 147

select uid, nick_name, achievement
from user_info
where nick_name like '牛客%'
and nick_name like '%号'
and achievement between 1200 and 2500
and uid in (
    select uid
    from exam_record
    group by uid
    having max(date_format(start_time, '%Y%m')) = '202109'

    union 

    select uid
    from practice_record
    group by uid
    having max(date_format(submit_time, '%Y%m')) = '202109'
)

select uid, nick_name, achievement
from user_info
where nick_name like '牛客%'
and nick_name like '%号'
and achievement between 1200 and 2500
and (
    uid in(
        select uid
        from exam_record
        group by uid
        having max(date_format(start_time, '%Y%m')) = '202109'
    )
    or uid in(
        select uid
        from practice_record
        group by uid
        having max(date_format(submit_time, '%Y%m')) = '202109'
    )
)

SQL 148（正则表达式）

用正则表达式匹配纯数字或者中间纯数字

select uid, er.exam_id,
    round(avg(score) ,0) as avg_score
from examination_info as ei
inner join exam_record as er
on ei.exam_id = er.exam_id
where uid in (
    select uid
    from user_info
    where nick_name regexp '^牛客[0-9]+号$'
    or nick_name regexp '^[0-9]+$'
)
and ei.exam_id in (
    select exam_id
    from examination_info
    where tag regexp '^[Cc]'
)
and score is not null
group by uid, exam_id
order by uid, avg_score

SQL 149（WITH AS）

比较复杂的一个题，需要用 WITH AS 存一下查询

with t as (
    select ui.uid as uid,
        count(start_time) - count(submit_time) as incomplete_cnt,
        round(if(count(start_time) - count(submit_time) > 0,
                (count(start_time) - count(submit_time)) / count(start_time),
                0
                )
            ,3) as incomplete_rate,
        level,
        count(start_time) as total_cnt # 作答个数
    from user_info as ui
    left join exam_record as er
    on ui.uid = er.uid
    group by uid
)

select uid, incomplete_cnt, incomplete_rate
from t
where exists(
    select uid from t where level = 0 and incomplete_cnt > 2
)
and level = 0
union
select uid, incomplete_cnt, incomplete_rate
from t
where not exists (
    select uid from t where level = 0 and incomplete_cnt > 2
)
and total_cnt > 0 # 有作答记录的用户
order by incomplete_rate

SQL150（CASE WHEN THEN）

很烂但有用的代码

select ui.level,
    case 
        when score >= 90 then '优'
        when score >= 75 then '良'
        when score >= 60 then '中'
        when score >= 0 then '差' end as score_grade,
    round(count( 
        case 
        when score >= 90 then '优'
        when score >= 75 then '良'
        when score >= 60 then '中'
        when score >= 0 then '差' end) / num
        , 3) as ratio
from exam_record as er, user_info as ui, (
    select level, count(level) as num
    from exam_record as er
    inner join user_info as ui
    on er.uid = ui.uid
    where score is not null
    group by level
    order by level desc
) as tmp
where er.uid = ui.uid
and tmp.level = ui.level
and score is not null
group by level, score_grade
order by level desc, ratio desc

SQL 152

select er.uid, level, register_time, score as max_score
from exam_record as er
inner join user_info as ui
on er.uid = ui.uid
where exam_id in ( # 把exam_record筛的只剩下job为算法的人做的算法试卷记录
    select exam_id
    from examination_info
    where tag = '算法'
)
and er.uid in (
    select uid
    from user_info
    where job = '算法'
)
and score is not null # 还得做完
order by score desc
limit 6, 3;

SQL 153（substring_index）

substring_index(str,delim,count)，str：要处理的字符串，delm：分隔符

SELECT exam_id,
    substring_index(tag, ',', 1) AS tag,
    substring_index(substring_index(tag, ',', 2), ',', -1) AS difficulty,
    substring_index(tag, ',', -1) AS duration
FROM examination_info
WHERE tag LIKE '%,%';

SQL 154（IF）

简单的 IF 应用

select uid, (
    if(char_length(nick_name) > 13, 
    concat(substring(nick_name, 1, 10), '...'),
    nick_name
    )
) as nick_name
from user_info
where char_length(nick_name) > 10;

SQL 155

这个题写的我脑子有点乱

select t1.tag, t2.total_num
from (
    select tag, num # 查询试卷作答数小于3的exam_id对应的tag和个数
    from examination_info as ei, ( 
        select exam_id, count(exam_id) as num #按exam_id分组，并统计个数
        from exam_record
        group by exam_id
    ) as tmp
    where ei.exam_id = tmp.exam_id # 多表查询
    and num < 3
) as t1, (
    select tag, sum(num) as total_num #按tag分类，把大写的tag聚合起来统计个数
    from examination_info as ei, (
        select exam_id, count(exam_id) as num
        from exam_record
        group by exam_id
    ) as tmp
    where ei.exam_id = tmp.exam_id
    group by tag 
) as t2
where upper(t1.tag) = t2.tag  # 小写的t1.tag匹配大写的t2.tag
and t1.tag != t2.tag