文章目錄
- 一.需求
- 二.解決方案
- 2.1 同一天多次登陸去重
- 2.2 求上一次的登陸日期
- 2.3 打標記判斷是否間隔天數<=3
- 2.4 打分組的標記
- 2.5 分組求最大的間隔天數
- 三.MySQL 8.0寫法
一.需求
求每個用戶的最長連續登錄天數,兩個日期的間隔小于或等于 3 均視為連續登錄,比如 01-01 號登錄,最近的下一次登錄是 01-04 號,兩個日期的間隔等于 3 天,因此這兩個日期之間的天數都算作活躍天數,一共 4 天,
因為考慮MySQL版本問題,暫時不能使用分析函式,
二.解決方案
測驗資料:
create table test_login(user_id int,login_date timestamp);
insert into test_login values (1,'2020-01-01 00:01:00');
insert into test_login values (1,'2020-01-01 00:02:00');
insert into test_login values (1,'2020-01-01 00:03:00');
insert into test_login values (1,'2020-01-02 00:03:00');
insert into test_login values (1,'2020-01-05 00:03:00');
insert into test_login values (1,'2020-01-07 00:03:00');
insert into test_login values (1,'2020-01-11 00:03:00');
insert into test_login values (1,'2020-01-12 00:03:00');
insert into test_login values (1,'2020-01-13 00:03:00');
insert into test_login values (1,'2020-01-14 00:03:00');
insert into test_login values (1,'2020-01-17 00:03:00');
insert into test_login values (1,'2020-01-18 00:03:00');
insert into test_login values (1,'2020-01-19 00:03:00');
insert into test_login values (1,'2020-01-22 00:03:00');
insert into test_login values (2,'2020-01-01 00:04:00');
insert into test_login values (2,'2020-01-01 00:05:00');
insert into test_login values (2,'2020-01-01 00:06:00');
insert into test_login values (2,'2020-01-02 00:06:00');
insert into test_login values (2,'2020-01-07 00:06:00');
insert into test_login values (2,'2020-01-11 00:06:00');
insert into test_login values (2,'2020-01-12 00:06:00');
insert into test_login values (2,'2020-01-14 00:06:00');
insert into test_login values (2,'2020-01-18 00:06:00');
insert into test_login values (2,'2020-01-19 00:06:00');
insert into test_login values (2,'2020-01-22 00:06:00');
代碼
SELECT user_id,
max(diff_days) + 1 max_diff_days
from
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from
(
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個用戶 每一個連續區間設定為不同的flag 標記為flag2
group by user_id,flag2
) tmp6 -- 計算出每個連續區間 最大值和最小值 以及連續間隔天數
group by user_id
這樣看起來是不是覺得非常的復雜,下面我們拆解開
2.1 同一天多次登陸去重
考慮同一天會有多次登陸的,此處用group by陳述句進行去重,同一天只保留一條記錄
代碼:
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
測驗記錄:
mysql> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d');
+---------+------------+
| user_id | login_date |
+---------+------------+
| 1 | 2020-01-01 |
| 1 | 2020-01-02 |
| 1 | 2020-01-05 |
| 1 | 2020-01-07 |
| 1 | 2020-01-11 |
| 1 | 2020-01-12 |
| 1 | 2020-01-13 |
| 1 | 2020-01-14 |
| 1 | 2020-01-17 |
| 1 | 2020-01-18 |
| 1 | 2020-01-19 |
| 1 | 2020-01-22 |
| 2 | 2020-01-01 |
| 2 | 2020-01-02 |
| 2 | 2020-01-07 |
| 2 | 2020-01-11 |
| 2 | 2020-01-12 |
| 2 | 2020-01-14 |
| 2 | 2020-01-18 |
| 2 | 2020-01-19 |
| 2 | 2020-01-22 |
+---------+------------+
21 rows in set (0.00 sec)
mysql>
2.2 求上一次的登陸日期
我們需要找到上一次登陸日期,與此次的登陸日期對比,才可以判斷是否符合小于等于3天的條件
此處,我們可以使用標量子查詢來實作
需要記錄的每個用戶的第一條登陸資訊上一天為null
代碼:
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
測驗記錄:
mysql> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ;
+---------+------------+---------------+
| user_id | login_date | up_login_date |
+---------+------------+---------------+
| 1 | 2020-01-01 | NULL |
| 1 | 2020-01-02 | 2020-01-01 |
| 1 | 2020-01-05 | 2020-01-02 |
| 1 | 2020-01-07 | 2020-01-05 |
| 1 | 2020-01-11 | 2020-01-07 |
| 1 | 2020-01-12 | 2020-01-11 |
| 1 | 2020-01-13 | 2020-01-12 |
| 1 | 2020-01-14 | 2020-01-13 |
| 1 | 2020-01-17 | 2020-01-14 |
| 1 | 2020-01-18 | 2020-01-17 |
| 1 | 2020-01-19 | 2020-01-18 |
| 1 | 2020-01-22 | 2020-01-19 |
| 2 | 2020-01-01 | NULL |
| 2 | 2020-01-02 | 2020-01-01 |
| 2 | 2020-01-07 | 2020-01-02 |
| 2 | 2020-01-11 | 2020-01-07 |
| 2 | 2020-01-12 | 2020-01-11 |
| 2 | 2020-01-14 | 2020-01-12 |
| 2 | 2020-01-18 | 2020-01-14 |
| 2 | 2020-01-19 | 2020-01-18 |
| 2 | 2020-01-22 | 2020-01-19 |
+---------+------------+---------------+
21 rows in set (0.00 sec)
2.3 打標記判斷是否間隔天數<=3
求出上一個登陸日期,與本次登陸日期比較,如果<=3,則標記為0,否則標記為1
需要注意的是上一步的空值直接標記為1
這樣通過flag1 我們就可以看到連續登陸的區間了
代碼:
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
測驗記錄:
mysql> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ;
+---------+------------+-------+
| user_id | login_date | flag1 |
+---------+------------+-------+
| 1 | 2020-01-01 | 1 |
| 1 | 2020-01-02 | 0 |
| 1 | 2020-01-05 | 0 |
| 1 | 2020-01-07 | 0 |
| 1 | 2020-01-11 | 1 |
| 1 | 2020-01-12 | 0 |
| 1 | 2020-01-13 | 0 |
| 1 | 2020-01-14 | 0 |
| 1 | 2020-01-17 | 0 |
| 1 | 2020-01-18 | 0 |
| 1 | 2020-01-19 | 0 |
| 1 | 2020-01-22 | 0 |
| 2 | 2020-01-01 | 1 |
| 2 | 2020-01-02 | 0 |
| 2 | 2020-01-07 | 1 |
| 2 | 2020-01-11 | 1 |
| 2 | 2020-01-12 | 0 |
| 2 | 2020-01-14 | 0 |
| 2 | 2020-01-18 | 1 |
| 2 | 2020-01-19 | 0 |
| 2 | 2020-01-22 | 0 |
+---------+------------+-------+
21 rows in set (0.01 sec)
2.4 打分組的標記
因為同一個用戶存在多個連續登陸的區間,根據上一個步驟的flag1沒辦法區分開,此時需要區分開同一個用戶的不同連續登陸區間,
因為MySQL版本不支持分析函式,只能通過臨時表表連接的方式實作,再考慮去除重復,需要用到分組陳述句進行去重,
其實flag2是這個解決方案核心所在,同一個用戶第一次連續登陸區間標記為1,第二次則累加為2,以此類推,
代碼:
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
測驗記錄:
mysql> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1;
+---------+------------+-------+-------+
| user_id | login_date | flag1 | flag2 |
+---------+------------+-------+-------+
| 1 | 2020-01-01 | 1 | 1 |
| 1 | 2020-01-02 | 0 | 1 |
| 1 | 2020-01-05 | 0 | 1 |
| 1 | 2020-01-07 | 0 | 1 |
| 1 | 2020-01-11 | 1 | 2 |
| 1 | 2020-01-12 | 0 | 2 |
| 1 | 2020-01-13 | 0 | 2 |
| 1 | 2020-01-14 | 0 | 2 |
| 1 | 2020-01-17 | 0 | 2 |
| 1 | 2020-01-18 | 0 | 2 |
| 1 | 2020-01-19 | 0 | 2 |
| 1 | 2020-01-22 | 0 | 2 |
| 2 | 2020-01-01 | 1 | 1 |
| 2 | 2020-01-02 | 0 | 1 |
| 2 | 2020-01-07 | 1 | 2 |
| 2 | 2020-01-11 | 1 | 3 |
| 2 | 2020-01-12 | 0 | 3 |
| 2 | 2020-01-14 | 0 | 3 |
| 2 | 2020-01-18 | 1 | 4 |
| 2 | 2020-01-19 | 0 | 4 |
| 2 | 2020-01-22 | 0 | 4 |
+---------+------------+-------+-------+
21 rows in set (0.01 sec)
2.5 分組求最大的間隔天數
其實有了上一個步驟的flag2,就可以判斷區分開每一個用戶的每一個連續登陸區間,直接進行分組就可以求出該連續區間 最大和最小的登陸日期,兩個日期差值就是間隔天數,最后根據user_id進行分組,求最大的間隔天數就是最終結果,
需要審題 01-01到01-04 間隔是3天,但是算4天連續登陸 所以datediff函式的結果要+1
代碼:
SELECT user_id,
max(diff_days) +1 max_diff_days
from
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from
(
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過標量子查詢實作上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個用戶 每一個連續區間設定為不同的flag 標記為flag2
group by user_id,flag2
) tmp6 -- 計算出每個連續區間 最大值和最小值 以及連續間隔天數
group by user_id
測驗記錄:
mysql> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from
-> (
-> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
-> ) tmp5 -- 每一個用戶 每一個連續區間設定為不同的flag 標記為flag2
-> group by user_id,flag2;
+---------+-------+----------------+----------------+-----------+
| user_id | flag2 | min_login_date | max_login_date | diff_days |
+---------+-------+----------------+----------------+-----------+
| 1 | 1 | 2020-01-01 | 2020-01-07 | 6 |
| 1 | 2 | 2020-01-11 | 2020-01-22 | 11 |
| 2 | 1 | 2020-01-01 | 2020-01-02 | 1 |
| 2 | 2 | 2020-01-07 | 2020-01-07 | 0 |
| 2 | 3 | 2020-01-11 | 2020-01-14 | 3 |
| 2 | 4 | 2020-01-18 | 2020-01-22 | 4 |
+---------+-------+----------------+----------------+-----------+
6 rows in set (0.01 sec)
mysql>
mysql> SELECT user_id,
-> max(diff_days) +1 max_diff_days
-> from
-> (
-> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from
-> (
-> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp3 -- 打標記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過標量子查詢實作上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
-> ) tmp5 -- 每一個用戶 每一個連續區間設定為不同的flag 標記為flag2
-> group by user_id,flag2
-> ) tmp6 -- 計算出每個連續區間 最大值和最小值 以及連續間隔天數
-> group by user_id
-> ;
+---------+---------------+
| user_id | max_diff_days |
+---------+---------------+
| 1 | 12 |
| 2 | 5 |
+---------+---------------+
2 rows in set (0.00 sec)
mysql>
三.MySQL 8.0寫法
MySQL 8.0的with陳述句以及分析視窗函式,可以使上面的解決方案的代碼簡潔度大大提升
代碼:
with tmp1 AS
-- tmp1臨時表 一天多次登陸算一次
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
),
tmp2 as
-- tmp2臨時表 通過lag獲取上一次登陸時間 如是第一條給默認值
(
select user_id,
login_date,
lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_date
from tmp1
),
tmp3 AS
-- 判斷是否符合3天內標準 打標記flag1
(
select user_id,
login_date,
case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end as flag1
from tmp2
),
tmp4 AS
-- 通過分析函式將每個用戶 每一個連續登陸期間進行標記 flag2
(
select user_id,
login_date,
flag1,
sum(flag1) over(partition by user_id order by login_date) as flag2
from tmp3
),
tmp5 AS
-- 通過user_id flag2進行分組
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from tmp4
group by user_id,flag2
)
SELECT user_id,
max(diff_days) + 1 as diff_days
from tmp5
group by user_id;
測驗記錄:
mysql> select version();
+-----------+
| version() |
+-----------+
| 8.0.19 |
+-----------+
1 row in set (0.00 sec)
mysql> with tmp1 AS
-> -- tmp1臨時表 一天多次登陸算一次
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ),
-> tmp2 as
-> -- tmp2臨時表 通過lag獲取上一次登陸時間 如是第一條給默認值
-> (
-> select user_id,
-> login_date,
-> lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_d
-> from tmp1
-> ),
-> tmp3 AS
-> -- 判斷是否符合3天內標準 打標記flag1
-> (
-> select user_id,
-> login_date,
-> case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end as flag1
-> from tmp2
-> ),
-> tmp4 AS
-> -- 通過分析函式將每個用戶 每一個連續登陸期間進行標記 flag2
-> (
-> select user_id,
-> login_date,
-> flag1,
-> sum(flag1) over(partition by user_id order by login_date) as flag2
-> from tmp3
-> ),
-> tmp5 AS
-> -- 通過user_id flag2進行分組
-> (
-> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from tmp4
-> group by user_id,flag2
-> )
-> SELECT user_id,
-> max(diff_days) + 1 as diff_days
-> from tmp5
-> group by user_id;
+---------+-----------+
| user_id | diff_days |
+---------+-----------+
| 1 | 12 |
| 2 | 5 |
+---------+-----------+
2 rows in set (0.00 sec)
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/242444.html
標籤:其他
上一篇:第 0 章 Readme
下一篇:Oracle資料庫實驗
