拉链表流程

1.从ODS层获取增量数据(上一天新增更新数据)
2.拿着DWD原始拉链表数据 left join 增量数据 ,修改原始拉链历史数据结束时间
3.拿着left join结果union all 增量数据
4.把最新拉链数据优先保存到DWD对应临时表中
5.使用insert+select 方式将临时表中数据灌入DWD拉链表中

拉链实现流程:

DWD层开发

DWD层: 数仓明细层(清洗转换、降维操作) 此层核心目标: 基于数据探查情况, 对相关表数据进行合并

会员基础信息表:

建表操作:

-- DWD member master table. Each row carries an end_date column and is partitioned by
-- start_date, i.e. the pair delimits the validity window of one member version.
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_member_union_i (
    zt_id BIGINT COMMENT '中台会员ID',
    member_id BIGINT COMMENT '会员ID',
    user_id BIGINT COMMENT '用户ID',
    card_no STRING COMMENT '卡号',
    member_name STRING COMMENT '会员名称',
    mobile STRING COMMENT '手机号',
    user_email STRING COMMENT '邮箱',
    sex BIGINT COMMENT '用户的性别,1男性,2女性,0未知',
    birthday_date STRING COMMENT '生日',
    address STRING COMMENT '地址',
    reg_time TIMESTAMP COMMENT '注册时间',
    reg_md STRING COMMENT '注册门店',
    bind_md STRING COMMENT '绑定门店',
    flag BIGINT COMMENT '0正常,1删除',
    is_black BIGINT COMMENT '是否被拉黑 1被拉黑,0正常用户',
    user_state BIGINT COMMENT '会员状态,0停用/注销,1正常,2冻结',
    user_type STRING COMMENT '用户类型(-1:传智鲜用户;0:普通用户;1:企业用户 2:内部员工 3:黑马门店 4:商铺会员 5:大买家 6:中间商 7:军区员工)',
    member_type BIGINT COMMENT '会员状态 10:未付费会员 20:付费会员',
    member_status BIGINT COMMENT '付费会员状态 -1:未付费会员 1:正常 2:试用 3:过期 4:试用已过期',
    expired_time TIMESTAMP COMMENT '过期时间',
    user_source BIGINT COMMENT '用户来源 ',
    member_level BIGINT COMMENT '会员等级',
    growth BIGINT COMMENT '成长值',
    invite_member_id BIGINT COMMENT '邀请人标识',
    invite_type BIGINT COMMENT '邀请类型,0为内部',
    register_store_leader_id BIGINT COMMENT '注册归属团长 ID',
    last_update_time TIMESTAMP COMMENT '更新日期',
    end_date STRING COMMENT '生效结束日期'
)
COMMENT '会员基础信息表'
PARTITIONED BY (start_date STRING COMMENT '生效开始日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

需要对此表进行历史数据拉链

说明: 将每日会员基础信息表中新增更新的数据, 通过拉链方式记录下来

首次导入

-- Initial load of the member table: every ODS row is written as an open version.
-- end_date '9999-99-99' is the "still current" sentinel that the incremental merge tests for;
-- start_date is the literal initial-load date and is the dynamic partition column (last in the list).
insert overwrite table dwd.dwd_mem_member_union_i partition (start_date)
select
    zt_id,
    member_id,
    user_id,
    card_no,
    member_name,
    mobile,
    user_email,
    sex,
    birthday_date,
    address,
    reg_time,
    reg_md,
    bind_md,
    flag,
    is_black,
    user_state,
    user_type,
    member_type,
    member_status,
    expired_time,
    user_source,
    member_level,
    growth,
    invite_member_id,
    invite_type,
    register_store_leader_id,
    last_update_time,
    '9999-99-99' as end_date,
    '2023-11-29' as start_date
from ods.ods_mem_member_union_i;

后续导入

1.mysql业务数据变化

注意: 实际开发没有自己准备业务数据这一步!!!

-- Step 1 (test only): add incremental test data in MySQL, covering one new row and one updated row
-- Simulate a newly inserted member
insert into member.member_union (zt_id, member_id, user_id, card_no, member_name, mobile, user_email, sex, birthday_date, address,reg_time, reg_md, bind_md, flag, is_black, user_state, user_type, member_type, member_status, expired_time, user_source, member_level, growth, invite_member_id, invite_type, register_store_leader_id,last_update_time)
values ('32015926',2160344,NULL,'','32015925',114,163,0,'','不详','2023-11-30 17:09:28','W121','W121',0,0,1,-1,10,-1,NULL,-1,0,0,NULL,NULL,NULL,'2023-11-30 17:09:28');
-- Simulate an update of an existing member
UPDATE member.member_union SET SEX = 1, last_update_time = '2023-11-30 17:10:20' WHERE zt_id = '32015925';

-- Verify: rows whose registration date or last-update date is yesterday
select *
from member.member_union
where date_format(reg_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
     OR date_format(last_update_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d');

2.dataxweb导数据到ods

-- 步骤二: 执行DataX, 将新增和更新的数据(增量数据)导入到ODS层  (应该在数据采集中执行)
-- 说明: 此步骤详细过程参考day02实施
-- 注意: mysqlreader中记得补充条件
date_format(reg_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
OR date_format(last_update_time,'%Y-%m-%d') = date_format(date_sub(Now(),INTERVAL 1 DAY),'%Y-%m-%d')
​
-- 注意: hdfswriter中记得补一个后置sql语句,内容如下
"postSql":[
    "msck repair table ods.ods_mem_member_union_i"
],

3.hive增量导数据到dwd

-- Step 3: run the incremental load
-- Staging table with the same columns and partitioning as the target table;
-- it receives the merged result before the target is overwritten.
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_member_union_i_temp(
    zt_id                    BIGINT COMMENT '中台会员ID',
    member_id                BIGINT COMMENT '会员ID',
    user_id                  BIGINT COMMENT '用户ID',
    card_no                  STRING COMMENT '卡号',
    member_name              STRING COMMENT '会员名称',
    mobile                   STRING COMMENT '手机号',
    user_email               STRING COMMENT '邮箱',
    sex                      BIGINT COMMENT '用户的性别,1男性,2女性,0未知',
    birthday_date            STRING COMMENT '生日',
    address                  STRING COMMENT '地址',
    reg_time                 TIMESTAMP COMMENT '注册时间',
    reg_md                   STRING COMMENT '注册门店',
    bind_md                  STRING COMMENT '绑定门店',
    flag                     BIGINT COMMENT '0正常,1删除',
    is_black                 BIGINT COMMENT '是否被拉黑 1被拉黑,0正常用户',
    user_state               BIGINT COMMENT '会员状态,0停用/注销,1正常,2冻结',
    user_type                STRING COMMENT '用户类型(-1:传智鲜用户;0:普通用户;1:企业用户 2:内部员工 3:黑马门店 4:商铺会员 5:大买家 6:中间商 7:军区员工)',
    member_type              BIGINT COMMENT '会员状态 10:未付费会员 20:付费会员',
    member_status            BIGINT COMMENT '付费会员状态 -1:未付费会员 1:正常 2:试用 3:过期 4:试用已过期',
    expired_time             TIMESTAMP COMMENT '过期时间',
    user_source              BIGINT COMMENT '用户来源 ',
    member_level             BIGINT COMMENT '会员等级',
    growth                   BIGINT COMMENT '成长值',
    invite_member_id         BIGINT COMMENT '邀请人标识',
    invite_type              BIGINT COMMENT '邀请类型,0为内部',
    register_store_leader_id BIGINT COMMENT '注册归属团长 ID',
    last_update_time         TIMESTAMP COMMENT '更新日期',
    end_date                 STRING COMMENT '生效结束日期'
)
comment '会员基础信息表'
partitioned by (start_date STRING COMMENT '生效开始日期')
row format delimited fields terminated by ','
stored as orc
tblproperties ('orc.compress'='SNAPPY');

-- Incremental zipper merge into the staging table.
-- Branch 1: every existing DWD version. A version that is still open
--           (end_date = '9999-99-99') and whose zt_id appears in yesterday's ODS
--           increment is closed by setting end_date to the increment's dt;
--           all other versions keep their end_date.
-- Branch 2: yesterday's ODS rows, appended as new open versions with start_date = yesterday.
-- NOTE(review): re-running this on the same day looks non-idempotent - the versions
-- inserted by the earlier run would be closed and appended again. Confirm before re-runs.
with merged as (
    select
        his.zt_id,
        his.member_id,
        his.user_id,
        his.card_no,
        his.member_name,
        his.mobile,
        his.user_email,
        his.sex,
        his.birthday_date,
        his.address,
        his.reg_time,
        his.reg_md,
        his.bind_md,
        his.flag,
        his.is_black,
        his.user_state,
        his.user_type,
        his.member_type,
        his.member_status,
        his.expired_time,
        his.user_source,
        his.member_level,
        his.growth,
        his.invite_member_id,
        his.invite_type,
        his.register_store_leader_id,
        his.last_update_time,
        if(
            inc.zt_id is null or his.end_date != '9999-99-99',
            his.end_date,
            inc.dt
        ) as end_date,
        his.start_date
    from dwd.dwd_mem_member_union_i his
        left join (
            -- only the join key and the partition date are needed from the increment
            select zt_id, dt
            from ods.ods_mem_member_union_i
            where dt = date_format(date_sub(current_date(),1),'yyyy-MM-dd')
        ) inc on his.zt_id = inc.zt_id

    union all

    select
        zt_id,
        member_id,
        user_id,
        card_no,
        member_name,
        mobile,
        user_email,
        sex,
        birthday_date,
        address,
        reg_time,
        reg_md,
        bind_md,
        flag,
        is_black,
        user_state,
        user_type,
        member_type,
        member_status,
        expired_time,
        user_source,
        member_level,
        growth,
        invite_member_id,
        invite_type,
        register_store_leader_id,
        last_update_time,
        '9999-99-99' as end_date,
        date_format(date_sub(current_date(),1),'yyyy-MM-dd') as start_date
    from ods.ods_mem_member_union_i
    where dt = date_format(date_sub(current_date(),1),'yyyy-MM-dd')
)
insert overwrite table dwd.dwd_mem_member_union_i_temp partition (start_date)
select
    *
from merged;

-- Publish: overwrite the target table from the staging table (dynamic partition on start_date)
insert overwrite table dwd.dwd_mem_member_union_i partition (start_date)
select * from dwd.dwd_mem_member_union_i_temp;

-- Drop the staging table; IF EXISTS keeps a re-run from failing when it is already gone
drop table if exists dwd.dwd_mem_member_union_i_temp;

情况说明:

    1- 目前所做的拉链表是针对历史所有数据, 哪怕某条数据是五年前创建、五年后才发生修改, 我们依然会进行维护
    
    2- 目前所有的拉链表是针对表中所有的字段, 只要表中任何字段发生变更, 都会进行维护
    
​
但是:
    在实际开发中,我们一般不需要维护历史所有数据, 也不需要维护表中所有的字段
    
    一般维护最近一段周期的数据(一个月、一个季度、一年(最常用))
    
    一般维护的核心与后续指标计算相关的字段: 用哪些一般维护哪些

会员积分变动表:

建表操作:

因为占用主体ID,分为两部分,一部分occupy_subject_id 为0,即全部,另一部分是各种主体,所以这里计算时,分为两部分计算,然后结果进行合并

-- DWD member point change table, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_member_point_change_i (
    trade_date STRING COMMENT '快照时间',
    zt_id BIGINT COMMENT '中台ID',
    occupy_subject_id BIGINT COMMENT '占用主体ID,0为全部,101优选,102传智鲜,103传智商城',
    point_add BIGINT COMMENT '增加积分,没有则为0',
    point_reduce BIGINT COMMENT '减少积分,没有则为0',
    point_change BIGINT COMMENT '变动积分,没有则为0'
)
COMMENT '会员积分变动表'
PARTITIONED BY (dt STRING COMMENT '统计日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

数据导入:

-- DWD member point change: point movement per day and member.
-- Two result sets are unioned:
--   1) one row per (dt, zt_id, occupy_subject_id);
--   2) a roll-up per (dt, zt_id) across all subjects, stored under occupy_subject_id = 0.
-- point_add sums point_c where change_type = 1; point_reduce sums -point_c where
-- change_type = 0; point_change adds point_c for change_type = 1 and subtracts it otherwise.
insert overwrite table dwd.dwd_mem_member_point_change_i partition(dt)
select
    dt as trade_date,
    zt_id,
    occupy_subject_id,
    sum( if( change_type = 1,point_c,0) ) as point_add,
    sum( if( change_type = 0,-point_c,0) ) as point_reduce,
    sum(if( change_type = 1,point_c,-point_c)) as point_change,
    dt
from ods.ods_mem_user_point_log_detailed_i
group by
    dt,
    zt_id,
    occupy_subject_id

union all

select
    dt as trade_date,
    zt_id,
    0 as occupy_subject_id,
    sum( if( change_type = 1,point_c,0) ) as point_add,
    sum( if( change_type = 0,-point_c,0) ) as point_reduce,
    sum(if( change_type = 1,point_c,-point_c)) as point_change,
    dt
from ods.ods_mem_user_point_log_detailed_i
group by
    dt,
    zt_id;

线上会员每日余额变动表:

用于记录每个会员每天余额的变动情况(需统计)

建表操作:

-- DWD daily balance change table for online members, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_balance_change_i (
    trade_date STRING COMMENT '统计日期',
    zt_id BIGINT COMMENT '中台ID',
    member_id BIGINT COMMENT '会员ID',
    record_type BIGINT COMMENT '记录类型,0全部,1消费,2充值,3退款,4.清退余额,5.转化,6.系统清除,7.礼品卡兑换,8.现付结余,9.结余退款,10.退卡',
    times BIGINT COMMENT '次数',
    change_amount DECIMAL(27, 2) COMMENT '变动金额'
)
COMMENT '线上会员每日余额变动表'
PARTITIONED BY (dt STRING COMMENT '统计日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

数据导入:

-- DWD member balance change: number of records and summed amount per day and member.
-- The result has two parts joined by UNION ALL: one row per record_type, and a
-- roll-up over all record types stored under record_type = 0.
WITH records AS (
    SELECT
        dt,
        zt_id,
        member_id,
        record_type,
        amount
    FROM ods.ods_mem_store_amount_record_i
)
INSERT OVERWRITE TABLE dwd.dwd_mem_balance_change_i PARTITION (dt)
SELECT
    dt AS trade_date,
    zt_id,
    member_id,
    record_type,
    COUNT(*) AS times,
    SUM(amount) AS change_amount,
    dt
FROM records
GROUP BY dt, zt_id, member_id, record_type

UNION ALL

SELECT
    dt AS trade_date,
    zt_id,
    member_id,
    0 AS record_type,
    COUNT(*) AS times,
    SUM(amount) AS change_amount,
    dt
FROM records
GROUP BY dt, zt_id, member_id;

线上会员每日余额表:

用于记录每个会员每天余额的情况(需统计)

建表操作:

-- DWD daily balance table for online members, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwd.dwd_mem_balance_online_i (
    trade_date STRING COMMENT '统计日期',
    zt_id BIGINT COMMENT '中台ID',
    member_id BIGINT COMMENT '会员ID',
    member_type BIGINT COMMENT '会员类型 1:线下会员 2:线上会员',
    member_type_name STRING COMMENT '会员类型名称',
    store_no STRING COMMENT '门店编码',
    city_id BIGINT COMMENT '城市ID',
    balance_amount DECIMAL(27, 2) COMMENT '余额'
)
COMMENT '线上会员每日余额表'
PARTITIONED BY (dt STRING COMMENT '统计日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

需求分析:

这个需求是对余额变动日志进行处理, 基于日志明细构建余额快照表。

先取到每天最后一条

-- Largest record id per member and calendar day (the day is derived from trade_date)
SELECT
    day_records.trade_day AS trade_date,
    MAX(day_records.id) AS id
FROM (
    SELECT
        date_format(trade_date, 'yyyy-MM-dd') AS trade_day,
        member_id,
        id
    FROM ods.ods_mem_store_amount_record_i
) day_records
GROUP BY day_records.trade_day, day_records.member_id

然后使用 lead 函数按用户id进行分组,按日期进行排序,取到下一条对应的日期。

-- For each member, pair every day's last balance record with the date of the member's
-- next record day; '9999-12-31' is used when there is no later day.
WITH last_id_per_day AS (
    SELECT
        date_format(trade_date, 'yyyy-MM-dd') AS trade_date,
        MAX(id) AS id
    FROM ods.ods_mem_store_amount_record_i
    GROUP BY date_format(trade_date, 'yyyy-MM-dd'), member_id
),
last_record_per_day AS (
    SELECT
        a.trade_date,
        b.zt_id,
        b.member_id,
        b.store_no,
        b.city_id,
        b.left_store_amount
    FROM last_id_per_day a
        INNER JOIN ods.ods_mem_store_amount_record_i b ON a.id = b.id
)
SELECT
    trade_date,
    zt_id,
    member_id,
    store_no,
    city_id,
    left_store_amount,
    LEAD(trade_date, 1, '9999-12-31') OVER (PARTITION BY member_id ORDER BY trade_date) AS next_date
FROM last_record_per_day t

然后像拉链表一样, 用 '${inputdate}' 去卡日期, 即可取到当天对应的余额。

注意: 因为这个需求只是把有余额的记录记录到表中,所以需要去除掉 left_store_amount 为0 的情况。在这里left_store_amount<>0 写到了where条件中,是先对结果进行了过滤,这样在匹配时就匹配不到对应的数据了,也就是不会把对应的记录插入到表中了。

where trade_date<='${inputdate}'  and  '${inputdate}'<next_date and left_store_amount<>0

-- Question: how do we get the last balance change of a day?
-- Attempt: take the largest id per day (dt) and member (zt_id)
SELECT
    src.dt,
    src.zt_id,
    MAX(src.id) AS last_id
FROM ods.ods_mem_store_amount_record_i src
GROUP BY src.dt, src.zt_id;

数据导入:

-- DWD daily online member balance: the balance each member holds on one given day.
-- 1) last_change_id: largest record id per (dt, zt_id).
-- 2) balance_span:  the record behind that id, plus the member's next change date
--                   ('9999-99-99' when there is none), giving a [start_date, end_date) span.
-- 3) The insert keeps the span that contains the target day and drops zero balances,
--    so a member without a change on the target day is still covered by an earlier span.
WITH last_change_id AS (
    SELECT
        dt AS trade_date,
        zt_id,
        MAX(id) AS last_id
    FROM ods.ods_mem_store_amount_record_i
    GROUP BY dt, zt_id
),
balance_span AS (
    SELECT
        lc.trade_date AS start_date,
        lc.zt_id,
        rec.member_id,
        rec.store_no,
        rec.city_id,
        rec.left_store_amount,
        LEAD(lc.trade_date, 1, '9999-99-99') OVER (PARTITION BY lc.zt_id ORDER BY lc.trade_date) AS end_date
    FROM last_change_id lc
        INNER JOIN ods.ods_mem_store_amount_record_i rec ON lc.last_id = rec.id
)
INSERT OVERWRITE TABLE dwd.dwd_mem_balance_online_i PARTITION (dt)
SELECT
    start_date AS trade_date,
    zt_id,
    member_id,
    2 AS member_type,
    '线上会员' AS member_type_name,
    store_no,
    city_id,
    left_store_amount AS balance_amount,
    '2023-12-01' AS dt
FROM balance_span
WHERE start_date <= '2023-12-01'
  AND end_date > '2023-12-01'
  AND left_store_amount <> 0;

DWM层开发

各类会员数量统计:
指标:新增注册会员数、累计注册会员数、新增消费会员数、累计消费会员数、新增复购会员数、累计复购会员数、活跃会员数、沉睡会员数、会员消费金额
维度: 时间维度(天、周、月)
​
门店会员分析:
指标: 门店销售额、门店总订单量、当日注册人数、累计注册会员数、当日注册且充值会员数、当日注册且充值且消费会员数、当日注册且消费会员数、充值会员数、充值金额、累计会员充值金额、当日有余额的会员人数、当日会员余额、余额消费人数/单量、余额支付金额、余额消费金额、会员消费人数/单量、会员消费金额、会员首单人数/订单量/销售额、会员非首单人数/订单量/销售额
维度: 时间维度(天、周、月)
​
说明:
由于各类会员数据统计分析和门店会员分析中, 有大量的指标存在一定的依赖关系, 所以在此处我们合并在一起进行分析, 向上抽取出一些公共的DWM层的数据表, 便于后续两个DWS层表数据的聚合统计, 本次主要涉及有四张DWM层表:会员销售单表、会员首次消费表、会员第二次消费表、会员行为天表

会员销售单表

建表操作:

-- DWM member sales order table, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwm.dwm_mem_sell_order_i(
    -- time attributes
    create_time                 STRING COMMENT '订单创建时间',
    trade_date                  STRING COMMENT '交易日期',
    week_trade_date             STRING COMMENT '周一日期',
    month_trade_date            STRING COMMENT '月一日期',
    hourly                      BIGINT COMMENT '交易小时(0-23)',
    quarter                     BIGINT COMMENT '刻钟:1.0-15,2.15-30,3.30-45,4.45-60',
    quarters                    BIGINT COMMENT '刻钟数:hourly*4+quarters',

    -- order attributes
    parent_order_no             STRING COMMENT '父单订单号/源单号',
    order_no                    STRING COMMENT '订单编号',
    trade_type                  BIGINT COMMENT '结算类型(0.正常交易,1.赠品发放,2.退货,4.培训,5.取消交易)',
    source_type                 BIGINT COMMENT '交易来源1:线下POS;2:三方平台;3:传智鲜商城;4:黑马优选团;5:传智大客户;6:传智其他;7:黑马优选;8:优选海淘;9:优选大客户;10:优选POS;11:优选APP;12:优选H5;13:店长工具线下;14:店长工具线上;15:黑马其他',
    source_type_name            STRING COMMENT '交易来源名称',
    sale_type                   BIGINT COMMENT '销售类型 1.实物,2.代客,3.优选小程序,4.离店,5.传智鲜小程序,6.第三方平台,7.其他,8.大客户',
    is_online_order             BIGINT COMMENT '是否为线上单:0否,1是',
    member_type                 BIGINT COMMENT '会员类型:0非会员,1线上会员,2实体卡会员',
    is_balance_consume          BIGINT COMMENT '是否有余额支付:0否,1是',
    order_type                  BIGINT COMMENT '配送类型(真正的订单类型由业务类型来决定):1-及时送;2-隔日送;3-自提单;4-线下单',
    express_type                BIGINT COMMENT '配送方式:0-三方平台配送;1-自配送;2-快递;3-自提;4-线下',

    -- store attributes
    store_no                    STRING COMMENT '店铺编码',
    store_name                  STRING COMMENT '店铺名称',
    store_sale_type             BIGINT COMMENT '店铺销售类型',
    store_type_code             BIGINT COMMENT '分店类型',
    worker_num                  BIGINT COMMENT '员工人数',
    store_area                  DECIMAL(27, 2) COMMENT '门店面积',
    city_id                     BIGINT COMMENT '城市ID',
    city_name                   STRING COMMENT '城市名称',
    region_code                 STRING COMMENT '区域编码',
    region_name                 STRING COMMENT '区域名称',
    is_day_clear                BIGINT COMMENT '是否日清:0否,1是',

    -- cancellation and cashier
    is_cancel                   BIGINT COMMENT '是否取消',
    cancel_time                 STRING COMMENT '取消时间',
    cancel_reason               STRING COMMENT '取消原因',
    last_update_time            TIMESTAMP COMMENT '最新更新时间',
    
    cashier_no                  STRING COMMENT '收银员编码',
    cashier_name                STRING COMMENT '收银员名称',

    -- member and consignee
    zt_id                       BIGINT COMMENT '中台ID',
    member_id                   BIGINT COMMENT '会员ID',
    card_no                     STRING COMMENT '卡号',
    r_name                      STRING COMMENT '收货人姓名',
    r_province                  STRING COMMENT '收货人省份',
    r_city                      STRING COMMENT '收货人城市',
    r_district                  STRING COMMENT '收货人区域',

    -- group-buy attributes
    is_tuan_head                BIGINT COMMENT '是否为团长订单',
    store_leader_id             BIGINT COMMENT '团长id',
    order_group_no              STRING COMMENT '团单号',

    -- settlement
    settle_amount               DECIMAL(27, 2) COMMENT '结算金额',
    share_user_id               BIGINT COMMENT '分享人用户ID',
    commission_amount           DECIMAL(27, 2) COMMENT '佣金',

    -- order amounts
    order_total_amount          DECIMAL(27, 2) COMMENT '订单总金额',
    product_total_amount        DECIMAL(27, 2) COMMENT '商品总金额(原价)',
    pack_amount                 DECIMAL(27, 2) COMMENT '餐盒费/打包费',
    delivery_amount             DECIMAL(27, 2) COMMENT '配送费',
    discount_amount             DECIMAL(27, 2) COMMENT '订单优惠金额=商家承担优惠金额+平台补贴金额',
    seller_discount_amount      DECIMAL(27, 2) COMMENT '商家承担优惠金额',
    platform_allowance_amount   DECIMAL(27, 2) COMMENT '平台补贴金额',
    real_paid_amount            DECIMAL(27, 2) COMMENT '实付金额',
    product_discount            DECIMAL(27, 2) COMMENT '商品优惠金额',
    real_product_amount         DECIMAL(27, 2) COMMENT '商品实际金额',
    
    -- payment breakdown
    round_amount                DECIMAL(27, 2) COMMENT '舍分金额',       
    wechat_amount               DECIMAL(27, 4) COMMENT '微信支付',
    ali_pay_amount              DECIMAL(27, 4) COMMENT '支付宝支付',
    cash_amount                 DECIMAL(27, 4) COMMENT '现金支付',
    balance_amount              DECIMAL(27, 4) COMMENT '余额支付',
    point_amount                DECIMAL(27, 4) COMMENT '积分支付',
    unionpay_amount             DECIMAL(27, 4) COMMENT '银行支付',
    member_card_amount          DECIMAL(27, 4) COMMENT '线下实体支付',
    gift_amount                 DECIMAL(27, 4) COMMENT '礼品卡支付',
    czapi_amount                DECIMAL(27, 4) COMMENT '传智支付',
    other_pay_amount            DECIMAL(27, 4) COMMENT '其他支付' 
)
comment '会员销售单表'
partitioned by (dt STRING COMMENT '销售日期')
row format delimited fields terminated by ','
stored as orc
tblproperties ('orc.compress'='SNAPPY');


思路分析:

构建这张表的主要原因在于,后续的会员数据分析可以基于这张会员销售明细表来进行计算,相比全量的销售明细表,可以极大地减少数据量

从dwm_sell_o2o_order_i表中获取,只取member_type 为1的。

准备数据:

在实际工作中, 部分表需要依赖于其他开发人员, 当前的dwm_sell_o2o_order_i就属于售卖主题中的相关表

需要执行脚本目录中售卖主题准备工作脚本的<<售卖主题dwm_sell_o2o_order_i表>>

数据导入:

-- DWM member sales order detail: rows of dwm_sell_o2o_order_i with member_type = 1,
-- written with dynamic partitioning on dt (the last select column).
INSERT OVERWRITE TABLE dwm.dwm_mem_sell_order_i PARTITION (dt)
SELECT
    src.create_time,
    src.trade_date,
    src.week_trade_date,
    src.month_trade_date,
    src.hourly,
    src.quarter,
    src.quarters,
    src.parent_order_no,
    src.order_no,
    src.trade_type,
    src.source_type,
    src.source_type_name,
    src.sale_type,
    src.is_online_order,
    src.member_type,
    src.is_balance_consume,
    src.order_type,
    src.express_type,
    src.store_no,
    src.store_name,
    src.store_sale_type,
    src.store_type_code,
    src.worker_num,
    src.store_area,
    src.city_id,
    src.city_name,
    src.region_code,
    src.region_name,
    src.is_day_clear,
    src.is_cancel,
    src.cancel_time,
    src.cancel_reason,
    src.last_update_time,
    src.cashier_no,
    src.cashier_name,
    src.zt_id,
    src.member_id,
    src.card_no,
    src.r_name,
    src.r_province,
    src.r_city,
    src.r_district,
    src.is_tuan_head,
    src.store_leader_id,
    src.order_group_no,
    src.settle_amount,
    src.share_user_id,
    src.commission_amount,
    src.order_total_amount,
    src.product_total_amount,
    src.pack_amount,
    src.delivery_amount,
    src.discount_amount,
    src.seller_discount_amount,
    src.platform_allowance_amount,
    src.real_paid_amount,
    src.product_discount,
    src.real_product_amount,
    src.round_amount,
    src.wechat_amount,
    src.ali_pay_amount,
    src.cash_amount,
    src.balance_amount,
    src.point_amount,
    src.unionpay_amount,
    src.member_card_amount,
    src.gift_amount,
    src.czapi_amount,
    src.other_pay_amount,
    src.dt
FROM dwm.dwm_sell_o2o_order_i src
WHERE src.member_type = 1;

会员首次消费表:

建表操作:

-- DWM member first purchase table, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwm.dwm_mem_first_buy_i (
    zt_id BIGINT COMMENT '中台 会员id',
    trade_date_time STRING COMMENT '首次消费时间',
    trade_date STRING COMMENT '首次消费日期',
    week_trade_date STRING COMMENT '周一日期',
    month_trade_date STRING COMMENT '月一日期',

    store_no STRING COMMENT '消费门店',
    sale_amount DECIMAL(27, 2) COMMENT '消费金额',
    order_no STRING COMMENT '订单编号',
    source_type BIGINT COMMENT '交易来源'
)
COMMENT '会员首次消费表'
PARTITIONED BY (dt STRING COMMENT '消费日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

思路分析:

注意:这里不能直接全量数据使用over窗口然后取 row_number() 为1的数据,因为这样会极大的消耗没有用的IO资源

思路: 通过思考发现, 每日新增首次消费会员一定是当天消费中的首次会员, 并且不在历史首次消费的会员中。基于这个特性, 可以先算出当天消费中的首次会员, 然后再和 dwm_mem_first_buy_i 进行关联, 取出 dwm_mem_first_buy_i 中没有的(即关联不上的)会员, 这些就是首次消费会员, 然后存到对应分区即可

数据导入:

-- DWM member first purchase (dwm_mem_first_buy_i), computed for one business date.
with day_orders as (
    -- Step 1: rank each member's orders of the business date by creation time.
    -- rn = 1 is the member's earliest order that day, not necessarily the first ever.
    -- order_no is a secondary sort key so that equal create_time values do not make
    -- the chosen order arbitrary (presumably order_no is unique per order - confirm).
    select
        zt_id,
        create_time as trade_date_time,
        trade_date,
        week_trade_date,
        month_trade_date,
        store_no,
        real_paid_amount as sale_amount,
        order_no,
        source_type,
        row_number() over (partition by zt_id order by create_time, order_no) as rn
    from dwm.dwm_mem_sell_order_i
    where dt = '2023-11-14' and zt_id is not null
),
day_first as (
    select
        zt_id,
        trade_date_time,
        trade_date,
        week_trade_date,
        month_trade_date,
        store_no,
        sale_amount,
        order_no,
        source_type
    from day_orders
    where rn = 1
)
-- Step 2: left join against the first purchases recorded before the business date
insert overwrite table dwm.dwm_mem_first_buy_i partition (dt)
select
    day_first.zt_id,
    day_first.trade_date_time,
    day_first.trade_date,
    day_first.week_trade_date,
    day_first.month_trade_date,
    day_first.store_no,
    day_first.sale_amount,
    day_first.order_no,
    day_first.source_type,
    '2023-11-14' as dt
from day_first
    left join dwm.dwm_mem_first_buy_i his
        on day_first.zt_id = his.zt_id and his.dt < '2023-11-14'
-- Step 3: no match means no earlier first purchase exists, so today's order is the first ever
where his.zt_id is null;

-- 注意:运行完后可以依次修改时间把dwm_mem_sell_order_i所有分区数据都导入
-- 说明: 在实施中 大家需要调整日期, 依次将14~20号的数据跑出来即可

会员第二次消费表:

建表操作:

-- DWM member second purchase table, partitioned by dt.
CREATE TABLE IF NOT EXISTS dwm.dwm_mem_second_buy_i(
    zt_id                       BIGINT COMMENT '中台 会员id',
    trade_date_time             STRING COMMENT '第二次消费时间',
    trade_date                  STRING COMMENT '第二次消费日期',
    week_trade_date             STRING COMMENT '周一日期',
    month_trade_date            STRING COMMENT '月一日期',

    store_no                    STRING COMMENT '消费门店',
    sale_amount                 DECIMAL(27, 2) COMMENT '消费金额',
    order_no                    STRING COMMENT '订单编号',
    source_type                 BIGINT COMMENT '交易来源'
)
comment '会员第二次消费表'
partitioned by (dt STRING COMMENT '消费日期')
row format delimited fields terminated by ','
stored as orc
tblproperties ('orc.compress'='SNAPPY');

思路分析:

注意:同dwm_mem_first_buy_i不能直接全量数据使用over窗口,然后取 row_number() 为2的数据。

思路:这种用户分为两种,一种是历史上有过首次购买的但没有二次购买的,这种用户如果当日有首次购买,则为第二次购买。另一种是历史上从没有购买过,这种用户如果当天首次购买,并且发生第二次购买则是第二次购买。

第一种会员:先求出有过首次购买但没有二次购买的会员

-- Members with a first purchase before the date and no second purchase before it
SELECT f.zt_id
FROM dwm.dwm_mem_first_buy_i f
    LEFT JOIN dwm.dwm_mem_second_buy_i s
        ON f.zt_id = s.zt_id
       AND s.dt < '${inputdate}'
WHERE s.zt_id IS NULL
  AND f.dt < '${inputdate}'

然后和当天首次消费的会员进行关联

第二种:先求出当天购买两次的会员。

-- Each member's second order of the day: rank the day's orders per member by
-- creation time and keep rank 2
WITH ranked_orders AS (
    SELECT
        zt_id,
        create_time AS trade_date_time,
        trade_date,
        week_trade_date,
        month_trade_date,
        store_no,
        real_paid_amount AS sale_amount,
        order_no,
        source_type,
        ROW_NUMBER() OVER (PARTITION BY zt_id ORDER BY create_time) AS rn
    FROM dwm.dwm_mem_sell_order_i
    WHERE dt = '${inputdate}'
)
SELECT
    *
FROM ranked_orders t
WHERE t.rn = 2

然后和当天是首单的会员进行关联:

inner join dwm.dwm_mem_first_buy_i tmp
on t.zt_id=tmp.zt_id and tmp.dt = '${inputdate}'

数据导入:

-- DWM member second purchase (dwm_mem_second_buy_i), business date 2023-11-14.
-- A member's second purchase falls on the business date in one of two cases:
--   Case 1: the first purchase happened before the date and no second purchase is
--           recorded before it -> today's earliest order is the second purchase.
--   Case 2: the first purchase happened on the date itself -> today's order ranked 2
--           is the second purchase.
with day_orders as (
    -- All member orders of the business date, ranked per member by creation time.
    -- order_no is a secondary sort key so equal create_time values do not make the
    -- ranking arbitrary (presumably order_no is unique per order - confirm).
    select
        zt_id,
        create_time as trade_date_time,
        trade_date,
        week_trade_date,
        month_trade_date,
        store_no,
        real_paid_amount as sale_amount,
        order_no,
        source_type,
        row_number() over (partition by zt_id order by create_time, order_no) as rn
    from dwm.dwm_mem_sell_order_i
    where dt = '2023-11-14'
),
first_only as (
    -- Members with a first purchase before the date but no second purchase before it
    select
        f.zt_id
    from dwm.dwm_mem_first_buy_i f
        left join dwm.dwm_mem_second_buy_i s
            on f.zt_id = s.zt_id and s.dt < '2023-11-14'
    where f.dt < '2023-11-14'
      and s.zt_id is null
),
case_one as (
    -- Case 1: the output columns come from today's order (the second purchase),
    -- not from the historical first-purchase row
    select
        o.zt_id,
        o.trade_date_time,
        o.trade_date,
        o.week_trade_date,
        o.month_trade_date,
        o.store_no,
        o.sale_amount,
        o.order_no,
        o.source_type
    from first_only
        inner join day_orders o
            on first_only.zt_id = o.zt_id and o.rn = 1
),
case_two as (
    -- Case 2: first purchase recorded on the business date, and a second order that day
    select
        o.zt_id,
        o.trade_date_time,
        o.trade_date,
        o.week_trade_date,
        o.month_trade_date,
        o.store_no,
        o.sale_amount,
        o.order_no,
        o.source_type
    from day_orders o
        inner join dwm.dwm_mem_first_buy_i f
            on o.zt_id = f.zt_id and f.dt = '2023-11-14'
    where o.rn = 2
)
insert overwrite table dwm.dwm_mem_second_buy_i partition (dt)
select
    zt_id,
    trade_date_time,
    trade_date,
    week_trade_date,
    month_trade_date,
    store_no,
    sale_amount,
    order_no,
    source_type,
    '2023-11-14' as dt
from case_one where zt_id is not null

union all

select
    zt_id,
    trade_date_time,
    trade_date,
    week_trade_date,
    month_trade_date,
    store_no,
    sale_amount,
    order_no,
    source_type,
    '2023-11-14' as dt
from case_two where zt_id is not null;

-- 说明: 在实施中 大家需要调整日期, 依次将14~20号的数据跑出来即可
/*  在生产环境中(工作中), 我们可以通过海豚调度器提供的补数方案, 指定需要补数的范围, 调度器会自动将过去的几天数据全部补回来(无需执行, 了解即可, 面试中按照这个说即可)*/

会员行为天表

建表操作:

-- DWM layer: daily member behavior table.
-- Holds registration, recharge, consumption, first-consumption and point
-- figures per member and trade date, partitioned by the statistics date dt.
CREATE TABLE IF NOT EXISTS dwm.dwm_mem_member_behavior_day_i (
    -- date columns
    trade_date STRING COMMENT '时间',
    week_trade_date STRING COMMENT '周一日期',
    month_trade_date STRING COMMENT '月一日期',

    -- member identity and registration attributes
    zt_id BIGINT COMMENT '中台 会员id',
    bind_md STRING COMMENT '归属门店(绑定门店)',
    reg_md STRING COMMENT '注册门店',
    reg_time TIMESTAMP COMMENT '中台 注册时间',
    is_register BIGINT COMMENT '当日是否注册',

    -- recharge
    is_recharge BIGINT COMMENT '当日是否充值',
    recharge_times BIGINT COMMENT '充值次数,没有充值则为0',
    recharge_amount DECIMAL(27, 2) COMMENT '充值金额,没有充值则为0',

    -- overall consumption
    is_consume BIGINT COMMENT '当日是否消费',
    consume_times BIGINT COMMENT '消费次数,没有消费则为0',
    consume_amount DECIMAL(27, 2) COMMENT '消费金额,没有消费则为0',

    -- first consumption
    is_first_consume BIGINT COMMENT '当日是否首次消费',
    first_consume_store STRING COMMENT '首次消费门店,没有则为null',
    first_consume_amount DECIMAL(27, 2) COMMENT '首次消费金额,没有消费则为0',

    -- consumption paid with balance
    is_balance_consume BIGINT COMMENT '当日是否余额消费',
    balance_consume_times BIGINT COMMENT '余额消费次数,没有消费则为0',
    balance_pay_amount DECIMAL(27, 2) COMMENT '余额支付金额,没有消费则为0',
    balance_consume_amount DECIMAL(27, 2) COMMENT '余额消费金额,没有消费则为0',

    -- consumption paid with points
    is_point_consume BIGINT COMMENT '当日是否积分消费',
    point_consume_times BIGINT COMMENT '积分消费次数,没有消费则为0',
    point_pay_amount DECIMAL(27, 2) COMMENT '积分支付金额,没有消费则为0',
    point_consume_amount DECIMAL(27, 2) COMMENT '积分消费金额,没有消费则为0',

    -- point movements
    point_add BIGINT COMMENT '增加积分,没有则为0',
    point_reduce BIGINT COMMENT '减少积分,没有则为0',
    point_change BIGINT COMMENT '变动积分,没有则为0',

    -- online versus offline split
    online_consume_times BIGINT COMMENT '线上订单量',
    online_consume_amount DECIMAL(27, 2) COMMENT '线上消费金额',
    offline_consume_times BIGINT COMMENT '线下订单量',
    offline_consume_amount DECIMAL(27, 2) COMMENT '线下消费金额'
)
COMMENT '会员行为天表'
PARTITIONED BY (dt STRING COMMENT '统计日期')
-- NOTE(review): the delimiter clause is kept as in the original definition,
-- the data itself is stored as Snappy-compressed ORC.
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');

思路分析:

从dwd.dwd_mem_member_union_i中获取注册信息,

dwd.dwd_mem_balance_change_i中获取充值信息,

dwm.dwm_mem_sell_order_i中获取销售信息,

dwm.dwm_mem_first_buy_i中获取首次消费信息,

dwd.dwd_mem_member_point_change_i中获取积分信息。

数据导入:

-- DWM: member behavior day table load for statistics date 2023-11-14.
--
-- Approach: every source contributes rows with the same 27-column layout
-- (its own measures filled in, all other measures set to 0 or an empty
-- string). The rows are stacked with union all in t1, collapsed to one row
-- per (trade_date, zt_id) in t2, then decorated with week/month dates and
-- the current member attributes before being written to the dt partition.
--
-- NOTE(review): the target uses a fully dynamic partition spec (dt), which
-- presumably relies on nonstrict dynamic partition mode being enabled
-- earlier in the session - confirm.
with t1 as (
    -- Source 1: registration info from the member zipper table.
    select
        '2023-11-14' as trade_date,
        zt_id,
        -- 1 only when the member registered on the statistics date
        if(
            date_format(reg_time,'yyyy-MM-dd') = '2023-11-14',1,0
        ) as is_register,
        0 as is_recharge,
        0 as recharge_times,
        0 as recharge_amount,
        0 as is_consume,
        0 as consume_times,
        0 as consume_amount,
        0 as is_first_consume,
        '' as first_consume_store,
        0 as first_consume_amount,
        0 as is_balance_consume,
        0 as balance_consume_times,
        0 as balance_pay_amount,
        0 as balance_consume_amount,
        0 as is_point_consume,
        0 as point_consume_times,
        0 as point_pay_amount,
        0 as point_consume_amount,
        0 as point_add,
        0 as point_reduce,
        0 as point_change,
        0 as online_consume_times,
        0 as online_consume_amount,
        0 as offline_consume_times,
        0 as offline_consume_amount
    from dwd.dwd_mem_member_union_i
    -- Initial load: use <= so every member registered on or before the date
    -- is included. For the second and later runs switch the comparison to =
    -- so that only registrations of the current day are read.
    -- end_date = 9999-99-99 selects the currently valid zipper records.
    where date_format(reg_time,'yyyy-MM-dd') <= '2023-11-14' and end_date = '9999-99-99'

    union all

    -- Source 2: recharge data from the balance change table.
    -- NOTE(review): record_type = 2 is presumably the recharge record type - confirm.
    select
        trade_date,
        zt_id,
        0 as is_register,
        1 as is_recharge,
        times as recharge_times,
        change_amount as recharge_amount,
        0 as is_consume,
        0 as consume_times,
        0 as consume_amount,
        0 as is_first_consume,
        '' as first_consume_store,
        0 as first_consume_amount,
        0 as is_balance_consume,
        0 as balance_consume_times,
        0 as balance_pay_amount,
        0 as balance_consume_amount,
        0 as is_point_consume,
        0 as point_consume_times,
        0 as point_pay_amount,
        0 as point_consume_amount,
        0 as point_add,
        0 as point_reduce,
        0 as point_change,
        0 as online_consume_times,
        0 as online_consume_amount,
        0 as offline_consume_times,
        0 as offline_consume_amount
    from dwd.dwd_mem_balance_change_i
    where dt = '2023-11-14' and record_type = 2

    union all

    -- Source 3: consumption, aggregated per member and trade date.
    -- Every *_times measure is the number of distinct parent orders with
    -- trade_type = 0 minus the number with trade_type = 5.
    -- NOTE(review): presumably 0 marks a sale and 5 a refund - confirm.
    select
        trade_date,
        zt_id,
        0 as is_register,
        0 as is_recharge,
        0 as recharge_times,
        0 as recharge_amount,
        1 as is_consume,
        count( distinct if(trade_type = 0,parent_order_no,NULL))
            - count( distinct if(trade_type = 5,parent_order_no,NULL)) as consume_times,
        sum(real_paid_amount) as consume_amount,
        0 as is_first_consume,
        '' as first_consume_store,
        0 as first_consume_amount,
        -- balance-paid consumption
        max(is_balance_consume) as is_balance_consume,
        count( distinct if(trade_type = 0 and is_balance_consume = 1,parent_order_no,NULL))
            - count( distinct if(trade_type = 5 and is_balance_consume = 1,parent_order_no,NULL)) as balance_consume_times,
        sum(
            if(is_balance_consume = 1,balance_amount,0)
        ) as balance_pay_amount,
        sum(
            if(is_balance_consume = 1,real_paid_amount,0)
        ) as balance_consume_amount,
        -- point-paid consumption: an order counts when point_amount > 0
        max(
            if(point_amount > 0,1,0)
        ) as is_point_consume,
        count( distinct if(trade_type = 0 and point_amount > 0,parent_order_no,NULL) )
            - count( distinct if(trade_type = 5 and point_amount > 0,parent_order_no,NULL) ) as point_consume_times,
        sum(
            if(point_amount > 0,point_amount,0)
        ) as point_pay_amount,
        sum(
            if(point_amount > 0,real_paid_amount,0)
        ) as point_consume_amount,
        0 as point_add,
        0 as point_reduce,
        0 as point_change,
        -- online (is_online_order = 1) versus offline (is_online_order = 0)
        count( distinct if(trade_type = 0 and is_online_order = 1,parent_order_no,NULL) )
            - count( distinct if(trade_type = 5 and is_online_order = 1,parent_order_no,NULL) ) as online_consume_times,
        sum(
            if(is_online_order = 1,real_paid_amount,0)
        ) as online_consume_amount,
        count( distinct if(trade_type = 0 and is_online_order = 0,parent_order_no,NULL) )
            - count( distinct if(trade_type = 5 and is_online_order = 0,parent_order_no,NULL) ) as offline_consume_times,
        sum(
            if(is_online_order = 0,real_paid_amount,0)
        ) as offline_consume_amount
    from dwm.dwm_mem_sell_order_i
    where dt = '2023-11-14'
    group by trade_date,zt_id

    union all

    -- Source 4: first consumption of the member.
    select
        trade_date,
        zt_id,
        0 as is_register,
        0 as is_recharge,
        0 as recharge_times,
        0 as recharge_amount,
        0 as is_consume,
        0 as consume_times,
        0 as consume_amount,
        1 as is_first_consume,
        store_no as first_consume_store,
        sale_amount as first_consume_amount,
        0 as is_balance_consume,
        0 as balance_consume_times,
        0 as balance_pay_amount,
        0 as balance_consume_amount,
        0 as is_point_consume,
        0 as point_consume_times,
        0 as point_pay_amount,
        0 as point_consume_amount,
        0 as point_add,
        0 as point_reduce,
        0 as point_change,
        0 as online_consume_times,
        0 as online_consume_amount,
        0 as offline_consume_times,
        0 as offline_consume_amount
    from dwm.dwm_mem_first_buy_i
    where dt = '2023-11-14'

    union all

    -- Source 5: point changes.
    select
        trade_date,
        zt_id,
        0 as is_register,
        0 as is_recharge,
        0 as recharge_times,
        0 as recharge_amount,
        0 as is_consume,
        0 as consume_times,
        0 as consume_amount,
        0 as is_first_consume,
        '' as first_consume_store,
        0 as first_consume_amount,
        0 as is_balance_consume,
        0 as balance_consume_times,
        0 as balance_pay_amount,
        0 as balance_consume_amount,
        0 as is_point_consume,
        0 as point_consume_times,
        0 as point_pay_amount,
        0 as point_consume_amount,
        point_add,
        point_reduce,
        point_change,
        0 as online_consume_times,
        0 as online_consume_amount,
        0 as offline_consume_times,
        0 as offline_consume_amount
    from dwd.dwd_mem_member_point_change_i
    where dt = '2023-11-14'
),
-- Collapse the stacked rows to one row per (trade_date, zt_id):
-- flags are combined with max, counts and amounts with sum.
t2 as (
    select
        trade_date,
        zt_id,
        max(is_register) as is_register,
        max(is_recharge) as is_recharge,
        sum(recharge_times) as recharge_times,
        sum(recharge_amount) as recharge_amount,
        max(is_consume) as is_consume,
        sum(consume_times) as consume_times,
        sum(consume_amount) as consume_amount,
        max(is_first_consume) as is_first_consume,
        -- max picks the real store number over the empty-string placeholder.
        -- NOTE(review): members without a first consumption end up with an
        -- empty string here, while the column comment in the table
        -- definition mentions null - confirm which one consumers expect.
        max(first_consume_store) as first_consume_store,
        sum(first_consume_amount) as first_consume_amount,
        max(is_balance_consume) as is_balance_consume,
        sum(balance_consume_times) as balance_consume_times,
        sum(balance_pay_amount) as balance_pay_amount,
        sum(balance_consume_amount) as balance_consume_amount,
        max(is_point_consume) as is_point_consume,
        sum(point_consume_times) as point_consume_times,
        sum(point_pay_amount) as point_pay_amount,
        sum(point_consume_amount) as point_consume_amount,
        sum(point_add) as point_add,
        sum(point_reduce) as point_reduce,
        sum(point_change) as point_change,
        sum(online_consume_times) as online_consume_times,
        sum(online_consume_amount) as online_consume_amount,
        sum(offline_consume_times) as offline_consume_times,
        sum(offline_consume_amount) as offline_consume_amount,
        -- dynamic partition value
        trade_date as dt
    from t1
    group by trade_date,zt_id
)
insert overwrite table dwm.dwm_mem_member_behavior_day_i partition (dt)
select
    t2.trade_date,
    t3.week_trade_date,
    t3.month_trade_date,
    t2.zt_id,
    t4.bind_md,
    t4.reg_md,
    t4.reg_time,
    t2.is_register,
    t2.is_recharge,
    t2.recharge_times,
    t2.recharge_amount,
    t2.is_consume,
    t2.consume_times,
    t2.consume_amount,
    t2.is_first_consume,
    t2.first_consume_store,
    t2.first_consume_amount,
    t2.is_balance_consume,
    t2.balance_consume_times,
    t2.balance_pay_amount,
    t2.balance_consume_amount,
    t2.is_point_consume,
    t2.point_consume_times,
    t2.point_pay_amount,
    t2.point_consume_amount,
    t2.point_add,
    t2.point_reduce,
    t2.point_change,
    t2.online_consume_times,
    t2.online_consume_amount,
    t2.offline_consume_times,
    t2.offline_consume_amount,
    -- the partition column must come last in a dynamic partition insert
    t2.dt
from t2
    -- week and month start dates for the trade date
    left join dim.dwd_dim_date_f t3 on t2.trade_date = t3.trade_date
    -- currently valid member record (open-ended zipper row)
    left join dwd.dwd_mem_member_union_i t4 on t2.zt_id = t4.zt_id and t4.end_date = '9999-99-99';
    
-- 说明: 在实施中 大家需要调整日期, 依次将14~20号的每天会员的行为数据跑出来即可
/*  在生产环境中(工作中), 我们可以通过海豚调度器提供的补数方案, 指定需要补数的范围, 调度器会自动将过去的几天数据全部补回来(无需执行, 了解即可, 面试中按照这个说即可)*/

原文地址:https://blog.csdn.net/qq_52442855/article/details/134793848

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任

如若转载,请注明出处:http://www.7code.cn/show_40506.html

如若内容造成侵权/违法违规/事实不符,请联系代码007邮箱:suwngjj01@126.com进行投诉反馈,一经查实,立即删除

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注