数仓搭建(hive):DM搭建(数据集市层)
- 开源代码
- 2025-08-24 22:42:01

DM层 数据集市层 (Data Mart)
粒度上卷(Roll-up):
指的是沿着维度层次向上聚合汇总数据,从细粒度到粗粒度观察数据的操作。
示例
数仓的上一层DWS的是按日汇总
DM层基于DWS层主题日宽表上卷统计出按年,月,周的数据 >>用DWS层的宽表连接DWD层的时间维度表
创建DM层 : 建数据库>>建表
CREATE DATABASE if NOT EXISTS DM;建表: 表结构和DWS层的表结构几乎一致, 只多了关于日期的维度字段
建表sql
CREATE TABLE dm.dm_sale( date_time string COMMENT '统计日期,不能用来分组统计' ,--记录哪一天干活 time_type string COMMENT '统计时间维度:year、month、week、date(就是天day)', year_code string COMMENT '年code', year_month string COMMENT '年月', month_code string COMMENT '月份编码', day_month_num string COMMENT '一月第几天', dim_date_id string COMMENT '日期', year_week_name_cn string COMMENT '年中第几周', group_type string COMMENT '分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all', city_id string COMMENT '城市id', city_name string COMMENT '城市name', trade_area_id string COMMENT '商圈id', trade_area_name string COMMENT '商圈名称', store_id string COMMENT '店铺的id', store_name string COMMENT '店铺名称', brand_id string COMMENT '品牌id', brand_name string COMMENT '品牌名称', max_class_id string COMMENT '商品大类id', max_class_name string COMMENT '大类名称', mid_class_id string COMMENT '中类id', mid_class_name string COMMENT '中类名称', min_class_id string COMMENT '小类id', min_class_name string COMMENT '小类名称', -- =======统计======= sale_amt DECIMAL(38,2) COMMENT '销售收入', plat_amt DECIMAL(38,2) COMMENT '平台收入', deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额', mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额', android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额', ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额', pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额', order_cnt BIGINT COMMENT '成交单量', eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt', bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt', deliver_order_cnt BIGINT COMMENT '配送单量', refund_order_cnt BIGINT COMMENT '退款单量', miniapp_order_cnt BIGINT COMMENT '小程序成交单量', android_order_cnt BIGINT COMMENT '安卓APP订单量', ios_order_cnt BIGINT COMMENT '苹果APP订单量', pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量' ) COMMENT '销售主题宽表' ROW format delimited fields terminated BY '\t' stored AS orc tblproperties ('orc press' = 'SNAPPY');插入数据sql
WITH TEMP AS ( SELECT D.year_code, D.year_month, D.month_code, D.day_month_num, D.dim_date_id, D.year_week_name_cn, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name, brand_id, brand_name, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name, sale_amt, plat_amt, deliver_sale_amt, mini_app_sale_amt, android_sale_amt, ios_sale_amt, pcweb_sale_amt, order_cnt, eva_order_cnt, bad_eva_order_cnt, deliver_order_cnt, refund_order_cnt, miniapp_order_cnt, android_order_cnt, ios_order_cnt, pcweb_order_cnt, dt FROM DWS.DWS_SALE_DAYCOUNT S INNER JOIN DWD.DIM_DATE D ON S.dt = D.date_code ) INSERT overwrite table dm.dm_sale SELECT CURRENT_DATE AS DATE_TIME, CASE WHEN dim_date_id IS NOT NULL THEN 'DATE' WHEN year_week_name_cn IS NOT NULL THEN 'WEEK' WHEN month_code IS NOT NULL THEN 'MONTH' WHEN year_code IS NOT NULL THEN 'YEAR' END AS TIME_TYPE, year_code, year_month, month_code, day_month_num, dim_date_id, year_week_name_cn, CASE WHEN T.store_id IS NOT NULL THEN '店铺' WHEN T.trade_area_id IS NOT NULL THEN '商圈' WHEN T.city_id IS NOT NULL THEN '城市' WHEN T.min_class_id IS NOT NULL THEN '小类' WHEN T.mid_class_id IS NOT NULL THEN '中类' WHEN T.max_class_id IS NOT NULL THEN '大类' WHEN T.brand_id IS NOT NULL THEN '品牌' ELSE '日期' END AS GROUP_TYPE, city_id, city_name, trade_area_ID, trade_area_name, store_id, store_name, brand_id, brand_name, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name, SUM(sale_amt), SUM(plat_amt), SUM(deliver_sale_amt), SUM(mini_app_sale_amt), SUM(android_sale_amt), SUM(ios_sale_amt), SUM(pcweb_sale_amt), SUM(order_cnt), SUM(eva_order_cnt), SUM(bad_eva_order_cnt), SUM(deliver_order_cnt), SUM(refund_order_cnt), SUM(miniapp_order_cnt), SUM(android_order_cnt), SUM(ios_order_cnt), SUM(pcweb_order_cnt) FROM TEMP T GROUP BY -- 所有 GROUPING SETS 中出现的列都要包含在 GROUP BY 中 day_month_num, dim_date_id, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name, brand_id, brand_name, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name, year_code, year_month, month_code, year_week_name_cn GROUPING SETS ( (day_month_num, dim_date_id), (day_month_num, dim_date_id, city_id, city_name), (day_month_num, dim_date_id, city_id, city_name, trade_area_id, trade_area_name), (day_month_num, dim_date_id, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), (day_month_num, dim_date_id, brand_id, brand_name), (day_month_num, dim_date_id, max_class_id, max_class_name), (day_month_num, dim_date_id, max_class_id, max_class_name, mid_class_name, mid_class_id), (day_month_num, dim_date_id, max_class_id, max_class_name, mid_class_name, mid_class_id, min_class_id, min_class_name), (year_week_name_cn), (year_week_name_cn, city_id, city_name), (year_week_name_cn, city_id, city_name, trade_area_id, trade_area_name), (year_week_name_cn, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), (year_week_name_cn, brand_id, brand_name), (year_week_name_cn, max_class_id, max_class_name), (year_week_name_cn, max_class_id, max_class_name, mid_class_name, mid_class_id), (year_week_name_cn, max_class_id, max_class_name, mid_class_name, mid_class_id, min_class_id, min_class_name), (year_month, month_code), (year_month, month_code, city_id, city_name), (year_month, month_code, city_id, city_name, trade_area_id, trade_area_name), (year_month, month_code, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), (year_month, month_code, brand_id, brand_name), (year_month, month_code, max_class_id, max_class_name), (year_month, month_code, max_class_id, max_class_name, mid_class_name, mid_class_id), (year_month, month_code, max_class_id, max_class_name, mid_class_name, mid_class_id, min_class_id, min_class_name), (year_code), (year_code, city_id, city_name), (year_code, city_id, city_name, trade_area_id, trade_area_name), (year_code, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), (year_code, brand_id, brand_name), (year_code, max_class_id, max_class_name), (year_code, max_class_id, max_class_name, mid_class_name, mid_class_id), (year_code, max_class_id, max_class_name, mid_class_name, mid_class_id, min_class_id, min_class_name) );插入sql分析
查询DWS层的宽表>>确认连接字段dt的数据格式
查询时间维度表>>找到和DWS层的宽表的连接字段数据格式一样的字段>>查找新维度的相应字段
select * from DWD.DIM_DATE
在with as临时表里面把用DWS层的宽表连接DWD层的时间维度表, 内连接,连接字段dt(日)
在临时表查询语句中把目标表新增的时间维度的字段添加进去
目标表
临时表
INSERT overwrite table dm.dm_sale 是hive中全量插入的语法
在查询语句中把目标表新增的列实现
group_type string COMMENT '分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all',
枚举类型>>case when
group by 分组后面跟除了指标字段及 group_type 的所有字段(维度字段)
用grouping sets 写出需要的维度组合
数仓搭建(hive):DM搭建(数据集市层)由讯客互联开源代码栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“数仓搭建(hive):DM搭建(数据集市层)”