数仓搭建(hive):DWS层(服务数据层)
- 电脑硬件
- 2025-08-26 07:45:01

DWS层示例: 搭建日主题宽表 需求 维度 步骤 在hive中建数据库dws >>建表 CREATE DATABASE if NOT EXISTS DWS; 建表sql CREATE TABLE yp_dws.dws_sale_daycount( --维度 city_id string COMMENT '城市id', city_name string COMMENT '城市name', trade_area_id string COMMENT '商圈id', trade_area_name string COMMENT '商圈名称', store_id string COMMENT '店铺的id', store_name string COMMENT '店铺名称', brand_id string COMMENT '品牌id', brand_name string COMMENT '品牌名称', max_class_id string COMMENT '商品大类id', max_class_name string COMMENT '大类名称', mid_class_id string COMMENT '中类id', mid_class_name string COMMENT '中类名称', min_class_id string COMMENT '小类id', min_class_name string COMMENT '小类名称', group_type string COMMENT '分组类型:store,trade_area,city,brand, min_class,mid_class,max_class,all', -- =======日统计======= sale_amt DECIMAL(38,2) COMMENT '销售收入', plat_amt DECIMAL(38,2) COMMENT '平台收入', deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额', mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额', android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额', ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额', pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额', order_cnt BIGINT COMMENT '成交单量', eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt', bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt', deliver_order_cnt BIGINT COMMENT '配送单量', refund_order_cnt BIGINT COMMENT '退款单量', miniapp_order_cnt BIGINT COMMENT '小程序成交单量', android_order_cnt BIGINT COMMENT '安卓APP订单量', ios_order_cnt BIGINT COMMENT '苹果APP订单量', pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量' ) COMMENT '销售主题日统计宽表' PARTITIONED BY(dt STRING) ROW format delimited fields terminated BY '\t' stored AS orc tblproperties ('orc press' = 'SNAPPY'); 查询数据sql set hive.exec.mode.local.auto=true; WITH TEMP AS ( SELECT -- 先抽取维度字段 O.dt -- 城市 ,S.city_id ,S.city_name -- 商圈 ,S.trade_area_id ,S.trade_area_name -- 店铺 ,S.id ,S.store_name -- 品牌 ,G.brand_id ,G.brand_name -- 大类 ,G.max_class_id ,G.max_class_name -- 中 ,G.mid_class_id ,G.mid_class_name -- 小 ,G.min_class_id ,G.min_class_name -- 抽取字段字段 -- 订单量指标 ,O.order_id -- 金额指标 ,O.order_amount ,O.goods_price ,O.plat_fee ,O.settlement_amount ,O.dispatcher_money ,O.order_from ,O.evaluation_state ,O.geval_scores ,O.is_delivery -- 是否配送 ,O.refund_id -- 退款单号 -- 去重 ,ROW_NUMBER()OVER(PARTITION BY O.order_id ORDER BY order_id) RN FROM DWB.DWB_ORDER_DETAIL1 O LEFT JOIN DWB.DWB_SHOP_DETAIL S ON O.store_id = S.id LEFT JOIN DWB.dwb_goods_detail G ON G.store_id = S.id ) SELECT T.city_id ,T.city_name ,T.trade_area_id ,T.trade_area_name ,T.ID AS STORE_ID ,T.store_name ,T.brand_id ,T.brand_name ,T.max_class_id ,T.max_class_name ,T.mid_class_id ,T.mid_class_name ,T.min_class_id ,T.min_class_name ,(CASE WHEN T.ID IS NOT NULL THEN '店铺' WHEN T.trade_area_id IS NOT NULL THEN '商圈' WHEN T.city_id IS NOT NULL THEN '城市' WHEN T.min_class_id IS NOT NULL THEN '小类' WHEN T.mid_class_id IS NOT NULL THEN '中类' WHEN T.max_class_id IS NOT NULL THEN '大类' WHEN T.brand_id IS NOT NULL THEN '品牌' ELSE '日期' END ) AS GROUP_TYPE ,SUM(CASE WHEN RN = 1 THEN T.order_amount END) AS SALE_AMT ,SUM(CASE WHEN RN = 1 THEN T.plat_fee END) AS PLAT_AMT ,SUM(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN T.order_amount END) AS DELIVER_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN T.order_amount END) AS MINI_APP_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'android' THEN T.order_amount END) AS android_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN T.order_amount END) AS ios_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN T.order_amount END) AS PCWEB_SALE_AMT ,COUNT(CASE WHEN RN = 1 THEN T.order_id END ) AS ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.evaluation_state = 1 THEN 1 END) AS EVA_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.geval_scores < 3 THEN 1 END) AS BAD_EVA_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN 1 END) AS DELIVER_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.refund_id IS NOT NULL THEN 1 END) AS REFUND_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN 1 END) AS MINI_APP_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'android' THEN 1 END) AS android_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN 1 END) AS ios_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN 1 END) AS PCWEB_SALE_CNT ,T.dt FROM TEMP T GROUP BY T.dt ,T.city_id,T.city_name ,T.trade_area_id,T.trade_area_name ,T.id,T.store_name ,T.brand_id,T.brand_name ,T.max_class_id,T.max_class_name ,T.mid_class_id,T.mid_class_name ,T.min_class_id,T.min_class_name GROUPING SETS ( (T.dt) ,(T.dt,T.city_id,T.city_name) ,(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name) ,(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name) ,(T.dt,T.brand_id,T.brand_name) ,(T.dt,T.max_class_id,T.max_class_name) ,(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name) ,(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name) );
在hive中查询数据很慢
方法一:
打开hive的本地模式 (默认是false关闭状态) set hive.exec.mode.local.auto=true;方法二: Hue上执行
插入数据
查看表结构
要插入的目标表是分区表 >> 开启动态插入模式/非严格模式
在hive中,insert into 要紧跟select
set hive.exec.mode.local.auto=true; -- 本地模式 SET hive.exec.dynamic.partition = true; -- 动态分区 SET hive.exec.dynamic.partition.mode=nonstrict; -- 非严格模式 WITH TEMP AS ( SELECT -- 先抽取维度字段 O.dt -- 城市 ,S.city_id ,S.city_name -- 商圈 ,S.trade_area_id ,S.trade_area_name -- 店铺 ,S.id ,S.store_name -- 品牌 ,G.brand_id ,G.brand_name -- 大类 ,G.max_class_id ,G.max_class_name -- 中 ,G.mid_class_id ,G.mid_class_name -- 小 ,G.min_class_id ,G.min_class_name -- 抽取字段字段 -- 订单量指标 ,O.order_id -- 金额指标 ,O.order_amount ,O.goods_price ,O.plat_fee ,O.settlement_amount ,O.dispatcher_money ,O.order_from ,O.evaluation_state ,O.geval_scores ,O.is_delivery -- 是否配送 ,O.refund_id -- 退款单号 -- 去重 ,ROW_NUMBER()OVER(PARTITION BY O.order_id ORDER BY order_id) RN FROM DWB.DWB_ORDER_DETAIL1 O LEFT JOIN DWB.DWB_SHOP_DETAIL S ON O.store_id = S.id LEFT JOIN DWB.dwb_goods_detail G ON G.store_id = S.id ) insert into dws.dws_sale_daycount(dt) SELECT T.city_id ,T.city_name ,T.trade_area_id ,T.trade_area_name ,T.ID AS STORE_ID ,T.store_name ,T.brand_id ,T.brand_name ,T.max_class_id ,T.max_class_name ,T.mid_class_id ,T.mid_class_name ,T.min_class_id ,T.min_class_name ,(CASE WHEN T.ID IS NOT NULL THEN '店铺' WHEN T.trade_area_id IS NOT NULL THEN '商圈' WHEN T.city_id IS NOT NULL THEN '城市' WHEN T.min_class_id IS NOT NULL THEN '小类' WHEN T.mid_class_id IS NOT NULL THEN '中类' WHEN T.max_class_id IS NOT NULL THEN '大类' WHEN T.brand_id IS NOT NULL THEN '品牌' ELSE '日期' END ) AS GROUP_TYPE ,SUM(CASE WHEN RN = 1 THEN T.order_amount END) AS SALE_AMT ,SUM(CASE WHEN RN = 1 THEN T.plat_fee END) AS PLAT_AMT ,SUM(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN T.order_amount END) AS DELIVER_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN T.order_amount END) AS MINI_APP_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'android' THEN T.order_amount END) AS android_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN T.order_amount END) AS ios_SALE_AMT ,SUM(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN T.order_amount END) AS PCWEB_SALE_AMT ,COUNT(CASE WHEN RN = 1 THEN T.order_id END ) AS ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.evaluation_state = 1 THEN 1 END) AS EVA_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.geval_scores < 3 THEN 1 END) AS BAD_EVA_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN 1 END) AS DELIVER_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.refund_id IS NOT NULL THEN 1 END) AS REFUND_ORDER_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN 1 END) AS MINI_APP_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'android' THEN 1 END) AS android_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN 1 END) AS ios_SALE_CNT ,COUNT(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN 1 END) AS PCWEB_SALE_CNT ,T.dt FROM TEMP T GROUP BY T.dt ,T.city_id,T.city_name ,T.trade_area_id,T.trade_area_name ,T.id,T.store_name ,T.brand_id,T.brand_name ,T.max_class_id,T.max_class_name ,T.mid_class_id,T.mid_class_name ,T.min_class_id,T.min_class_name GROUPING SETS ( (T.dt) ,(T.dt,T.city_id,T.city_name) ,(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name) ,(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name) ,(T.dt,T.brand_id,T.brand_name) ,(T.dt,T.max_class_id,T.max_class_name) ,(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name) ,(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name) ); 查询数据sql分析数据来源>>DWB层的数据表
下面分析维度/指标需要哪些数据信息(字段)以及来自哪些表
维度:
日期(dt)>>DWB_ORDER_DETAIL1
城市(city_id, city_name) >>DWB_SHOP_DETAIL
商圈(trade_area_id, trade_area_name)>>DWB_SHOP_DETAIL
店铺(id, store_name)>>DWB_SHOP_DETAIL
品牌(brand_id, brand_name)>>dwb_goods_detail
大类(max_class_id, max_class_name)>>dwb_goods_detail
中类(mid_class_id, mid_class_name)>> dwb_goods_detail
小类(min_class_id, min_class_name) >>dwb_goods_detail
指标
订单量(order_id)>>DWB_ORDER_DETAIL1
金额(order_amount, goods_price, plat_fee, settlement_amount, dispatcher_money, order_from, evaluation_state, geval_scores, is_delivery, refund_id)>>DWB_ORDER_DETAIL1
维度/指标的数据来源于3张表, 且订单表(DWB_ORDER_DETAIL1)最多>>表连接时把订单表作为主表
使用with as 做一个公共表达式先把指标/维度相关的字段数据抽取出来>>再用select 语句对数据进行分组汇总>>插入数据
抽取数据注意: 订单表的重复数据>>去重>>ROW_NUMBER()
重复数据原因:
订单表是一个宽表, 由多张事实表连接到一起, 容易产生重复数据
比如一张订单里面有多家商铺的商品,那么就会产生多条同样的订单号
select 语句的字段及数据类型, 注意和目标表的字段相对应
目标表
目标表的group_type字段,用case when 实现
字段信息: group_type string COMMENT '分组类型:store,trade_area,city,brand, min_class,mid_class,max_class,all',
注意: '分组类型'字段在用case when进行条件判断时, 只有false不满足条件才进行下一步判断>>先判断小维度,再到大维度>>减少sql 量
比如: 在维度中, 店铺 < 商圈 < 城市 ; 小类 < 中类 < 大类 ;
目标表的指标字段
select 语句中的指标数据汇总实现
(目标表)收入/成交额>>sum() 汇总
(目标表)单量>>count() 汇总
注意: case when 去重/过滤条件
目标表的分区字段 dt >>T.dt
数仓搭建(hive):DWS层(服务数据层)由讯客互联电脑硬件栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“数仓搭建(hive):DWS层(服务数据层)”
上一篇
宇树机器人G1SDK实战和交付
下一篇
idea升级安装新版本无法启动