hivesql将json格式字符串转为数组
- 创业
- 2025-07-21 19:25:33

hivesql 将json格式字符串转为数组
完整过程SQL在文末
json 格式字符串本案例 json 字符串参考格式,请勿使用本数据
{ "data": [ { "province": 11, "id_card": "110182198903224674", "name": "闾丘饱乾" }, { "province": 21, "id_card": "210182198903224674", "name": "贺巧" } ] } 测试数据本案例测试数据,复制保存后请勿格式化
{"data":[{"province":11,"id_card":"110182198903224674","name":"闾丘饱乾"},{"province":21,"id_card":"210182198903224674","name":"贺巧"},{"province":31,"id_card":"310182198903224674","name":"方加牡"},{"province":41,"id_card":"410182198903224674","name":"邱赣"},{"province":42,"id_card":"420182198903224674","name":"郝郑惭"},{"province":52,"id_card":"520182198903224674","name":"余烂"},{"province":62,"id_card":"620182198903224674","name":"宇文酚"},{"province":81,"id_card":"810182198903224674","name":"赖队瞻"}]} 创建测试数据库 create database test; use test; 创建数据表本案例为数仓分层设计
创建ods层原始数据表创建dwd层维度数据表ETL转换ods层数据插入到dwd层 创建ods层原始数据表 create table people_ods( info string );加载测试数据
load data local inpath "/root/people.json" overwrite into table people_ods; 创建dwd层维度数据表 create table people_dwd( id_card string, name string, province string ); ETL转换ods层数据插入到dwd层 insert overwrite table people_dwd ( select json_tuple(people,'id_card','name','province') as (id_card,name,province) from ( select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t1 as people) t2); 查询测试 select * from people_dwd; ETL 解析 查询原始数据 select info from people_ods; 获取json格式数组字符串使用 get_json_object 函数获取 data 属性
select get_json_object(info,'$.data') from people_ods; 将字符串两端的 [] 去掉使用 regexp_replace 函数将 开头的 [ 和结尾的 ] 替换为 空字, 注意:由于hive使用java语言编写所以需要使用转义字符
select regexp_replace(get_json_object(info,'$.data'),'\\[|\\]','') from people_ods;清洗后的格式
{"province":11,"id_card":"110182198903224674","name":"闾丘饱乾"},{"province":12,"id_card":"120182198903224674","name":"慕容芋岛"} 将字符串中 },{ 转为 };{ 对象之间使用分号间隔json格式字符串对象分隔符和属性分隔符都使用的是逗号 使用split函数切分的时候无法区分对象和属性 故而将对象分隔符替换为分号便于split函数切分
select regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{') from people_ods;清洗后的格式
{"province":11,"id_card":"110182198903224674","name":"闾丘饱乾"};{"province":12,"id_card":"120182198903224674","name":"慕容芋岛"}两次清洗后结果对比
转为 字符串数组 select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),';') from people_ods; 列转行使用 UDTF 裂函数 将单行数据转换为多行数据
select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t as people; json 解析使用 get_json_object 或者 json_tuple 函数 对json对象进行解析 本案例使用 json_tuple 函数
select json_tuple(people,'id_card','name','province') as (id_card,name,province) from ( select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t1 as people) t2; 完成过程SQL -- 创建测试数据库 create database test; -- 使用测试数据库 use test; -- 创建ods层原始数据表 create table people_ods( info string ); -- 加载数据 load data local inpath "/root/people.json" overwrite into table people_ods; -- 查询ods层袁术数据 select info from people_ods; -- 获取json格式数组字符串 select get_json_object(info,'$.data') from people_ods; -- 将字符串两端的 [] 去掉 select regexp_replace(get_json_object(info,'$.data'),'\\[|\\]','') from people_ods; -- 将字符串中 },{ 转为 };{ 对象之间使用分号间隔 select regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{') from people_ods; -- 转为 字符串数组 select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),';') from people_ods; -- 列转行 select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t as people; -- 转json对象后解析 select json_tuple(people,'id_card','name','province') as (id_card,name,province) from ( select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t1 as people) t2; -- 创建dwd层维度数据表 create table people_dwd( id_card string, name string, province string ); -- ETL转换ods层数据插入到dwd层 insert overwrite table people_dwd ( select json_tuple(people,'id_card','name','province') as (id_card,name,province) from ( select people from (select split(regexp_replace(regexp_replace(get_json_object(info,'$.data'),'\\[|\\]',''),'\\}\\,\\{','\\}\\;\\{'),";") people_list from people_ods) ods lateral view explode(people_list) t1 as people) t2); select * from people_dwd;hivesql将json格式字符串转为数组由讯客互联创业栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“hivesql将json格式字符串转为数组”
上一篇
vue3+ts指令拖拽案例