ddl:
CREATE TABLE IF NOT EXISTS dim_date_day (
date_id string COMMENT '日ID',
date_code string COMMENT '日编码',
date_name string COMMENT '日名称',
date_short_name string COMMENT '日名称简称',
week_id string COMMENT '周ID',
month_id string COMMENT '月份ID',
season_id string COMMENT '季度ID',
year_id string COMMENT '年份ID',
pre_date_id string COMMENT '前一天ID',
after_date_id string COMMENT '后一天ID',
is_workday string COMMENT '是否工作日',
un_workday_name string COMMENT '非工作日名称',
data_source string COMMENT '数据来源',
created_time string COMMENT '创建时间'
) comment '日维度'
STORED AS ORC;
sql脚本:
-- 通过date_add累加
create temporary table dates as (
select date_add("1980-01-01", a.pos) as date_code
from (
select posexplode(split(repeat("o", datediff("2050-12-31", "1980-01-01")), "o"))) a
);
insert overwrite table dim_date_day
select
regexp_replace(date_code,'-','') as date_id,
date_code as date_code,
concat(year(date_code),'年',substr(date_code,6,2),'月',substr(date_code,9,2),'日') as date_name,
concat(cast(substr(date_code,9,2) as int),'日') as date_short_name,
year(date_sub(next_day(date_code,'MO'),4))*100 + weekofyear(date_code) as week_id,
substr(regexp_replace(date_code,'-',''),1,6) as month_id,
concat(year(date_code),floor(substr(date_code,6,2) / 3.1) + 1) as season_id,
year(date_code) as year_id,
regexp_replace(date_add(date_code,-1),'-','') as pre_date_id,
regexp_replace(date_add(date_code,1),'-','') as after_date_id,
'' as is_workday,
'' as un_workday_name,
'' as data_source,
from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:dd:ss') as created_time
from dates
order by date_code;
解析:
posexplode函数
select posexplode(split(repeat("o", datediff("2050-12-31", "1980-01-01")), "o"))
SELECT
single_id,
single_time
FROM tempTable
lateral view posexplode(split(id,',')) as single_id_index,single_id
lateral view posexplode(split(time,',')) as single_time_index,single_time
WHERE
single_id_index=single_time_index
所以
select split(repeat("o", datediff("2050-12-31", "1980-01-01")), "o") 先获取两个时间间隔天数,然后转化成
1 |
["","","","","","","","","","","","","","","","","".........} |
然后repeat("o", datediff("2050-12-31", "1980-01-01")
repeat 复制 第一个参数 ,第二个参数多少次
相隔天数有多少天就有多少个“”
from (
select posexplode(split(repeat("o", datediff("2050-12-31", "1980-01-01")), "o"))) a
结果是:
一直到相差天数
select date_add("1980-01-01", a.pos) as date_code
然后从 1980-01-01开始累加
关于posexplode的函数推荐一篇文章:
Hive中的explode使用全解 - 阿誠的数据杂货铺的文章 - 知乎 https://zhuanlan.zhihu.com/p/115918587