Алгоритм "Схлопывание" (Сollapse)

Встречается ситуация когда история создается но не несет какой либо полезной информации, или может даже мешать. Например у сущности  с определенным ключом есть поле которое часто меняется но нам не нужно грузить его в хранилище. Например мы строим в хранилище связку [Абонент]-> [ФИО] (которое может менятся), а в таблице источника есть например поле [Город] изменения которого влияют на историю.



Реализация алгоритма на Oracle:
WITH 
  --таблица с историей и ключом из двух полей 
  hist_table AS 
             (SELECT 1 AS KEY, 'Иванов' fio,'Москва' city, DATE '2010-01-01' AS eff_dttm, DATE '2011-01-01' AS exp_dttm FROM dual UNION
              SELECT 1 AS KEY, 'Иванов' fio,'Киев' city, DATE '2011-01-01' AS eff_dttm, DATE '2014-01-01' AS exp_dttm FROM dual UNION
              SELECT 1 AS KEY, 'Петров' fio, 'Киев' city, DATE '2014-01-01' AS eff_dttm, DATE '2999-12-31' AS exp_dttm FROM dual UNION
              SELECT 2 AS KEY, 'Сергеев' fio,'Киев' city,DATE '2012-01-01' AS eff_dttm, DATE '2999-12-31' AS exp_dttm FROM dual),
  
  --считаем хитрый псевдостолбец
  gr_table AS 
            (SELECT t.*,
                   ROW_NUMBER() OVER(PARTITION BY t.KEY ORDER BY t.eff_dttm) - --ключ таблицы
                   ROW_NUMBER() OVER(PARTITION BY t.KEY, t.fio ORDER BY t.eff_dttm) AS V$GROUP  --результирующие поля
            FROM
            hist_table t)
  --результат
  SELECT t.key,
         t.fio,
         MIN(t.eff_dttm) AS eff_dttm,
         MAX(t.exp_dttm) AS exp_dttm 
  FROM 
         gr_table t
  GROUP BY
        t.key,
        t.fio,
        t.v$group  
  ORDER BY 1,3 

В результате получим:
   KEYFIOEFF_DTTMEXP_DTTM
11Иванов01.01.201001.01.2014
21Петров01.01.201431.12.2999
32Сергеев01.01.201231.12.2999

Комментарии

  1. with t as
    (
    select 1 as subs, 1 as srls,10 as b, 20 as c from dual
    union all
    select 1 as subs,1 as srls,20 as b, 30 as c from dual
    union all
    select 1 as subs,1 as srls,30 as b, 40 as c from dual
    union all
    select 1 as subs,2 as srls,40 as b, 60 as c from dual
    union all
    select 1 as subs,1 as srls,50 as b, 70 as c from dual
    union all

    )
    select distinct
    subs,srls,
    min(b) over (partition by subs,groupsum),
    max(c) over (partition by subs,groupsum)
    from
    (select subs,srls,b,c,
    sum(prev) over(partition by subs order by b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) groupsum
    from
    (select subs,srls,b,c,
    case when (lag(srls) OVER (partition by subs ORDER BY b)!=srls) or (lag(srls) OVER (partition by subs ORDER BY b) is null) then srls
    else 0 end as prev

    from t)
    )

    ОтветитьУдалить

Отправить комментарий