英文:
count(distinct) over (partition by ... range function)
问题
我想计算mm
(日期)上的不同yyyydd
的数量(+- 2天)。
然而,distinct函数无法与over一起使用。
如果我删除distinct,它将给我yyyydd
的总计数,但yyyydd
可能有很多重复。这就是为什么我想要添加distinct的原因。
这与 https://stackoverflow.com/questions/55347200/countdistinct-over-partition-by-doesnt-work-in-oracle-sql 有点类似,但不同
with tbl1 as
(select 'tay' cst_name, 'toy1' product_name, '20230501' yyyymmdd from dual union all
select 'tay' cst_name, 'toy1' product_name, '20230502' yyyymmdd from dual union all
select 'tay' cst_name, 'toy1' product_name, '20230507' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230321' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230421' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230422' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230423' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230423' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230527' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230527' yyyymmdd from dual )
,
tbl2 as(
select a.*, substr(yyyymmdd,1,6) as yyyymm, substr(yyyymmdd ,7,9) as mm
from tbl1 a)
select
b.*,
count(1) over (partition by cst_name, product_name order by to_number(mm) range between 2 preceding and 2 following) as cnt,
count(distinct yyyymm) over (partition by cst_name, product_name order by to_number(mm) range between 2 preceding and 2 following) as cnt_wanted
from tbl2 b
| cst_name |prod_name | yyyyddmm | mm | cnt(wrong)| cnt(wanted)
| -------- | -------- |
| tay |toy1 | 20230501 | 01 | 2 | 1
| tay |toy1 | 20230502 | 02 | 2 | 1
| tay |toy1 | 20230507 | 07 | 1 | 1
| ray |toy2 | 20230321 | 21 | 3 | 2
| ray |toy2 | 20230421 | 21 | 3 | 2
| ray |toy2 | 20230422 | 22 | 3 | 2
| ray |toy1 | 20230423 | 23 | 2 | 1
| ray |toy1 | 20230423 | 23 | 2 | 1
| ray |toy1 | 20230527 | 27 | 1 | 1
| ray |toy2 | 20230527 | 27 | 1 | 1
英文:
I want to count the distinct yyyydd
over the mm
(date). (+- 2days)
However, the distinct function can't be used with over.
If I delete distinct, it will give me the total count of the yyyydd
, but yyyydd
can have lots of duplicate. So that's why I want to add distinct.
It's a bit similar with https://stackoverflow.com/questions/55347200/countdistinct-over-partition-by-doesnt-work-in-oracle-sql but different
with tbl1 as
(select 'tay' cst_name, 'toy1' product_name, '20230501' yyyymmdd from dual union all
select 'tay' cst_name, 'toy1' product_name, '20230502' yyyymmdd from dual union all
select 'tay' cst_name, 'toy1' product_name, '20230507' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230321' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230421' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230422' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230423' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230423' yyyymmdd from dual union all
select 'ray' cst_name, 'toy1' product_name, '20230527' yyyymmdd from dual union all
select 'ray' cst_name, 'toy2' product_name, '20230527' yyyymmdd from dual )
,
tbl2 as(
select a.*, substr(yyyymmdd,1,6) as yyyymm, substr(yyyymmdd ,7,9) as mm
from tbl1 a)
select
b.*
, count(1) over (partition by cst_name, product_name order by to_number(mm) range between 2 preceding and 2 following) as cnt
, count(distinct yyyymm) over (partition by cst_name, product_name order by to_number(mm) range between 2 preceding and 2 following) as cnt -- error
from tbl2 b
| cst_name |prod_name | yyyyddmm | mm | cnt(wrong)| cnt(wanted)
| -------- | -------- |
| tay |toy1 | 20230501 | 01 | 2 | 1
| tay |toy1 | 20230502 | 02 | 2 | 1
| tay |toy1 | 20230507 | 07 | 1 | 1
| ray |toy2 | 20230321 | 21 | 3 | 2
| ray |toy2 | 20230421 | 21 | 3 | 2
| ray |toy2 | 20230422 | 22 | 3 | 2
| ray |toy1 | 20230423 | 23 | 2 | 1
| ray |toy1 | 20230423 | 23 | 2 | 1
| ray |toy1 | 20230527 | 27 | 1 | 1
| ray |toy2 | 20230527 | 27 | 1 | 1
答案1
得分: 1
您可以使用model
子句来模拟窗口函数,并执行不允许在窗口函数内部执行的操作。
with tbl1 as (
select '20230321' yyyymmdd from dual union all
select '20230327' yyyymmdd from dual union all
select '20230422' yyyymmdd from dual union all
select '20230423' yyyymmdd from dual union all
select '20230501' yyyymmdd from dual union all
select '20230502' yyyymmdd from dual union all
select '20230507' yyyymmdd from dual union all
select '20230521' yyyymmdd from dual union all
select '20230523' yyyymmdd from dual union all
select '20230527' yyyymmdd from dual
)
select *
from tbl1 b
model
/*关闭维度值的唯一性检查*/
unique single reference
/*用于偏移的内容*/
dimension by (substr(yyyymmdd ,7,9) as mm)
measures (
/*包含在输出中的内容*/
yyyymmdd,
/*允许在规则右侧引用的内容*/
substr(yyyymmdd,1,6) as yyyymm,
/*DISTINCT的计数器*/
0 as cnt
)
rules update (
cnt[any] = count(distinct yyyymm)[mm between cv(mm) - 2 and cv(mm) + 2]
)
order by 1, yyyymmdd
MM | YYYYMMDD | YYYYMM | CNT |
---|---|---|---|
01 | 20230501 | 202305 | 1 |
02 | 20230502 | 202305 | 1 |
07 | 20230507 | 202305 | 1 |
21 | 20230321 | 202303 | 3 |
21 | 20230521 | 202305 | 3 |
22 | 20230422 | 202304 | 3 |
23 | 20230423 | 202304 | 3 |
23 | 20230523 | 202305 | 3 |
27 | 20230327 | 202303 | 2 |
27 | 20230527 | 202305 | 2 |
英文:
You may use model
clause to emulate window function with operations that are not allowed within them.
with tbl1 as (
select '20230321' yyyymmdd from dual union all
select '20230327' yyyymmdd from dual union all
select '20230422' yyyymmdd from dual union all
select '20230423' yyyymmdd from dual union all
select '20230501' yyyymmdd from dual union all
select '20230502' yyyymmdd from dual union all
select '20230507' yyyymmdd from dual union all
select '20230521' yyyymmdd from dual union all
select '20230523' yyyymmdd from dual union all
select '20230527' yyyymmdd from dual
)
select *
from tbl1 b
model
/*To turn off uniqueness check of dimension values*/
unique single reference
/*What should be used for offsets*/
dimension by (substr(yyyymmdd ,7,9) as mm)
measures (
/*To include in the output*/
yyyymmdd,
/*To allow reference in the right side of RULES*/
substr(yyyymmdd,1,6) as yyyymm,
/*Counter for DISTINCT*/
0 as cnt
)
rules update (
cnt[any] = count(distinct yyyymm)[mm between cv(mm) - 2 and cv(mm) + 2]
)
order by 1, yyyymmdd
MM | YYYYMMDD | YYYYMM | CNT |
---|---|---|---|
01 | 20230501 | 202305 | 1 |
02 | 20230502 | 202305 | 1 |
07 | 20230507 | 202305 | 1 |
21 | 20230321 | 202303 | 3 |
21 | 20230521 | 202305 | 3 |
22 | 20230422 | 202304 | 3 |
23 | 20230423 | 202304 | 3 |
23 | 20230523 | 202305 | 3 |
27 | 20230327 | 202303 | 2 |
27 | 20230527 | 202305 | 2 |
答案2
得分: 1
如果在分析函数中不允许使用带有order by
的distinct
,则请使用子查询:
with tbl1 as (
select '20230321' yyyymmdd from dual union all
select '20230327' yyyymmdd from dual union all
select '20230422' yyyymmdd from dual union all
select '20230423' yyyymmdd from dual union all
select '20230501' yyyymmdd from dual union all
select '20230502' yyyymmdd from dual union all
select '20230507' yyyymmdd from dual union all
select '20230521' yyyymmdd from dual union all
select '20230523' yyyymmdd from dual union all
select '20230527' yyyymmdd from dual
),
tbl2 as (
select a.*, substr(yyyymmdd, 1, 6) as yyyymm, substr(yyyymmdd, 7, 2) as dd
from tbl1 a
)
select yyyymm, dd,
(select count(distinct yyyymm)
from tbl2 x where dd between b.dd - 2 and b.dd + 2) cnt
from tbl2 b order by dd
英文:
If distinct
with order by
in analytic function is not allowed then use subquery:
with tbl1 as (
select '20230321' yyyymmdd from dual union all
select '20230327' yyyymmdd from dual union all
select '20230422' yyyymmdd from dual union all
select '20230423' yyyymmdd from dual union all
select '20230501' yyyymmdd from dual union all
select '20230502' yyyymmdd from dual union all
select '20230507' yyyymmdd from dual union all
select '20230521' yyyymmdd from dual union all
select '20230523' yyyymmdd from dual union all
select '20230527' yyyymmdd from dual ),
tbl2 as(
select a.*, substr(yyyymmdd,1,6) as yyyymm, substr(yyyymmdd ,7,2) as dd
from tbl1 a)
select yyyymm, dd,
(select count(distinct yyyymm)
from tbl2 x where dd between b.dd - 2 and b.dd + 2 ) cnt
from tbl2 b order by dd
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论