在Snowflake中重写SQL。

huangapple go评论56阅读模式
英文:

rewrite the sql in snowflake

问题

以下是翻译好的部分:

原始 MS SQL 查询:

SELECT 
CASE WHEN c.[Committee Date] is not NULL THEN 1 ELSE 0 end [C_DATE]
, CASE WHEN	r1.[roster] = 1 THEN 1
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 1 AND coalesce(r3.[R-TID],0) = 0 THEN 2	
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 0 AND coalesce(r3.[R-TID],0) = 1 THEN 3	
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 1 AND coalesce(r3.[R-TID],0) = 1 THEN 4	
    ELSE 10
    END END END END [Roster_IND]
FROM Stage os
OUTER APPLY (SELECT TOP 1 1 [roster]
        FROM [Master_R] m
        where m.[N_ID] = s.N_ID
        AND m.[T_ID] = s.T_ID
        )r1
OUTER APPLY (SELECT TOP 1 1 [R-NID]
        FROM [Master_R] m
        where m.[N_ID] = s.N_ID
        AND m.[T_ID] <> s.T_ID
        )r2
OUTER APPLY (SELECT TOP 1 1 [R-TID]
        FROM [Master_R] m
        where m.[N_ID] <> s.N_ID
        AND m.[T_ID] = s.T_ID
        )r3
OUTER APPLY (select TOP 1 [Committee Date]
            FROM C_SOURCE r
            WHERE r.N_ID = s.N_ID
            )c 
WHERE s.load_date = '2023-07-17';

Snowflake 重写:

with MR_CTR  as (
  SELECT T_ID ,N_ID
  FROM Master_R 
  QUALIFY ROW_NUMBER() OVER 
    (PARTITION BY T_ID, N_ID ORDER BY N_ID) = 1
)
select 
 CASE WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
 ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
 ELSE CASE WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
 ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
 ELSE 10
 END END END END OPS_Roster  
FROM Roster_Stage as s,
LEFT JOIN LATERAL (
 select * 
 from MR_CTR r
 where r.N_ID  = s.N_ID and 
 r.T_ID = s.T_ID ) as r1 
LEFT JOIN LATERAL (
 select *  
 from MR_CTR r 
 where r.N_ID  = s.N_ID and 
 r.T_ID <> s.T_ID ) as r2 
LEFT JOIN LATERAL (
  select *  
  from MR_CTR r 
  where r.N_ID  <> s.N_ID and 
  r.T_ID = s.T_ID ) as r3
LEFT JOIN (
  select TOP 1 Committee_Date 
  FROM C_SOURCE r 
  WHERE r.N_ID = s.N_ID) c 
WHERE s.load_date = '2023-07-17';

Snowflake 重写(简化版):

with MR_CTR  as (
  SELECT T_ID ,N_ID
  FROM Master_R 
  QUALIFY ROW_NUMBER() OVER (PARTITION BY T_ID, N_ID ORDER BY N_ID) = 1
)
select 
  CASE WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
  ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
  ELSE CASE WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
  ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
  ELSE 10
  END END END END OPS_Roster  
FROM Roster_Stage as s
LEFT JOIN LATERAL (
  select * 
  from MR_CTR r
  where r.N_ID  = s.N_ID and 
  r.T_ID = s.T_ID
) as r1 
LEFT JOIN LATERAL (
  select *  
  from MR_CTR r 
  where r.N_ID  = s.N_ID and 
  r.T_ID <> s.T_ID
) as r2 
LEFT JOIN LATERAL (
  select *  
  from MR_CTR r 
  where r.N_ID  <> s.N_ID and 
  r.T_ID = s.T_ID
) as r3
LEFT JOIN (
  select TOP 1 Committee_Date 
  FROM C_SOURCE r 
  WHERE r.N_ID = s.N_ID
) c 
WHERE s.load_date = '2023-07-17';
英文:

I am in the process of rewriting a query from old MQ SQL project with snowflake sql. I have trouble for the three subquery results from the outer APPLY. Since the MS SQL only return first row of the query. I have tried using snowflake Lateral with QUALIFY ROW_NUMBER(), the LATERAL works in sequence.

Original MS SQL query:

SELECT 
CASE WHEN c.[Committee Date] is not NULL THEN 1 ELSE 0 end [C_DATE]
, CASE WHEN	r1.[roster] = 1 THEN 1
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 1 AND coalesce(r3.[R-TID],0) = 0 THEN 2	
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 0 AND coalesce(r3.[R-TID],0) = 1 THEN 3	
    ELSE CASE WHEN coalesce(r2.[R-NID],0) = 1 AND coalesce(r3.[R-TID],0) = 1 THEN 4	
    ELSE 10
    END END END END [Roster_IND]
FROM Stage os
OUTER APPLY (SELECT TOP 1 1 [roster]
        FROM [Master_R] m
        where m.[N_ID] = s.N_ID
        AND m.[T_ID] = s.T_ID
        )r1
OUTER APPLY (SELECT TOP 1 1 [R-NID]
        FROM [Master_R] m
        where m.[N_ID] = s.N_ID
        AND m.[T_ID] &lt;&gt; s.T_ID
        )r2
OUTER APPLY (SELECT TOP 1 1 [R-TID]
        FROM [Master_R] m
        where m.[N_ID] &lt;&gt; s.N_ID
        AND m.[T_ID] = s.T_ID
        )r3
OUTER APPLY (select TOP 1 [Committee Date]
            FROM C_SOURCE r
            WHERE r.N_ID = s.N_ID
            )c 
WHERE s.load_date = &#39;2023-07-17&#39;;

The rewrite snowflake:

with MR_CTR  as (
  SELECT T_ID ,N_ID
  FROM Master_R 
  QUALIFY ROW_NUMBER() OVER 
    (PARTITION BY T_ID, N_ID ORDER BY N_ID) = 1
)
select 
 CASE WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
 ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
 ELSE CASE WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
 ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
ELSE 10
END END END END OPS_Roster  
FROM Roster_Stage as s,
LEFT JOIN LATERAL (
 select * 
 from MR_CTR r
 where r.N_ID  = s.N_ID and 
 r.T_ID = s.T_ID ) as r1 
LEFT JOIN LATERAL (
 select *  
 from MR_CTR r 
 where r.N_ID  = s.N_ID and 
r.T_ID &lt;&gt; s.T_ID ) as r2 
LEFT JOIN LATERAL (
  select *  
  from MR_CTR r 
  where r.N_ID  &lt;&gt; s.N_ID and 
  r.T_ID = s.T_ID ) as r3
LEFT JOIN (
  select TOP 1 Committee_Date 
  FROM C_SOURCE r 
  WHERE r.N_ID = s.N_ID) c 
WHERE s.load_date = &#39;2023-07-17&#39;;

I have tried using snowflake Lateral with QUALIFY ROW_NUMBER(), the LATERAL works in sequence.

The rewrite snowflake:

with MR_CTR  as (
SELECT T_ID ,N_ID
        FROM Master_R 
    QUALIFY ROW_NUMBER() OVER (PARTITION BY T_ID, N_ID ORDER BY N_ID) = 1
)
select
CASE WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
    ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
    ELSE CASE WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
    ELSE CASE WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
    ELSE 10
    END END END END OPS_Roster
FROM Roster_Stage as s,
LEFT JOIN LATERAL (select * from  MR_CTR  r where r.N_ID  = s.N_ID and r.T_ID = s.T_ID ) as r1
LEFT JOIN LATERAL (select *  from MR_CTR r where r.N_ID  = s.N_ID and r.T_ID &lt;&gt; s.T_ID ) as r2
LEFT JOIN LATERAL (select *  from MR_CTR r where r.N_ID  &lt;&gt; s.N_ID and r.T_ID = s.T_ID ) as r3
LEFT JOIN (select TOP 1 Committee_Date FROM C_SOURCE r WHERE r.N_ID = s.N_ID)c 
WHERE s.load_date = &#39;2023-07-17&#39;;

答案1

得分: 1

以下是翻译好的部分:

不是主要重点,但让我们重写CASE语句,使其只有一级。 CASE语句的WHEN子句按照规则呈现的顺序进行评估,因此您可以使用许多WHEN子句,它会依次执行:

所以现在可以这样写:

更加简洁..

所以如果我简化SQL以确保JOIN的感觉正确,并进行测试:

这给出了我期望的行:

现在我们可以在Rn值上放置一个COUNT,得到与原始SQL中的SELECT 1相同的结果:

将计数移到CTE以便于阅读,然后将之前的CASE语句插入回来

使用:

现在需要注意的是,日期使用了ANY_VALUE,因为原始的`TOP 1`没有排序顺序,所以您实际上得到的是一个基本上是随机值,所以我已经明确指出了这一点,但我假设您实际上想要的是类似于MAX或MIN

请注意,我已按照您的要求仅提供翻译,没有额外的内容。

英文:

not the main point, but lets rewrite the CASE to be just one level. CASE statements WHEN clauses are evaluated in the order of the rules presented, so you can just many WHEN clauses, and it keeps falling through:

CASE 
	WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
	ELSE CASE 
		WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
		ELSE CASE 
			WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
			ELSE CASE 
				WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
				ELSE 10
			END 
		END
	END
END as OPS_Roster

so now this can be written:

CASE 
	WHEN r1.N_ID is not NULL and r1.T_ID is not NULL THEN 1
	WHEN r2.N_ID is not null and r3.T_ID is NULL then 2
	WHEN r2.N_ID is null and r3.T_ID is NOT NULL then 3
	WHEN r2.N_ID is not null and r3.T_ID is NOT NULL then 4
	ELSE 10
END as OPS_Roster

much more relaxed..

so if I simplify the SQL to get the JOIN's feeling right, and to test with:

WITH Roster_Stage(N_ID, T_ID) as (
    select * from values
        (1, 10), -- exact
        (2, 20), -- other T_ID
        (3, 30), -- other N_ID
        (4, 42), -- others for BOTH
        (5, 50)  -- no match
), Master_R(N_ID, T_ID) as (
    select * from values
        (1, 10),
        (1, 10),    
        (2, 21),
        (2, 22),     
        (101, 30), 
        (102, 30),
        (4,43),
        (103,42)
), C_SOURCE(N_ID, Committee_Date) as (
    select * from values
        (1, &#39;2023-07-18&#39;::date), 
        (1, &#39;2023-07-17&#39;::date), 
        (2, &#39;2023-07-16&#39;::date), 
        (3, &#39;2023-07-15&#39;::date),
        (4, &#39;2023-07-14&#39;::date)
), MR_CTR AS (
    SELECT DISTINCT T_ID ,N_ID
    FROM Master_R 
)
select *
FROM Roster_Stage as s
LEFT JOIN MR_CTR as r1 
	ON r1.N_ID = s.N_ID and r1.T_ID = s.T_ID
LEFT JOIN MR_CTR as r2
	ON r2.N_ID = s.N_ID and r2.T_ID &lt;&gt; s.T_ID
LEFT JOIN MR_CTR as r3
	ON r3.N_ID &lt;&gt; s.N_ID and r3.T_ID = s.T_ID
LEFT JOIN (
	select 
		N_ID, 
		ANY_VALUE(Committee_Date) as Committee_Date
	FROM C_SOURCE
	GROUP BY 1
) as c
	ON c.n_id = s.n_id
ORDER BY 1,2;

This gives the rows I would expect:

N_ID T_ID T_ID_2 N_ID_2 T_ID_3 N_ID_3 T_ID_4 N_ID_4 N_ID_5 COMMITTEE_DATE
1 10 10 1 1 2023-07-18
2 20 21 2 2 2023-07-16
2 20 22 2 2 2023-07-16
3 30 30 102 3 2023-07-15
3 30 30 101 3 2023-07-15
4 42 43 4 42 103 4 2023-07-14
5 50

we can now put a COUNT on the Rn values get the same as the SELECT 1 from the original SQL:

select 
	s.*
	,count(r1.n_id) as c1
	,count(r2.n_id) as c2
	,count(r3.n_id) as c3
	,c.Committee_Date
FROM Roster_Stage as s
LEFT JOIN MR_CTR as r1 
	ON r1.N_ID = s.N_ID and r1.T_ID = s.T_ID
LEFT JOIN MR_CTR as r2
	ON r2.N_ID = s.N_ID and r2.T_ID &lt;&gt; s.T_ID
LEFT JOIN MR_CTR as r3
	ON r3.N_ID &lt;&gt; s.N_ID and r3.T_ID = s.T_ID
LEFT JOIN (
	select 
		N_ID, 
		ANY_VALUE(Committee_Date) as Committee_Date
	FROM C_SOURCE
	GROUP BY 1
) as c
	ON c.n_id = s.n_id
GROUP BY ALL
ORDER BY 1,2;

giving:

N_ID T_ID C1 C2 C3 COMMITTEE_DATE
1 10 1 0 0 2023-07-18
2 20 0 2 0 2023-07-16
3 30 0 0 2 2023-07-15
4 42 0 1 1 2023-07-14
5 50 0 0 0 null

moving the counting to a CTE for easy reading, and plugging the CASE statement from earlier back in we get:

WITH Roster_Stage(N_ID, T_ID) as (
    select * from values
        (1, 10), -- exact
        (2, 20), -- other T_ID
        (3, 30), -- other N_ID
        (4, 42), -- others for BOTH
        (5, 50)  -- no match
), Master_R(N_ID, T_ID) as (
    select * from values
        (1, 10),
        (1, 10),    
        (2, 21),
        (2, 22),     
        (101, 30), 
        (102, 30),
        (4,43),
        (103,42)
), C_SOURCE(N_ID, Committee_Date) as (
    select * from values
        (1, &#39;2023-07-18&#39;::date), 
        (1, &#39;2023-07-17&#39;::date), 
        (2, &#39;2023-07-16&#39;::date), 
        (3, &#39;2023-07-15&#39;::date),
        (4, &#39;2023-07-14&#39;::date)
), MR_CTR AS (
    SELECT DISTINCT T_ID ,N_ID
    FROM Master_R 
), counting_done as (
    select 
        s.*
        ,count(r1.n_id) as c1
        ,count(r2.n_id) as c2
        ,count(r3.n_id) as c3
        ,c.Committee_Date
    FROM Roster_Stage as s
    LEFT JOIN MR_CTR as r1 
        ON r1.N_ID = s.N_ID and r1.T_ID = s.T_ID
    LEFT JOIN MR_CTR as r2
        ON r2.N_ID = s.N_ID and r2.T_ID &lt;&gt; s.T_ID
    LEFT JOIN MR_CTR as r3
        ON r3.N_ID &lt;&gt; s.N_ID and r3.T_ID = s.T_ID
    LEFT JOIN (
        select 
            N_ID, 
            ANY_VALUE(Committee_Date) as Committee_Date
        FROM C_SOURCE
        GROUP BY 1
    ) as c
        ON c.n_id = s.n_id
    GROUP BY ALL
    ORDER BY 1,2
)
select 
    n_id
    ,t_id
    ,Committee_Date
    ,CASE 
        WHEN c1 &gt; 0 THEN 1
        WHEN c2 &gt; 0 and c3 = 0 then 2
        WHEN c2 = 0 and c3 &gt; 0 then 3
        WHEN c2 &gt; 0 and c3 &gt; 0 then 4
        ELSE 10
    END as OPS_Roster
FROM counting_done 
ORDER BY 1,2
;

with:

N_ID T_ID COMMITTEE_DATE OPS_ROSTER
1 10 2023-07-18 1
2 20 2023-07-16 2
3 30 2023-07-15 3
4 42 2023-07-14 4
5 50 null 10

Now of note, the DATE is using a ANY_VALUE because the original TOP 1 has no sort order, so you where getting a effectively random value, so I have made that explicit, but I assume you really want something like MAX or MIN

huangapple
  • 本文由 发表于 2023年7月18日 01:56:51
  • 转载请务必保留本文链接:https://go.coder-hub.com/76707010.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定