英文:
Ranking only the observations over a certain value
问题
以下是您要的翻译部分:
我有以下表格:
DROP TABLE IF EXISTS #df
CREATE TABLE #df
(
CommID VARCHAR(10),
ProvID VARCHAR(5),
VisitCount int,
[% Score] INT,
TimePeriod VARCHAR(10),
Median_VisitCount INT,
Average_VisitCount INT
);
INSERT INTO #df (CommID, ProvID, VisitCount, [% Score], TimePeriod, Median_VisitCount, Average_VisitCount)
VALUES
('AB345', '001', '65', .45, 'ThisYear', 48.5, 42),
('AB345', '002', '67', .64, 'ThisYear', 48.5, 42),
('AB345', '003', '32', .78, 'ThisYear', 48.5, 42),
('AB345', '004', '4', .32, 'ThisYear', 48.5, 42),
('AB345', '001', '23', .45, 'LastYear', 42.5, 41),
('AB345', '002', '56', .64, 'LastYear', 48.5, 41),
('AB345', '003', '31', .78, 'LastYear', 48.5, 41),
('AB345', '004', '54', .32, 'LastYear', 48.5, 41)
SELECT * FROM #df
我希望最终的输出如下:
DROP TABLE IF EXISTS #final
CREATE TABLE #final
(
CommID VARCHAR(10),
ProvID VARCHAR(5),
VisitCount int,
[% Score] INT,
TimePeriod VARCHAR(10),
Median_VisitCount INT,
Average_VisitCount INT,
Highest INT,
Lowest INT
);
INSERT INTO #final (CommID, ProvID, VisitCount, [% Score], TimePeriod, Median_VisitCount, Average_VisitCount, Highest, Lowest)
VALUES
('AB345', '001', '65', .45, 'ThisYear', 48.5, 42, 3, 1),
('AB345', '002', '67', .64, 'ThisYear', 48.5, 42, 2, 2),
('AB345', '003', '32', .78, 'ThisYear', 48.5, 42, 1, 3),
('AB345', '004', '4', .32, 'ThisYear', 48.5, 42, NULL, NULL),
('AB345', '001', '23', .45, 'LastYear', 42.5, 41, NULL, NULL),
('AB345', '002', '56', .64, 'LastYear', 48.5, 41, 1, 2),
('AB345', '003', '31', .78, 'LastYear', 48.5, 41, NULL, NULL),
('AB345', '004', '54', .32, 'LastYear', 48.5, 41, 2, 1)
SELECT * FROM #final
对于给定的CommID和TimePeriod,我希望排名[% Score],但仅考虑[VisitCounts] >= Average_VisitCount。
这是我编写的代码,但排名仍然考虑了低于Average_VisitCount的值。我希望不考虑VisitCount低于AverageVisitCount的行:
SELECT a.CommID
, a.ProvID
, a.VisitCount
, a.[% Score]
, CASE WHEN VisitCount >= a.Average_VisitCount
THEN RANK() OVER (PARTITION BY a.CommID, TimePeriod ORDER BY [a].[% Score] DESC)
ELSE NULL END AS Highest
, CASE WHEN VisitCount >= a.Average_VisitCount
THEN RANK() OVER (PARTITION BY a.CommID, TimePeriod ORDER BY [a].[% Score])
ELSE NULL END AS Lowest
, a.TimePeriod
, a.Median_VisitCount
, a.Average_VisitCount
FROM #df a
ORDER BY a.CommID, a.TimePeriod, a.VisitCount DESC
英文:
I have the following table:
DROP TABLE IF EXISTS #df
CREATE TABLE #df
(
CommID VARCHAR(10),
ProvID VARCHAR(5),
VisitCount int,
[% Score] INT,
TimePeriod VARCHAR(10),
Median_VisitCount INT,
Average_VisitCount INT
);
INSERT INTO #df (CommID, ProvID, VisitCount, [% Score], TimePeriod, Median_VisitCount, Average_VisitCount)
VALUES
('AB345', '001', '65', .45, 'ThisYear', 48.5, 42),
('AB345', '002', '67', .64, 'ThisYear', 48.5, 42),
('AB345', '003', '32', .78, 'ThisYear', 48.5, 42),
('AB345', '004', '4', .32, 'ThisYear', 48.5, 42),
('AB345', '001', '23', .45, 'LastYear', 42.5, 41),
('AB345', '002', '56', .64, 'LastYear', 48.5, 41),
('AB345', '003', '31', .78, 'LastYear', 48.5, 41),
('AB345', '004', '54', .32, 'LastYear', 48.5, 41)
SELECT * FROM #df
And would like my final output to be like this:
DROP TABLE IF EXISTS #final
CREATE TABLE #final
(
CommID VARCHAR(10),
ProvID VARCHAR(5),
VisitCount int,
[% Score] INT,
TimePeriod VARCHAR(10),
Median_VisitCount INT,
Average_VisitCount INT,
Highest INT,
Lowest INT
);
INSERT INTO #final (CommID, ProvID, VisitCount, [% Score], TimePeriod, Median_VisitCount, Average_VisitCount, Highest, Lowest)
VALUES
('AB345', '001', '65', .45, 'ThisYear', 48.5, 42, 3, 1),
('AB345', '002', '67', .64, 'ThisYear', 48.5, 42, 2, 2),
('AB345', '003', '32', .78, 'ThisYear', 48.5, 42, 1, 3),
('AB345', '004', '4', .32, 'ThisYear', 48.5, 42, NULL, NULL),
('AB345', '001', '23', .45, 'LastYear', 42.5, 41, NULL, NULL),
('AB345', '002', '56', .64, 'LastYear', 48.5, 41, 1, 2),
('AB345', '003', '31', .78, 'LastYear', 48.5, 41, NULL, NULL),
('AB345', '004', '54', .32, 'LastYear', 48.5, 41, 2, 1)
SELECT * FROM #final
For a given CommID and TimePeriod, I would like to rank the [% Score] but only for [VisitCounts] >= Average_VisitCount.
This is the code that I have written but the ranking is still considering the values that are below the Average_VisitCount. I would like any rows with visitcounts less than the AverageVisit count to not be considered in the ranking:
SELECT a.CommID
, a.ProvID
, a.VisitCount
, a.[% Score]
, CASE WHEN VisitCount >= a.Average_VisitCount
THEN RANK() OVER (PARTITION BY a.CommID, TimePeriod ORDER BY [a].[% Score] DESC)
ELSE NULL END AS Highest
, CASE WHEN VisitCount >= a.Average_VisitCount
THEN RANK() OVER (PARTITION BY a.CommID, TimePeriod ORDER BY [a].[% Score])
ELSE NULL END AS Lowest
, a.TimePeriod
, a.Median_VisitCount
, a.Average_VisitCount
FROM #df a
ORDER BY a.CommID, a.TimePeriod, a.VisitCount DESC
答案1
得分: 1
您的演示数据将[% Score]
指定为INT
,因此我将其更改为DECIMAL(5,2)
,否则每一行的分数都是0
。
您已经完成了大部分工作。基本上,将那些超出您范围的项排在底部,以便它们不干扰您感兴趣的排名,并且要么将它们保留在排名底部,要么导致它们显示为NULL:
SELECT *, CASE WHEN VisitCount >= Average_VisitCount THEN DENSE_RANK() OVER (PARTITION BY CommID, TimePeriod ORDER BY CASE WHEN VisitCount >= Average_VisitCount THEN [% Score] ELSE 999 END) END
FROM #df
内部的CASE
表达式使用值999(一些任意超出范围的值)对它们进行排名,外部的CASE
表达式使列对这些值返回NULL。
CommID | ProvID | VisitCount | % Score | TimePeriod | Median_VisitCount | Average_VisitCount | Ranking |
---|---|---|---|---|---|---|---|
AB345 | 004 | 54 | 0.32 | 去年 | 48 | 41 | 1 |
AB345 | 002 | 56 | 0.64 | 去年 | 48 | 41 | 2 |
AB345 | 003 | 31 | 0.78 | 去年 | 48 | 41 | |
AB345 | 001 | 23 | 0.45 | 去年 | 42 | 41 | |
AB345 | 001 | 65 | 0.45 | 今年 | 48 | 42 | 1 |
AB345 | 002 | 67 | 0.64 | 今年 | 48 | 42 | 2 |
AB345 | 003 | 32 | 0.78 | 今年 | 48 | 42 | |
AB345 | 004 | 4 | 0.32 | 今年 | 48 | 42 |
英文:
Your demo data specified [% Score]
as an INT
, so I changed it to DECIMAL(5,2)
, otherwise every row scored 0
.
You're most of the way there. Basically, rank those that fall out of your range at the bottom so they don't interfere with the ranking you're interested in, and either leave them at the bottom of the ranking, or cause them to display a NULL:
SELECT *, CASE WHEN VisitCount >= Average_VisitCount THEN DENSE_RANK() OVER (PARTITION BY CommID, TimePeriod ORDER BY CASE WHEN VisitCount >= Average_VisitCount THEN [% Score] ELSE 999 END) END
FROM #df
The inner CASE
expression ranks them using the value of 999 (some arbitrary out of range value), the outer CASE
expression causes the column to return NULL for those values.
CommID | ProvID | VisitCount | % Score | TimePeriod | Median_VisitCount | Average_VisitCount | Ranking |
---|---|---|---|---|---|---|---|
AB345 | 004 | 54 | 0.32 | LastYear | 48 | 41 | 1 |
AB345 | 002 | 56 | 0.64 | LastYear | 48 | 41 | 2 |
AB345 | 003 | 31 | 0.78 | LastYear | 48 | 41 | |
AB345 | 001 | 23 | 0.45 | LastYear | 42 | 41 | |
AB345 | 001 | 65 | 0.45 | ThisYear | 48 | 42 | 1 |
AB345 | 002 | 67 | 0.64 | ThisYear | 48 | 42 | 2 |
AB345 | 003 | 32 | 0.78 | ThisYear | 48 | 42 | |
AB345 | 004 | 4 | 0.32 | ThisYear | 48 | 42 |
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论