英文:
Extracting a Start and End Date from a Single Date Column
问题
我尝试了不同的示例,但我无法让查询识别时间的中断和重新开始:
SELECT *
ReportDate '报告日期'
,ProjectID '项目ID'
,TeamID '团队ID'
,TeamDays '与团队一起的天数'
,StartDate '开始日期'
,EndDate '结束日期'
FROM (
SELECT
ReportDate
,ProjectID
,TeamID
,TeamDays
,CAST(DATEADD(day, - TeamDays, DATEADD(D, 1,CAST(ReportDate AS DATE))) AS DATE) StartDate
,CAST(MAX(ReportDate AS DATE) EndDate
,ROW_NUMBER() OVER(PARTITION BY ProjectID, TeamID ORDER BY ReportDate) RN
FROM TEAM_PROJECTS
WHERE TeamDays = 1
GROUP BY
ReportDate
,ProjectID
,TeamID
,TeamDays) Projects
WHERE RN = 1
希望这可以帮助你。
英文:
(*Note: I updated the values to provide for gaps in report date as not all of my actual data is daily. I thank the first two responses - those helped, but it didn't address accounting for gaps in reporting dates. My real need is determining the End Date as I can easily get the start with a case statement using Days with the Team value against the Report Date.)
I have searched through the various versions of this question, but I am looking for a way to get the start and end date derived from a single column (known as report date). My situation is that a project will move from one team to the next and maybe back to a team for repeat work. Each time it comes back to a team, the days with the team start over as well as the start date resets.
This is the data:
Report Date | Project ID | Team ID | Days with Team |
---|---|---|---|
6/1/2023 | 12345 | C89 | 1 |
6/2/2023 | 12345 | C89 | 2 |
6/3/2023 | 12345 | C89 | 3 |
6/4/2023 | 12345 | C89 | 4 |
6/5/2023 | 12345 | C89 | 5 |
6/6/2023 | 12345 | B11 | 1 |
6/7/2023 | 12345 | B11 | 2 |
6/8/2023 | 12345 | B11 | 3 |
6/9/2023 | 12345 | C89 | 1 |
6/10/2023 | 12345 | C89 | 2 |
6/15/2023 | 12345 | C89 | 1 |
6/16/2023 | 12345 | C89 | 2 |
6/17/2023 | 12345 | C89 | 3 |
7/1/2023 | 12345 | B11 | 1 |
7/2/2023 | 12345 | B11 | 2 |
The results I seek is this:
Project ID | Team ID | Start Date | End Date |
---|---|---|---|
12345 | C89 | 6/1/2023 | 6/5/2023 |
12345 | B11 | 6/6/2023 | 6/8/2023 |
12345 | C89 | 6/9/2023 | 6/10/2023 |
12345 | C89 | 6/15/2023 | 6/17/2023 |
12345 | B11 | 7/1/2023 | 7/2/2023 |
I have tried different examples I found on Stack Overflow, but I can't get to where the query recognizes a break in time and a restart:
SELECT *
ReportDate 'Report Date'
,ProjectID 'Project ID'
,TeamID 'Team ID'
,TeamDays 'Days with Team'
,StartDate 'Start Date'
,EndDate 'End Date'
FROM (
SELECT
ReportDate
,ProjectID
,TeamID
,TeamDays
,CAST(DATEADD(day, - TeamDays, DATEADD(D, 1,CAST(ReportDate AS DATE))) AS DATE) StartDate
,CAST(MAX(ReportDate AS DATE) EndDate
,ROW_NUMBER() OVER(PARTITION BY ProjectID, TeamID ORDER BY ReportDate) RN
FROM TEAM_PROJECTS
WHERE TeamDays = 1
GROUP BY
ReportDate
,ProjectID
,TeamID
,TeamDays) Projects
WHERE RN = 1
Any tips would be most appreciated.
答案1
得分: 1
以下是使用窗口函数来检测分组,然后按照该分组进行分组的代码:
;WITH CTE AS (
SELECT *
, datediff(d, ReportDate, row_number() over (partition by ProjectID, TeamID order by ReportDate)) as grp
FROM data
)
SELECT
ProjectID,
TeamID,
min(ReportDate) AS StartDate,
max(ReportDate) AS EndDate
FROM CTE
GROUP BY ProjectID, TeamID, grp
ORDER BY StartDate
ProjectID | TeamID | StartDate | EndDate |
---|---|---|---|
12345 | C89 | 2023-06-01 | 2023-06-05 |
12345 | B11 | 2023-06-06 | 2023-06-08 |
12345 | C89 | 2023-06-09 | 2023-06-10 |
12345 | C89 | 2023-06-15 | 2023-06-17 |
12345 | B11 | 2023-07-01 | 2023-07-02 |
英文:
here one way using window functions to detect the groups and then group by that:
;WITH CTE AS (
SELECT *
, datediff(d, ReportDate, row_number() over (partition by ProjectID, TeamID order by ReportDate)) as grp
FROM data
)
SELECT
ProjectID,
TeamID,
min(ReportDate) AS StartDate,
max(ReportDate) AS EndDate
FROM CTE
GROUP BY ProjectID, TeamID, grp
ORDER BY StartDate
ProjectID | TeamID | StartDate | EndDate |
---|---|---|---|
12345 | C89 | 2023-06-01 | 2023-06-05 |
12345 | B11 | 2023-06-06 | 2023-06-08 |
12345 | C89 | 2023-06-09 | 2023-06-10 |
12345 | C89 | 2023-06-15 | 2023-06-17 |
12345 | B11 | 2023-07-01 | 2023-07-02 |
答案2
得分: 1
以下是翻译好的代码部分:
第一个版本:
这是一个有一些间隔和岛屿问题的问题,但更简单,我为了这个问题提供了另一种方法:
WITH data AS (
SELECT t.[Days WITH Team], t.[Project ID], t.[Team ID], cast(t.[report date] AS datetime) AS rep
FROM
(
VALUES (N'6/1/2023', 12345, N'C89', 1)
, (N'6/2/2023', 12345, N'C89', 2)
, (N'6/3/2023', 12345, N'C89', 3)
, (N'6/4/2023', 12345, N'C89', 4)
, (N'6/5/2023', 12345, N'C89', 5)
, (N'6/6/2023', 12345, N'B11', 1)
, (N'6/7/2023', 12345, N'B11', 2)
, (N'6/8/2023', 12345, N'B11', 3)
, (N'6/9/2023', 12345, N'C89', 1)
, (N'6/10/2023', 12345, N'C89', 2)
, (N'6/11/2023', 12345, N'C89', 3)
, (N'6/12/2023', 12345, N'C89', 4)
, (N'6/13/2023', 12345, N'C89', 5)
, (N'6/14/2023', 12345, N'C89', 6)
, (N'6/15/2023', 12345, N'C89', 7)
) t ([Report Date],[Project ID],[Team ID],[Days with Team])
)
SELECT [Project ID], [Team Id], MIN(rep) startdate, ISNULL(nextdate -1, MAX(rep)) enddate
FROM (
SELECT *, (SELECT MIN(d2.rep) FROM data d2 WHERE d2.[Project ID] = d.[Project ID] AND d2.rep > d.rep AND d2.[Team ID] <> d.[Team ID]) AS nextdate
FROM data d
) x
GROUP BY [Project ID], [Team Id], nextdate
ORDER BY startdate
第二个版本:
另一个版本,带有间隔:
;with data as (
select cast([Report Date] as date) as rep,[Project ID] as proj,[Team ID] as team,[Days with Team] as days
from (
VALUES (N'6/1/2023', 12345, N'C89', 1)
, (N'6/2/2023', 12345, N'C89', 2)
, (N'6/3/2023', 12345, N'C89', 3)
, (N'6/4/2023', 12345, N'C89', 4)
, (N'6/5/2023', 12345, N'C89', 5)
, (N'6/6/2023', 12345, N'B11', 1)
, (N'6/7/2023', 12345, N'B11', 2)
, (N'6/8/2023', 12345, N'B11', 3)
, (N'6/9/2023', 12345, N'C89', 1)
, (N'6/10/2023', 12345, N'C89', 2)
, (N'6/15/2023', 12345, N'C89', 1)
, (N'6/16/2023', 12345, N'C89', 2)
, (N'6/17/2023', 12345, N'C89', 3)
, (N'7/1/2023', 12345, N'B11', 1)
, (N'7/2/2023', 12345, N'B11', 2)
) t ([Report Date],[Project ID],[Team ID],[Days with Team])
)
select proj, team, MIN(rep) as s, max(rep) as e
FROM (
select sum(prevTeam) over(partition by proj order by rep) as grouping
, *
from (
select case when lag(team) over(partition by proj order by rep) <> team then 1 else 0 end as prevTeam
, *
from data
) x
) x
group by proj, team, x.grouping
希望这对你有所帮助。
英文:
This is a bit of a gaps and islands problem, but simpler, i'm including an alternative method of doing it just for this question:
WITH data AS (
SELECT t.[Days WITH Team], t.[Project ID], t.[Team ID], cast(t.[report date] AS datetime) AS rep
FROM
(
VALUES (N'6/1/2023', 12345, N'C89', 1)
, (N'6/2/2023', 12345, N'C89', 2)
, (N'6/3/2023', 12345, N'C89', 3)
, (N'6/4/2023', 12345, N'C89', 4)
, (N'6/5/2023', 12345, N'C89', 5)
, (N'6/6/2023', 12345, N'B11', 1)
, (N'6/7/2023', 12345, N'B11', 2)
, (N'6/8/2023', 12345, N'B11', 3)
, (N'6/9/2023', 12345, N'C89', 1)
, (N'6/10/2023', 12345, N'C89', 2)
, (N'6/11/2023', 12345, N'C89', 3)
, (N'6/12/2023', 12345, N'C89', 4)
, (N'6/13/2023', 12345, N'C89', 5)
, (N'6/14/2023', 12345, N'C89', 6)
, (N'6/15/2023', 12345, N'C89', 7)
) t ([Report Date],[Project ID],[Team ID],[Days with Team])
)
SELECT [Project ID], [Team Id], MIN(rep) startdate, ISNULL(nextdate -1, MAX(rep)) enddate
FROM (
SELECT *, (SELECT MIN(d2.rep) FROM data d2 WHERE d2.[Project ID] = d.[Project ID] AND d2.rep > d.rep AND d2.[Team ID] <> d.[Team ID]) AS nextdate
FROM data d
) x
GROUP BY [Project ID], [Team Id], nextdate
ORDER BY startdate
What it does is finding the least date for a project where team id differs, and then considers it's as the finished date. By then grouping the start and finish date, one can calculate the changes in the Team ID. A little hack is needed to calculate the end date, i'm simply substracting the next date with another team by one day.
If there's no change date, we assume that maximum startdate is the enddate.
Output:
Project ID | Team Id | startdate | enddate |
---|---|---|---|
12345 | C89 | 2023-06-01 | 2023-06-05 |
12345 | B11 | 2023-06-06 | 2023-06-08 |
12345 | C89 | 2023-06-09 | 2023-06-15 |
Edit another version with gaps:
;with data as (
select cast([Report Date] as date) as rep,[Project ID] as proj,[Team ID] as team,[Days with Team] as days
from (
VALUES (N'6/1/2023', 12345, N'C89', 1)
, (N'6/2/2023', 12345, N'C89', 2)
, (N'6/3/2023', 12345, N'C89', 3)
, (N'6/4/2023', 12345, N'C89', 4)
, (N'6/5/2023', 12345, N'C89', 5)
, (N'6/6/2023', 12345, N'B11', 1)
, (N'6/7/2023', 12345, N'B11', 2)
, (N'6/8/2023', 12345, N'B11', 3)
, (N'6/9/2023', 12345, N'C89', 1)
, (N'6/10/2023', 12345, N'C89', 2)
, (N'6/15/2023', 12345, N'C89', 1)
, (N'6/16/2023', 12345, N'C89', 2)
, (N'6/17/2023', 12345, N'C89', 3)
, (N'7/1/2023', 12345, N'B11', 1)
, (N'7/2/2023', 12345, N'B11', 2)
) t ([Report Date],[Project ID],[Team ID],[Days with Team])
)
select proj, team, MIN(rep) as s, max(rep) as e
FROM (
select sum(prevTeam) over(partition by proj order by rep) as grouping
, *
from (
select case when lag(team) over(partition by proj order by rep) <> team then 1 else 0 end as prevTeam
, *
from data
) x
) x
group by proj, team, x.grouping
This version follows the gaps and island solution and create a running grouping that watches for switches in the team, which is then used to group on to get the start / end date.
Output:
proj | team | s | e |
---|---|---|---|
12345 | C89 | 2023-06-01 | 2023-06-05 |
12345 | B11 | 2023-06-06 | 2023-06-08 |
12345 | C89 | 2023-06-09 | 2023-06-17 |
12345 | B11 | 2023-07-01 | 2023-07-02 |
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论